|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 5.597671583305064, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6193342208862305, |
|
"logits/rejected": -2.5525386333465576, |
|
"logps/chosen": -265.3854064941406, |
|
"logps/rejected": -236.1589813232422, |
|
"loss": 0.1941, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": 0.0006044252077117562, |
|
"rewards/margins": 0.0008946189773268998, |
|
"rewards/rejected": -0.000290193798718974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 5.252146327563864, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.657933473587036, |
|
"logits/rejected": -2.576028347015381, |
|
"logps/chosen": -298.8043518066406, |
|
"logps/rejected": -274.30718994140625, |
|
"loss": 0.1958, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.00019080772472079843, |
|
"rewards/margins": 0.0017816700274124742, |
|
"rewards/rejected": -0.001972477650269866, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 4.974318928321663, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6759729385375977, |
|
"logits/rejected": -2.6023097038269043, |
|
"logps/chosen": -290.3868713378906, |
|
"logps/rejected": -234.3571319580078, |
|
"loss": 0.1923, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.009458948858082294, |
|
"rewards/margins": 0.013211230747401714, |
|
"rewards/rejected": -0.0037522814236581326, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 5.209557025843711, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.659982204437256, |
|
"logits/rejected": -2.6102194786071777, |
|
"logps/chosen": -280.9975891113281, |
|
"logps/rejected": -267.7278747558594, |
|
"loss": 0.1836, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.04171619936823845, |
|
"rewards/margins": 0.041064582765102386, |
|
"rewards/rejected": 0.0006516153225675225, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 5.582751836908906, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.6198437213897705, |
|
"logits/rejected": -2.6123862266540527, |
|
"logps/chosen": -293.4254455566406, |
|
"logps/rejected": -303.90386962890625, |
|
"loss": 0.1732, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.028257910162210464, |
|
"rewards/margins": 0.0754549652338028, |
|
"rewards/rejected": -0.047197047621011734, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.5737149715423584, |
|
"eval_logits/rejected": -2.494300365447998, |
|
"eval_logps/chosen": -283.4266662597656, |
|
"eval_logps/rejected": -257.4446105957031, |
|
"eval_loss": 0.16405636072158813, |
|
"eval_rewards/accuracies": 0.7068965435028076, |
|
"eval_rewards/chosen": 0.016634328290820122, |
|
"eval_rewards/margins": 0.12585583329200745, |
|
"eval_rewards/rejected": -0.10922150313854218, |
|
"eval_runtime": 93.7026, |
|
"eval_samples_per_second": 19.402, |
|
"eval_steps_per_second": 0.309, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 7.241200335566244, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.562969207763672, |
|
"logits/rejected": -2.5053610801696777, |
|
"logps/chosen": -293.1821594238281, |
|
"logps/rejected": -272.9414367675781, |
|
"loss": 0.1593, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.03697499260306358, |
|
"rewards/margins": 0.11687393486499786, |
|
"rewards/rejected": -0.15384894609451294, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 7.058665086800562, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.4592204093933105, |
|
"logits/rejected": -2.4099252223968506, |
|
"logps/chosen": -327.544189453125, |
|
"logps/rejected": -284.48760986328125, |
|
"loss": 0.1564, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.00026793667348101735, |
|
"rewards/margins": 0.23106786608695984, |
|
"rewards/rejected": -0.23079994320869446, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 8.05919017424508, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.3360755443573, |
|
"logits/rejected": -2.2115492820739746, |
|
"logps/chosen": -298.69482421875, |
|
"logps/rejected": -292.111328125, |
|
"loss": 0.1478, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.128449946641922, |
|
"rewards/margins": 0.26610296964645386, |
|
"rewards/rejected": -0.39455294609069824, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 8.402102216426163, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -1.9170547723770142, |
|
"logits/rejected": -1.8550583124160767, |
|
"logps/chosen": -299.20281982421875, |
|
"logps/rejected": -289.49163818359375, |
|
"loss": 0.1441, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.1160941943526268, |
|
"rewards/margins": 0.3108092248439789, |
|
"rewards/rejected": -0.4269034266471863, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 8.625504630985954, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -0.48900121450424194, |
|
"logits/rejected": -0.44961875677108765, |
|
"logps/chosen": -311.1794738769531, |
|
"logps/rejected": -339.92254638671875, |
|
"loss": 0.1412, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.3875480592250824, |
|
"rewards/margins": 0.2923927903175354, |
|
"rewards/rejected": -0.6799408197402954, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": -0.4184306561946869, |
|
"eval_logits/rejected": -0.1537734419107437, |
|
"eval_logps/chosen": -328.5106201171875, |
|
"eval_logps/rejected": -326.1405944824219, |
|
"eval_loss": 0.13468901813030243, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -0.4342052936553955, |
|
"eval_rewards/margins": 0.3619759678840637, |
|
"eval_rewards/rejected": -0.796181321144104, |
|
"eval_runtime": 92.0893, |
|
"eval_samples_per_second": 19.742, |
|
"eval_steps_per_second": 0.315, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 10.103043851045701, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -0.40221118927001953, |
|
"logits/rejected": -0.13967064023017883, |
|
"logps/chosen": -312.624267578125, |
|
"logps/rejected": -311.3497009277344, |
|
"loss": 0.1354, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4743613302707672, |
|
"rewards/margins": 0.3018040060997009, |
|
"rewards/rejected": -0.776165246963501, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 10.671011000324334, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": -0.04719700291752815, |
|
"logits/rejected": 0.09439365565776825, |
|
"logps/chosen": -324.4268798828125, |
|
"logps/rejected": -335.16424560546875, |
|
"loss": 0.1353, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4860754609107971, |
|
"rewards/margins": 0.3509276807308197, |
|
"rewards/rejected": -0.8370031118392944, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 11.348522180816168, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": -0.058751799166202545, |
|
"logits/rejected": 0.29970529675483704, |
|
"logps/chosen": -296.40399169921875, |
|
"logps/rejected": -316.8427429199219, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.4022388458251953, |
|
"rewards/margins": 0.3400627374649048, |
|
"rewards/rejected": -0.7423015832901001, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 25.0836527688945, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": -0.29172104597091675, |
|
"logits/rejected": 0.11182677745819092, |
|
"logps/chosen": -303.4015197753906, |
|
"logps/rejected": -286.45526123046875, |
|
"loss": 0.1334, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.2721634805202484, |
|
"rewards/margins": 0.34942418336868286, |
|
"rewards/rejected": -0.6215876340866089, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 11.640337926409853, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": 0.08257939666509628, |
|
"logits/rejected": 0.5254168510437012, |
|
"logps/chosen": -308.8121337890625, |
|
"logps/rejected": -314.32220458984375, |
|
"loss": 0.1307, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.42268529534339905, |
|
"rewards/margins": 0.3886614143848419, |
|
"rewards/rejected": -0.8113466501235962, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": 0.018126631155610085, |
|
"eval_logits/rejected": 0.7143937349319458, |
|
"eval_logps/chosen": -320.6210021972656, |
|
"eval_logps/rejected": -332.353271484375, |
|
"eval_loss": 0.12613436579704285, |
|
"eval_rewards/accuracies": 0.7284482717514038, |
|
"eval_rewards/chosen": -0.3553086221218109, |
|
"eval_rewards/margins": 0.5029994249343872, |
|
"eval_rewards/rejected": -0.8583080768585205, |
|
"eval_runtime": 91.5757, |
|
"eval_samples_per_second": 19.852, |
|
"eval_steps_per_second": 0.317, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 11.56271576551216, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": 0.02932182513177395, |
|
"logits/rejected": 0.4751170575618744, |
|
"logps/chosen": -337.37237548828125, |
|
"logps/rejected": -373.10382080078125, |
|
"loss": 0.1247, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.40821242332458496, |
|
"rewards/margins": 0.46313947439193726, |
|
"rewards/rejected": -0.8713518381118774, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 7.985791463305948, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": 0.06568197906017303, |
|
"logits/rejected": 0.3852883279323578, |
|
"logps/chosen": -311.62481689453125, |
|
"logps/rejected": -353.85443115234375, |
|
"loss": 0.1191, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.40023931860923767, |
|
"rewards/margins": 0.4376160204410553, |
|
"rewards/rejected": -0.837855339050293, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 9.33389793686436, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": 0.15779247879981995, |
|
"logits/rejected": 0.6506751775741577, |
|
"logps/chosen": -348.14398193359375, |
|
"logps/rejected": -325.84637451171875, |
|
"loss": 0.1278, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.44786080718040466, |
|
"rewards/margins": 0.4256533682346344, |
|
"rewards/rejected": -0.8735141754150391, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 10.313716932222244, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": -0.43816322088241577, |
|
"logits/rejected": 0.023333895951509476, |
|
"logps/chosen": -283.8222351074219, |
|
"logps/rejected": -337.9872741699219, |
|
"loss": 0.1236, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2570773959159851, |
|
"rewards/margins": 0.47935089468955994, |
|
"rewards/rejected": -0.7364283800125122, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 9.248153089741166, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": -0.08292710781097412, |
|
"logits/rejected": 0.5214661359786987, |
|
"logps/chosen": -312.56500244140625, |
|
"logps/rejected": -325.7038269042969, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.34283357858657837, |
|
"rewards/margins": 0.4756447672843933, |
|
"rewards/rejected": -0.8184784054756165, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": 0.2969372272491455, |
|
"eval_logits/rejected": 1.2989132404327393, |
|
"eval_logps/chosen": -326.1716613769531, |
|
"eval_logps/rejected": -341.28619384765625, |
|
"eval_loss": 0.11991516500711441, |
|
"eval_rewards/accuracies": 0.732758641242981, |
|
"eval_rewards/chosen": -0.41081586480140686, |
|
"eval_rewards/margins": 0.5368219614028931, |
|
"eval_rewards/rejected": -0.9476377367973328, |
|
"eval_runtime": 92.1824, |
|
"eval_samples_per_second": 19.722, |
|
"eval_steps_per_second": 0.315, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 8.486691996298621, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": 0.504795253276825, |
|
"logits/rejected": 1.0519931316375732, |
|
"logps/chosen": -317.31756591796875, |
|
"logps/rejected": -346.00689697265625, |
|
"loss": 0.1175, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.44013166427612305, |
|
"rewards/margins": 0.47651535272598267, |
|
"rewards/rejected": -0.9166469573974609, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 9.587760322945808, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": 0.056784339249134064, |
|
"logits/rejected": 1.057108998298645, |
|
"logps/chosen": -358.15313720703125, |
|
"logps/rejected": -375.32452392578125, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.3547281324863434, |
|
"rewards/margins": 0.548815131187439, |
|
"rewards/rejected": -0.9035432934761047, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 9.093354835969592, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": -0.12167753279209137, |
|
"logits/rejected": 0.5305444002151489, |
|
"logps/chosen": -298.165283203125, |
|
"logps/rejected": -341.3959045410156, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3022175431251526, |
|
"rewards/margins": 0.4555422365665436, |
|
"rewards/rejected": -0.7577598094940186, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 9.439667913770359, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -0.21128520369529724, |
|
"logits/rejected": 0.5969688296318054, |
|
"logps/chosen": -321.5914306640625, |
|
"logps/rejected": -339.0221862792969, |
|
"loss": 0.1218, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.34059566259384155, |
|
"rewards/margins": 0.43612655997276306, |
|
"rewards/rejected": -0.7767221927642822, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 9.292342727311365, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": -0.020272482186555862, |
|
"logits/rejected": 0.808585524559021, |
|
"logps/chosen": -337.55767822265625, |
|
"logps/rejected": -346.179443359375, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.337638795375824, |
|
"rewards/margins": 0.5197167992591858, |
|
"rewards/rejected": -0.8573554754257202, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": -0.1745007336139679, |
|
"eval_logits/rejected": 0.8515735268592834, |
|
"eval_logps/chosen": -315.9549560546875, |
|
"eval_logps/rejected": -335.7632751464844, |
|
"eval_loss": 0.11658623069524765, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -0.3086487948894501, |
|
"eval_rewards/margins": 0.5837592482566833, |
|
"eval_rewards/rejected": -0.8924079537391663, |
|
"eval_runtime": 92.3611, |
|
"eval_samples_per_second": 19.684, |
|
"eval_steps_per_second": 0.314, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 8.186794570934321, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": -0.006458556745201349, |
|
"logits/rejected": 0.6227847933769226, |
|
"logps/chosen": -308.7506408691406, |
|
"logps/rejected": -372.9380798339844, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3803611397743225, |
|
"rewards/margins": 0.6050828695297241, |
|
"rewards/rejected": -0.9854438900947571, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 10.337683797583873, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": 0.44111576676368713, |
|
"logits/rejected": 0.8292443156242371, |
|
"logps/chosen": -313.4148254394531, |
|
"logps/rejected": -354.33258056640625, |
|
"loss": 0.1168, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.47352510690689087, |
|
"rewards/margins": 0.5513299703598022, |
|
"rewards/rejected": -1.0248548984527588, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 10.116957119119249, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": 0.1775570809841156, |
|
"logits/rejected": 1.0465366840362549, |
|
"logps/chosen": -317.04180908203125, |
|
"logps/rejected": -370.72479248046875, |
|
"loss": 0.1221, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4404297471046448, |
|
"rewards/margins": 0.6288600564002991, |
|
"rewards/rejected": -1.0692898035049438, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 9.609746165977107, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": 0.02145857736468315, |
|
"logits/rejected": 0.823032021522522, |
|
"logps/chosen": -322.24542236328125, |
|
"logps/rejected": -342.22503662109375, |
|
"loss": 0.1156, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.3451458811759949, |
|
"rewards/margins": 0.5379365086555481, |
|
"rewards/rejected": -0.883082389831543, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 9.08082569283608, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": -0.18167677521705627, |
|
"logits/rejected": 0.25138911604881287, |
|
"logps/chosen": -341.6298828125, |
|
"logps/rejected": -338.09869384765625, |
|
"loss": 0.1228, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3568056523799896, |
|
"rewards/margins": 0.43558257818222046, |
|
"rewards/rejected": -0.7923881411552429, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": -0.13157324492931366, |
|
"eval_logits/rejected": 0.8573818206787109, |
|
"eval_logps/chosen": -322.0434265136719, |
|
"eval_logps/rejected": -339.1875305175781, |
|
"eval_loss": 0.11548212170600891, |
|
"eval_rewards/accuracies": 0.7456896305084229, |
|
"eval_rewards/chosen": -0.3695334494113922, |
|
"eval_rewards/margins": 0.5571174621582031, |
|
"eval_rewards/rejected": -0.9266510009765625, |
|
"eval_runtime": 91.8257, |
|
"eval_samples_per_second": 19.798, |
|
"eval_steps_per_second": 0.316, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 9.32642322141669, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": -0.07652176916599274, |
|
"logits/rejected": 0.6259859800338745, |
|
"logps/chosen": -321.20538330078125, |
|
"logps/rejected": -355.15618896484375, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.43240228295326233, |
|
"rewards/margins": 0.4546627104282379, |
|
"rewards/rejected": -0.887065052986145, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 9.870138396091912, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": 0.15437743067741394, |
|
"logits/rejected": 0.9939621686935425, |
|
"logps/chosen": -314.3326416015625, |
|
"logps/rejected": -335.50787353515625, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.40100646018981934, |
|
"rewards/margins": 0.6091704964637756, |
|
"rewards/rejected": -1.0101768970489502, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 10.470680354911353, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": 0.41432857513427734, |
|
"logits/rejected": 0.9864773750305176, |
|
"logps/chosen": -319.98114013671875, |
|
"logps/rejected": -336.5364074707031, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.45458975434303284, |
|
"rewards/margins": 0.43977364897727966, |
|
"rewards/rejected": -0.8943634033203125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 10.41854471188631, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": 0.3841426968574524, |
|
"logits/rejected": 0.9351291656494141, |
|
"logps/chosen": -334.6560974121094, |
|
"logps/rejected": -349.6623229980469, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4666348397731781, |
|
"rewards/margins": 0.4301484227180481, |
|
"rewards/rejected": -0.8967832326889038, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 9.996573814918259, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": 0.6401292085647583, |
|
"logits/rejected": 1.3996922969818115, |
|
"logps/chosen": -324.6096496582031, |
|
"logps/rejected": -337.38555908203125, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.5011857151985168, |
|
"rewards/margins": 0.4960555136203766, |
|
"rewards/rejected": -0.997241199016571, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": 0.5151604413986206, |
|
"eval_logits/rejected": 1.5740225315093994, |
|
"eval_logps/chosen": -329.048583984375, |
|
"eval_logps/rejected": -348.0972900390625, |
|
"eval_loss": 0.113578200340271, |
|
"eval_rewards/accuracies": 0.7629310488700867, |
|
"eval_rewards/chosen": -0.4395850598812103, |
|
"eval_rewards/margins": 0.5761635303497314, |
|
"eval_rewards/rejected": -1.0157485008239746, |
|
"eval_runtime": 92.2267, |
|
"eval_samples_per_second": 19.712, |
|
"eval_steps_per_second": 0.314, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 8.23863269387286, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": 0.49835261702537537, |
|
"logits/rejected": 1.0901247262954712, |
|
"logps/chosen": -292.41424560546875, |
|
"logps/rejected": -345.85540771484375, |
|
"loss": 0.1131, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.40094178915023804, |
|
"rewards/margins": 0.4722411632537842, |
|
"rewards/rejected": -0.8731829524040222, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 10.10305933177313, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": 0.16921785473823547, |
|
"logits/rejected": 0.9673768877983093, |
|
"logps/chosen": -340.80059814453125, |
|
"logps/rejected": -353.12274169921875, |
|
"loss": 0.1129, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.41216129064559937, |
|
"rewards/margins": 0.4933200776576996, |
|
"rewards/rejected": -0.9054813385009766, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 9.150940276647933, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": 0.3220874071121216, |
|
"logits/rejected": 1.2661678791046143, |
|
"logps/chosen": -332.2058410644531, |
|
"logps/rejected": -351.96856689453125, |
|
"loss": 0.1174, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.35827815532684326, |
|
"rewards/margins": 0.5848454236984253, |
|
"rewards/rejected": -0.9431236386299133, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 9.488885903438641, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": 0.6681944727897644, |
|
"logits/rejected": 1.2935806512832642, |
|
"logps/chosen": -292.4158630371094, |
|
"logps/rejected": -315.7539978027344, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.40458402037620544, |
|
"rewards/margins": 0.4598938524723053, |
|
"rewards/rejected": -0.8644779324531555, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 9.000204295912624, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": 0.7832537889480591, |
|
"logits/rejected": 1.2279746532440186, |
|
"logps/chosen": -260.876708984375, |
|
"logps/rejected": -314.25390625, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.3631708323955536, |
|
"rewards/margins": 0.4872121214866638, |
|
"rewards/rejected": -0.8503829836845398, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": 0.40672120451927185, |
|
"eval_logits/rejected": 1.4352502822875977, |
|
"eval_logps/chosen": -324.5692443847656, |
|
"eval_logps/rejected": -342.49981689453125, |
|
"eval_loss": 0.11321888864040375, |
|
"eval_rewards/accuracies": 0.7543103694915771, |
|
"eval_rewards/chosen": -0.39479103684425354, |
|
"eval_rewards/margins": 0.5649827122688293, |
|
"eval_rewards/rejected": -0.9597737789154053, |
|
"eval_runtime": 92.1464, |
|
"eval_samples_per_second": 19.729, |
|
"eval_steps_per_second": 0.315, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 12.797858678996972, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": 0.733371913433075, |
|
"logits/rejected": 1.6556295156478882, |
|
"logps/chosen": -313.32049560546875, |
|
"logps/rejected": -336.0906677246094, |
|
"loss": 0.1171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.42368635535240173, |
|
"rewards/margins": 0.5567531585693359, |
|
"rewards/rejected": -0.9804395437240601, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 11.330514295442143, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": 0.8590083122253418, |
|
"logits/rejected": 1.5339264869689941, |
|
"logps/chosen": -304.95831298828125, |
|
"logps/rejected": -347.80169677734375, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4551934599876404, |
|
"rewards/margins": 0.5640500783920288, |
|
"rewards/rejected": -1.019243597984314, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 9.086504274831077, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": 0.5266289710998535, |
|
"logits/rejected": 1.5435163974761963, |
|
"logps/chosen": -344.6377868652344, |
|
"logps/rejected": -336.5421447753906, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.41462403535842896, |
|
"rewards/margins": 0.5104321837425232, |
|
"rewards/rejected": -0.9250561594963074, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.13218348616853767, |
|
"train_runtime": 11343.5766, |
|
"train_samples_per_second": 4.915, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|