|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1751, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.70925584855601, |
|
"learning_rate": 2.8409090909090907e-09, |
|
"logits/chosen": -2.915217876434326, |
|
"logits/rejected": -2.758937358856201, |
|
"logps/chosen": -1027.0467529296875, |
|
"logps/rejected": -801.54931640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.305868983825744, |
|
"learning_rate": 2.8409090909090908e-08, |
|
"logits/chosen": -2.853972911834717, |
|
"logits/rejected": -2.8207626342773438, |
|
"logps/chosen": -688.71875, |
|
"logps/rejected": -695.2182006835938, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": -8.214871922973543e-05, |
|
"rewards/margins": 0.00015560892643406987, |
|
"rewards/rejected": -0.00023775763111189008, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 2.40743980345329, |
|
"learning_rate": 5.6818181818181815e-08, |
|
"logits/chosen": -3.022716999053955, |
|
"logits/rejected": -2.9431874752044678, |
|
"logps/chosen": -762.6065063476562, |
|
"logps/rejected": -704.7117919921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -6.342886626953259e-05, |
|
"rewards/margins": 0.00014013503096066415, |
|
"rewards/rejected": -0.0002035639190580696, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.104625945949737, |
|
"learning_rate": 8.522727272727271e-08, |
|
"logits/chosen": -2.8718769550323486, |
|
"logits/rejected": -2.782773494720459, |
|
"logps/chosen": -806.1029052734375, |
|
"logps/rejected": -715.8812255859375, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0003297650837339461, |
|
"rewards/margins": -0.00031846793717704713, |
|
"rewards/rejected": 0.0006482329918071628, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 2.55572189108705, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.820253849029541, |
|
"logits/rejected": -2.7977638244628906, |
|
"logps/chosen": -628.6044921875, |
|
"logps/rejected": -596.6134643554688, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.0016179379308596253, |
|
"rewards/margins": 0.0007343190954998136, |
|
"rewards/rejected": 0.0008836188353598118, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.231257874310367, |
|
"learning_rate": 1.4204545454545455e-07, |
|
"logits/chosen": -2.891606569290161, |
|
"logits/rejected": -2.8657639026641846, |
|
"logps/chosen": -737.377197265625, |
|
"logps/rejected": -629.8726806640625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.00467439740896225, |
|
"rewards/margins": 0.0006132819689810276, |
|
"rewards/rejected": 0.004061114974319935, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.205514048621008, |
|
"learning_rate": 1.7045454545454543e-07, |
|
"logits/chosen": -2.945676326751709, |
|
"logits/rejected": -2.934192419052124, |
|
"logps/chosen": -706.3095703125, |
|
"logps/rejected": -644.1196899414062, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.008496693335473537, |
|
"rewards/margins": 0.0008374860626645386, |
|
"rewards/rejected": 0.007659205701202154, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.011345495557958, |
|
"learning_rate": 1.9886363636363636e-07, |
|
"logits/chosen": -2.988844394683838, |
|
"logits/rejected": -2.8398101329803467, |
|
"logps/chosen": -726.2923583984375, |
|
"logps/rejected": -645.0869140625, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.01823153905570507, |
|
"rewards/margins": 0.0032039093784987926, |
|
"rewards/rejected": 0.01502762921154499, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.8344720832998354, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.8927783966064453, |
|
"logits/rejected": -2.8410775661468506, |
|
"logps/chosen": -790.5596923828125, |
|
"logps/rejected": -713.123046875, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.03424953669309616, |
|
"rewards/margins": 0.00334340101107955, |
|
"rewards/rejected": 0.030906137079000473, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.005554229830089, |
|
"learning_rate": 2.5568181818181816e-07, |
|
"logits/chosen": -2.9193904399871826, |
|
"logits/rejected": -2.8237431049346924, |
|
"logps/chosen": -765.6085815429688, |
|
"logps/rejected": -687.0435791015625, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.052040331065654755, |
|
"rewards/margins": 0.007134866900742054, |
|
"rewards/rejected": 0.044905465096235275, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.7536228949783403, |
|
"learning_rate": 2.840909090909091e-07, |
|
"logits/chosen": -2.7679712772369385, |
|
"logits/rejected": -2.750290870666504, |
|
"logps/chosen": -661.1817626953125, |
|
"logps/rejected": -607.760986328125, |
|
"loss": 0.6884, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.06972592324018478, |
|
"rewards/margins": 0.008985200896859169, |
|
"rewards/rejected": 0.06074073165655136, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_logits/chosen": -2.8753762245178223, |
|
"eval_logits/rejected": -2.778611421585083, |
|
"eval_logps/chosen": -714.9249877929688, |
|
"eval_logps/rejected": -645.173095703125, |
|
"eval_loss": 0.6886128187179565, |
|
"eval_rewards/accuracies": 0.5647435784339905, |
|
"eval_rewards/chosen": 0.08787301182746887, |
|
"eval_rewards/margins": 0.010522950440645218, |
|
"eval_rewards/rejected": 0.07735005766153336, |
|
"eval_runtime": 837.7219, |
|
"eval_samples_per_second": 14.863, |
|
"eval_steps_per_second": 0.466, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.989207588161087, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -2.879096508026123, |
|
"logits/rejected": -2.6906533241271973, |
|
"logps/chosen": -679.3154296875, |
|
"logps/rejected": -712.8575439453125, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.0985824465751648, |
|
"rewards/margins": 0.0015855018282309175, |
|
"rewards/rejected": 0.09699694812297821, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.261619042072944, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.7942328453063965, |
|
"logits/rejected": -2.7030820846557617, |
|
"logps/chosen": -665.7606201171875, |
|
"logps/rejected": -582.5177612304688, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.11793726682662964, |
|
"rewards/margins": 0.015646493062376976, |
|
"rewards/rejected": 0.10229077190160751, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.270843600939803, |
|
"learning_rate": 3.693181818181818e-07, |
|
"logits/chosen": -2.782252788543701, |
|
"logits/rejected": -2.760941743850708, |
|
"logps/chosen": -715.648193359375, |
|
"logps/rejected": -647.896484375, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.14008425176143646, |
|
"rewards/margins": 0.015175814740359783, |
|
"rewards/rejected": 0.124908447265625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.2952967047901565, |
|
"learning_rate": 3.977272727272727e-07, |
|
"logits/chosen": -2.753657817840576, |
|
"logits/rejected": -2.718437910079956, |
|
"logps/chosen": -732.1031494140625, |
|
"logps/rejected": -675.71435546875, |
|
"loss": 0.6835, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.15980581939220428, |
|
"rewards/margins": 0.014441452920436859, |
|
"rewards/rejected": 0.14536437392234802, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.0745365771074096, |
|
"learning_rate": 4.2613636363636364e-07, |
|
"logits/chosen": -2.818753480911255, |
|
"logits/rejected": -2.866856098175049, |
|
"logps/chosen": -700.4889526367188, |
|
"logps/rejected": -673.15234375, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.1768052875995636, |
|
"rewards/margins": 0.01371350884437561, |
|
"rewards/rejected": 0.163091778755188, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.3049728868710417, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.6952528953552246, |
|
"logits/rejected": -2.6161999702453613, |
|
"logps/chosen": -756.7824096679688, |
|
"logps/rejected": -669.5311279296875, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.22233958542346954, |
|
"rewards/margins": 0.03233207389712334, |
|
"rewards/rejected": 0.1900075227022171, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.503833153793772, |
|
"learning_rate": 4.829545454545455e-07, |
|
"logits/chosen": -2.7758500576019287, |
|
"logits/rejected": -2.693150281906128, |
|
"logps/chosen": -721.1317138671875, |
|
"logps/rejected": -693.0599365234375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2289244681596756, |
|
"rewards/margins": 0.01609339565038681, |
|
"rewards/rejected": 0.212831050157547, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.963645529752969, |
|
"learning_rate": 4.999920426892062e-07, |
|
"logits/chosen": -2.728468418121338, |
|
"logits/rejected": -2.7938730716705322, |
|
"logps/chosen": -697.4301147460938, |
|
"logps/rejected": -639.5413818359375, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.22587458789348602, |
|
"rewards/margins": 0.011290490627288818, |
|
"rewards/rejected": 0.214584082365036, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.9105830549467013, |
|
"learning_rate": 4.999025287600885e-07, |
|
"logits/chosen": -2.569119453430176, |
|
"logits/rejected": -2.600816011428833, |
|
"logps/chosen": -764.7642211914062, |
|
"logps/rejected": -736.1318359375, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.24947170913219452, |
|
"rewards/margins": 0.015052946284413338, |
|
"rewards/rejected": 0.23441874980926514, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 2.1951075055673153, |
|
"learning_rate": 4.997135899956001e-07, |
|
"logits/chosen": -2.7701027393341064, |
|
"logits/rejected": -2.6743502616882324, |
|
"logps/chosen": -697.3702392578125, |
|
"logps/rejected": -623.8272705078125, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.2433600127696991, |
|
"rewards/margins": 0.034381039440631866, |
|
"rewards/rejected": 0.20897898077964783, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_logits/chosen": -2.6971323490142822, |
|
"eval_logits/rejected": -2.6093711853027344, |
|
"eval_logps/chosen": -698.2555541992188, |
|
"eval_logps/rejected": -630.9727783203125, |
|
"eval_loss": 0.6808642745018005, |
|
"eval_rewards/accuracies": 0.5746794939041138, |
|
"eval_rewards/chosen": 0.25456854701042175, |
|
"eval_rewards/margins": 0.03521544486284256, |
|
"eval_rewards/rejected": 0.2193530946969986, |
|
"eval_runtime": 833.7444, |
|
"eval_samples_per_second": 14.934, |
|
"eval_steps_per_second": 0.468, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.386315180732009, |
|
"learning_rate": 4.994253015658708e-07, |
|
"logits/chosen": -2.6340126991271973, |
|
"logits/rejected": -2.6266329288482666, |
|
"logps/chosen": -687.16845703125, |
|
"logps/rejected": -614.4462280273438, |
|
"loss": 0.682, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.24873919785022736, |
|
"rewards/margins": 0.029811348766088486, |
|
"rewards/rejected": 0.21892781555652618, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.2083799767077594, |
|
"learning_rate": 4.990377781677378e-07, |
|
"logits/chosen": -2.6552016735076904, |
|
"logits/rejected": -2.60087251663208, |
|
"logps/chosen": -786.2576904296875, |
|
"logps/rejected": -686.0176391601562, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.29123130440711975, |
|
"rewards/margins": 0.05502746254205704, |
|
"rewards/rejected": 0.23620383441448212, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 2.096783018708116, |
|
"learning_rate": 4.985511739791128e-07, |
|
"logits/chosen": -2.7540948390960693, |
|
"logits/rejected": -2.6966335773468018, |
|
"logps/chosen": -676.9022216796875, |
|
"logps/rejected": -581.7147827148438, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.25563615560531616, |
|
"rewards/margins": 0.03196953982114792, |
|
"rewards/rejected": 0.22366662323474884, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.084361296501474, |
|
"learning_rate": 4.979656825976425e-07, |
|
"logits/chosen": -2.655158281326294, |
|
"logits/rejected": -2.5282509326934814, |
|
"logps/chosen": -687.6748657226562, |
|
"logps/rejected": -673.4171142578125, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.29197677969932556, |
|
"rewards/margins": 0.03262047842144966, |
|
"rewards/rejected": 0.2593563497066498, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.391265956326263, |
|
"learning_rate": 4.972815369636832e-07, |
|
"logits/chosen": -2.5233607292175293, |
|
"logits/rejected": -2.435659170150757, |
|
"logps/chosen": -794.3279418945312, |
|
"logps/rejected": -715.5684204101562, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3191401958465576, |
|
"rewards/margins": 0.04753274470567703, |
|
"rewards/rejected": 0.2716074585914612, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.149818689029419, |
|
"learning_rate": 4.964990092676262e-07, |
|
"logits/chosen": -2.6722426414489746, |
|
"logits/rejected": -2.4782052040100098, |
|
"logps/chosen": -764.1699829101562, |
|
"logps/rejected": -645.8834228515625, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.31941699981689453, |
|
"rewards/margins": 0.07869166880846024, |
|
"rewards/rejected": 0.2407253533601761, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.035030934228571, |
|
"learning_rate": 4.956184108416046e-07, |
|
"logits/chosen": -2.5263333320617676, |
|
"logits/rejected": -2.5625710487365723, |
|
"logps/chosen": -683.1992797851562, |
|
"logps/rejected": -625.9034423828125, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.27234965562820435, |
|
"rewards/margins": 0.041879888623952866, |
|
"rewards/rejected": 0.23046977818012238, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.9569259926485463, |
|
"learning_rate": 4.946400920356287e-07, |
|
"logits/chosen": -2.7372500896453857, |
|
"logits/rejected": -2.6020898818969727, |
|
"logps/chosen": -719.0564575195312, |
|
"logps/rejected": -637.1673583984375, |
|
"loss": 0.6754, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.28345972299575806, |
|
"rewards/margins": 0.04788089543581009, |
|
"rewards/rejected": 0.23557886481285095, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.0726336037515845, |
|
"learning_rate": 4.935644420781978e-07, |
|
"logits/chosen": -2.479055881500244, |
|
"logits/rejected": -2.4796347618103027, |
|
"logps/chosen": -706.4686889648438, |
|
"logps/rejected": -655.3255004882812, |
|
"loss": 0.6822, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.3067619204521179, |
|
"rewards/margins": 0.023757971823215485, |
|
"rewards/rejected": 0.28300395607948303, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.7082840348298203, |
|
"learning_rate": 4.923918889214436e-07, |
|
"logits/chosen": -2.414355754852295, |
|
"logits/rejected": -2.386721611022949, |
|
"logps/chosen": -693.1402587890625, |
|
"logps/rejected": -700.3583984375, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.2925127148628235, |
|
"rewards/margins": 0.026889953762292862, |
|
"rewards/rejected": 0.26562273502349854, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/chosen": -2.6062464714050293, |
|
"eval_logits/rejected": -2.522627353668213, |
|
"eval_logps/chosen": -693.9142456054688, |
|
"eval_logps/rejected": -628.194580078125, |
|
"eval_loss": 0.6755158305168152, |
|
"eval_rewards/accuracies": 0.5833333134651184, |
|
"eval_rewards/chosen": 0.2979806959629059, |
|
"eval_rewards/margins": 0.0508454330265522, |
|
"eval_rewards/rejected": 0.24713526666164398, |
|
"eval_runtime": 811.8177, |
|
"eval_samples_per_second": 15.337, |
|
"eval_steps_per_second": 0.48, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.075320347171257, |
|
"learning_rate": 4.91122899070868e-07, |
|
"logits/chosen": -2.5357015132904053, |
|
"logits/rejected": -2.490417003631592, |
|
"logps/chosen": -755.467041015625, |
|
"logps/rejected": -663.8169555664062, |
|
"loss": 0.6797, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.3274896740913391, |
|
"rewards/margins": 0.05784725025296211, |
|
"rewards/rejected": 0.2696423828601837, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.145836596773884, |
|
"learning_rate": 4.897579773997414e-07, |
|
"logits/chosen": -2.5054690837860107, |
|
"logits/rejected": -2.509059429168701, |
|
"logps/chosen": -684.943359375, |
|
"logps/rejected": -603.5328369140625, |
|
"loss": 0.6739, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.30167508125305176, |
|
"rewards/margins": 0.05937837436795235, |
|
"rewards/rejected": 0.2422967255115509, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.048334012658428, |
|
"learning_rate": 4.882976669482367e-07, |
|
"logits/chosen": -2.792116165161133, |
|
"logits/rejected": -2.664175271987915, |
|
"logps/chosen": -693.2596435546875, |
|
"logps/rejected": -672.1799926757812, |
|
"loss": 0.6794, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.3001101613044739, |
|
"rewards/margins": 0.024096574634313583, |
|
"rewards/rejected": 0.2760135531425476, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.008493005665859, |
|
"learning_rate": 4.867425487073786e-07, |
|
"logits/chosen": -2.3512167930603027, |
|
"logits/rejected": -2.393057346343994, |
|
"logps/chosen": -756.4876708984375, |
|
"logps/rejected": -652.0723876953125, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3165535628795624, |
|
"rewards/margins": 0.07065500319004059, |
|
"rewards/rejected": 0.2458985596895218, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.1734372333374883, |
|
"learning_rate": 4.850932413878934e-07, |
|
"logits/chosen": -2.340710401535034, |
|
"logits/rejected": -2.298480272293091, |
|
"logps/chosen": -745.8781127929688, |
|
"logps/rejected": -674.5993041992188, |
|
"loss": 0.6757, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.2814175486564636, |
|
"rewards/margins": 0.06212268024682999, |
|
"rewards/rejected": 0.21929486095905304, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.4301611366343288, |
|
"learning_rate": 4.833504011740522e-07, |
|
"logits/chosen": -2.3556628227233887, |
|
"logits/rejected": -2.399705410003662, |
|
"logps/chosen": -717.0030517578125, |
|
"logps/rejected": -651.791259765625, |
|
"loss": 0.6731, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.2667848467826843, |
|
"rewards/margins": 0.041503336280584335, |
|
"rewards/rejected": 0.2252815216779709, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 2.1135061185396005, |
|
"learning_rate": 4.815147214626056e-07, |
|
"logits/chosen": -2.3883965015411377, |
|
"logits/rejected": -2.427729845046997, |
|
"logps/chosen": -803.894775390625, |
|
"logps/rejected": -620.780517578125, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.3185453712940216, |
|
"rewards/margins": 0.08323224633932114, |
|
"rewards/rejected": 0.23531313240528107, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.3634608846601175, |
|
"learning_rate": 4.795869325869116e-07, |
|
"logits/chosen": -2.328643560409546, |
|
"logits/rejected": -2.403995990753174, |
|
"logps/chosen": -687.8956909179688, |
|
"logps/rejected": -643.009765625, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.2998669743537903, |
|
"rewards/margins": 0.047370560467243195, |
|
"rewards/rejected": 0.2524964213371277, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 2.196700529213078, |
|
"learning_rate": 4.775678015263708e-07, |
|
"logits/chosen": -2.3591794967651367, |
|
"logits/rejected": -2.330289125442505, |
|
"logps/chosen": -672.6624755859375, |
|
"logps/rejected": -570.5098266601562, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3420453667640686, |
|
"rewards/margins": 0.07790260016918182, |
|
"rewards/rejected": 0.264142781496048, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.1383346208020733, |
|
"learning_rate": 4.7545813160127845e-07, |
|
"logits/chosen": -2.499473810195923, |
|
"logits/rejected": -2.3798794746398926, |
|
"logps/chosen": -691.5140380859375, |
|
"logps/rejected": -599.8192749023438, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3622116148471832, |
|
"rewards/margins": 0.1057838648557663, |
|
"rewards/rejected": 0.25642773509025574, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -2.4781997203826904, |
|
"eval_logits/rejected": -2.400707960128784, |
|
"eval_logps/chosen": -688.9108276367188, |
|
"eval_logps/rejected": -624.684814453125, |
|
"eval_loss": 0.671257734298706, |
|
"eval_rewards/accuracies": 0.5887820720672607, |
|
"eval_rewards/chosen": 0.34801578521728516, |
|
"eval_rewards/margins": 0.06578347086906433, |
|
"eval_rewards/rejected": 0.2822323143482208, |
|
"eval_runtime": 834.4654, |
|
"eval_samples_per_second": 14.921, |
|
"eval_steps_per_second": 0.467, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.1073018777983776, |
|
"learning_rate": 4.732587621532214e-07, |
|
"logits/chosen": -2.4502573013305664, |
|
"logits/rejected": -2.4475185871124268, |
|
"logps/chosen": -683.3175659179688, |
|
"logps/rejected": -611.3568115234375, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.3536176085472107, |
|
"rewards/margins": 0.057841986417770386, |
|
"rewards/rejected": 0.29577556252479553, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 2.1659650914628155, |
|
"learning_rate": 4.709705682111412e-07, |
|
"logits/chosen": -2.4668691158294678, |
|
"logits/rejected": -2.4713261127471924, |
|
"logps/chosen": -733.1044921875, |
|
"logps/rejected": -634.1087036132812, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3784140944480896, |
|
"rewards/margins": 0.07397519052028656, |
|
"rewards/rejected": 0.30443888902664185, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 1.9726863761871838, |
|
"learning_rate": 4.68594460143201e-07, |
|
"logits/chosen": -2.469764232635498, |
|
"logits/rejected": -2.364992618560791, |
|
"logps/chosen": -707.831298828125, |
|
"logps/rejected": -595.4861450195312, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3741925060749054, |
|
"rewards/margins": 0.128498837351799, |
|
"rewards/rejected": 0.245693638920784, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.006300708272278, |
|
"learning_rate": 4.661313832945903e-07, |
|
"logits/chosen": -2.4452786445617676, |
|
"logits/rejected": -2.4417500495910645, |
|
"logps/chosen": -713.32763671875, |
|
"logps/rejected": -662.4024047851562, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.35015571117401123, |
|
"rewards/margins": 0.06933808326721191, |
|
"rewards/rejected": 0.28081759810447693, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.6249198820895963, |
|
"learning_rate": 4.635823176114162e-07, |
|
"logits/chosen": -2.2743911743164062, |
|
"logits/rejected": -2.248422384262085, |
|
"logps/chosen": -747.7679443359375, |
|
"logps/rejected": -706.8677978515625, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.326291024684906, |
|
"rewards/margins": 0.049748796969652176, |
|
"rewards/rejected": 0.2765422463417053, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 2.2372785611262884, |
|
"learning_rate": 4.6094827725082684e-07, |
|
"logits/chosen": -2.3613054752349854, |
|
"logits/rejected": -2.334916353225708, |
|
"logps/chosen": -705.9910888671875, |
|
"logps/rejected": -635.3782958984375, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.31965959072113037, |
|
"rewards/margins": 0.08668084442615509, |
|
"rewards/rejected": 0.2329787313938141, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.544144441997542, |
|
"learning_rate": 4.582303101775248e-07, |
|
"logits/chosen": -2.4770946502685547, |
|
"logits/rejected": -2.3431713581085205, |
|
"logps/chosen": -731.3635864257812, |
|
"logps/rejected": -648.119873046875, |
|
"loss": 0.6706, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3089877963066101, |
|
"rewards/margins": 0.08075568079948425, |
|
"rewards/rejected": 0.22823214530944824, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 2.269046581247883, |
|
"learning_rate": 4.5542949774682956e-07, |
|
"logits/chosen": -2.2857303619384766, |
|
"logits/rejected": -2.31972074508667, |
|
"logps/chosen": -721.4352416992188, |
|
"logps/rejected": -664.8331909179688, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.34746861457824707, |
|
"rewards/margins": 0.056263603270053864, |
|
"rewards/rejected": 0.2912050187587738, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.5334126160826034, |
|
"learning_rate": 4.5254695427445517e-07, |
|
"logits/chosen": -2.4382026195526123, |
|
"logits/rejected": -2.4473910331726074, |
|
"logps/chosen": -746.6915283203125, |
|
"logps/rejected": -620.1527099609375, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.37302321195602417, |
|
"rewards/margins": 0.08707480877637863, |
|
"rewards/rejected": 0.28594842553138733, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.319795440046946, |
|
"learning_rate": 4.4958382659317536e-07, |
|
"logits/chosen": -2.3574156761169434, |
|
"logits/rejected": -2.31435489654541, |
|
"logps/chosen": -725.4437866210938, |
|
"logps/rejected": -665.6046142578125, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.3333023190498352, |
|
"rewards/margins": 0.03737147897481918, |
|
"rewards/rejected": 0.29593080282211304, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -2.374924659729004, |
|
"eval_logits/rejected": -2.302616596221924, |
|
"eval_logps/chosen": -688.75927734375, |
|
"eval_logps/rejected": -625.8477172851562, |
|
"eval_loss": 0.6670830249786377, |
|
"eval_rewards/accuracies": 0.6048076748847961, |
|
"eval_rewards/chosen": 0.349530965089798, |
|
"eval_rewards/margins": 0.07892686128616333, |
|
"eval_rewards/rejected": 0.27060407400131226, |
|
"eval_runtime": 839.5836, |
|
"eval_samples_per_second": 14.83, |
|
"eval_steps_per_second": 0.465, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 2.36756980006423, |
|
"learning_rate": 4.465412935965506e-07, |
|
"logits/chosen": -2.3698456287384033, |
|
"logits/rejected": -2.275355577468872, |
|
"logps/chosen": -692.2882080078125, |
|
"logps/rejected": -618.537109375, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.33824020624160767, |
|
"rewards/margins": 0.03582638129591942, |
|
"rewards/rejected": 0.30241379141807556, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.0999015400485477, |
|
"learning_rate": 4.434205657698999e-07, |
|
"logits/chosen": -2.3519811630249023, |
|
"logits/rejected": -2.349076747894287, |
|
"logps/chosen": -606.4388427734375, |
|
"logps/rejected": -607.9388427734375, |
|
"loss": 0.6803, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.3264550268650055, |
|
"rewards/margins": 0.020858457311987877, |
|
"rewards/rejected": 0.3055965304374695, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.8949235983581547, |
|
"learning_rate": 4.402228847087046e-07, |
|
"logits/chosen": -2.265383720397949, |
|
"logits/rejected": -2.20546555519104, |
|
"logps/chosen": -687.4367065429688, |
|
"logps/rejected": -624.896728515625, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.37758034467697144, |
|
"rewards/margins": 0.0828850045800209, |
|
"rewards/rejected": 0.2946953773498535, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.3607848761065453, |
|
"learning_rate": 4.36949522624633e-07, |
|
"logits/chosen": -2.2971131801605225, |
|
"logits/rejected": -2.2627434730529785, |
|
"logps/chosen": -667.89599609375, |
|
"logps/rejected": -629.7573852539062, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3096492886543274, |
|
"rewards/margins": 0.09841451048851013, |
|
"rewards/rejected": 0.21123476326465607, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.3424108049054357, |
|
"learning_rate": 4.33601781839386e-07, |
|
"logits/chosen": -2.238157272338867, |
|
"logits/rejected": -2.2228469848632812, |
|
"logps/chosen": -761.5797119140625, |
|
"logps/rejected": -710.4862670898438, |
|
"loss": 0.6586, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3563387989997864, |
|
"rewards/margins": 0.0723680704832077, |
|
"rewards/rejected": 0.28397077322006226, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.0984681365446556, |
|
"learning_rate": 4.301809942665625e-07, |
|
"logits/chosen": -2.3568809032440186, |
|
"logits/rejected": -2.238476037979126, |
|
"logps/chosen": -666.4149780273438, |
|
"logps/rejected": -600.7111206054688, |
|
"loss": 0.669, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.29119664430618286, |
|
"rewards/margins": 0.07588323205709457, |
|
"rewards/rejected": 0.2153133898973465, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.428757148167148, |
|
"learning_rate": 4.2668852088175145e-07, |
|
"logits/chosen": -2.1850478649139404, |
|
"logits/rejected": -2.2067627906799316, |
|
"logps/chosen": -727.734130859375, |
|
"logps/rejected": -701.0474243164062, |
|
"loss": 0.6748, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.28803592920303345, |
|
"rewards/margins": 0.04421677812933922, |
|
"rewards/rejected": 0.24381914734840393, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 3.3105303898170333, |
|
"learning_rate": 4.231257511810618e-07, |
|
"logits/chosen": -2.2972073554992676, |
|
"logits/rejected": -2.2880618572235107, |
|
"logps/chosen": -720.8897705078125, |
|
"logps/rejected": -703.8472900390625, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.29478007555007935, |
|
"rewards/margins": 0.058770112693309784, |
|
"rewards/rejected": 0.23600995540618896, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.414444271215555, |
|
"learning_rate": 4.1949410262830523e-07, |
|
"logits/chosen": -2.1948752403259277, |
|
"logits/rejected": -2.104328155517578, |
|
"logps/chosen": -677.14501953125, |
|
"logps/rejected": -613.4793701171875, |
|
"loss": 0.6555, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.3152075409889221, |
|
"rewards/margins": 0.1456335484981537, |
|
"rewards/rejected": 0.16957402229309082, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.390588473535724, |
|
"learning_rate": 4.1579502009105164e-07, |
|
"logits/chosen": -2.2218821048736572, |
|
"logits/rejected": -2.175379991531372, |
|
"logps/chosen": -660.20458984375, |
|
"logps/rejected": -598.1253051757812, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.28508657217025757, |
|
"rewards/margins": 0.10129264742136002, |
|
"rewards/rejected": 0.18379393219947815, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -2.2344727516174316, |
|
"eval_logits/rejected": -2.1693804264068604, |
|
"eval_logps/chosen": -690.60302734375, |
|
"eval_logps/rejected": -628.6143188476562, |
|
"eval_loss": 0.6635647416114807, |
|
"eval_rewards/accuracies": 0.6057692170143127, |
|
"eval_rewards/chosen": 0.3310932219028473, |
|
"eval_rewards/margins": 0.08815512806177139, |
|
"eval_rewards/rejected": 0.24293813109397888, |
|
"eval_runtime": 836.7899, |
|
"eval_samples_per_second": 14.879, |
|
"eval_steps_per_second": 0.466, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.620103064645028, |
|
"learning_rate": 4.120299752657827e-07, |
|
"logits/chosen": -2.2362682819366455, |
|
"logits/rejected": -2.1457152366638184, |
|
"logps/chosen": -733.4589233398438, |
|
"logps/rejected": -714.5718994140625, |
|
"loss": 0.6743, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.3162640631198883, |
|
"rewards/margins": 0.06924857199192047, |
|
"rewards/rejected": 0.24701550602912903, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 2.6435733608714713, |
|
"learning_rate": 4.082004660923702e-07, |
|
"logits/chosen": -2.16068696975708, |
|
"logits/rejected": -2.0863850116729736, |
|
"logps/chosen": -703.9278564453125, |
|
"logps/rejected": -639.5438842773438, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3491550087928772, |
|
"rewards/margins": 0.07219501584768295, |
|
"rewards/rejected": 0.2769600450992584, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 2.4580793288834273, |
|
"learning_rate": 4.0430801615811437e-07, |
|
"logits/chosen": -2.138211965560913, |
|
"logits/rejected": -2.1236751079559326, |
|
"logps/chosen": -681.3580322265625, |
|
"logps/rejected": -662.194580078125, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.3004602789878845, |
|
"rewards/margins": 0.06852121651172638, |
|
"rewards/rejected": 0.23193907737731934, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 3.1611029274796003, |
|
"learning_rate": 4.00354174091578e-07, |
|
"logits/chosen": -2.075774669647217, |
|
"logits/rejected": -2.054658889770508, |
|
"logps/chosen": -674.4803466796875, |
|
"logps/rejected": -636.6451416015625, |
|
"loss": 0.6658, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.34417495131492615, |
|
"rewards/margins": 0.10877577215433121, |
|
"rewards/rejected": 0.23539917171001434, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.955455771466899, |
|
"learning_rate": 3.963405129464569e-07, |
|
"logits/chosen": -2.1316115856170654, |
|
"logits/rejected": -2.0590739250183105, |
|
"logps/chosen": -729.1641845703125, |
|
"logps/rejected": -655.9600219726562, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.31629639863967896, |
|
"rewards/margins": 0.0853310376405716, |
|
"rewards/rejected": 0.23096534609794617, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.20641486824199, |
|
"learning_rate": 3.922686295757347e-07, |
|
"logits/chosen": -2.1304194927215576, |
|
"logits/rejected": -2.051994800567627, |
|
"logps/chosen": -764.9317016601562, |
|
"logps/rejected": -722.5567626953125, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.35463058948516846, |
|
"rewards/margins": 0.09359289705753326, |
|
"rewards/rejected": 0.2610377073287964, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 3.3096544804003214, |
|
"learning_rate": 3.881401439963666e-07, |
|
"logits/chosen": -2.057319164276123, |
|
"logits/rejected": -2.015831232070923, |
|
"logps/chosen": -588.3761596679688, |
|
"logps/rejected": -567.8236083984375, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.2484780102968216, |
|
"rewards/margins": 0.057387322187423706, |
|
"rewards/rejected": 0.1910906732082367, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.8331676817387628, |
|
"learning_rate": 3.839566987447491e-07, |
|
"logits/chosen": -2.050976514816284, |
|
"logits/rejected": -2.0163493156433105, |
|
"logps/chosen": -749.3988037109375, |
|
"logps/rejected": -670.8482055664062, |
|
"loss": 0.6734, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.29123300313949585, |
|
"rewards/margins": 0.083717942237854, |
|
"rewards/rejected": 0.20751512050628662, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 2.6749133072665843, |
|
"learning_rate": 3.7971995822322947e-07, |
|
"logits/chosen": -2.019416332244873, |
|
"logits/rejected": -2.0274858474731445, |
|
"logps/chosen": -696.1351928710938, |
|
"logps/rejected": -654.2576293945312, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.31569501757621765, |
|
"rewards/margins": 0.07416489720344543, |
|
"rewards/rejected": 0.2415301501750946, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.457414583878444, |
|
"learning_rate": 3.7543160803791545e-07, |
|
"logits/chosen": -1.9960815906524658, |
|
"logits/rejected": -1.9505565166473389, |
|
"logps/chosen": -721.5948486328125, |
|
"logps/rejected": -651.5316162109375, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.31404954195022583, |
|
"rewards/margins": 0.10105651617050171, |
|
"rewards/rejected": 0.21299299597740173, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.9781278371810913, |
|
"eval_logits/rejected": -1.9252119064331055, |
|
"eval_logps/chosen": -695.4718017578125, |
|
"eval_logps/rejected": -634.3779296875, |
|
"eval_loss": 0.6605694890022278, |
|
"eval_rewards/accuracies": 0.6105769276618958, |
|
"eval_rewards/chosen": 0.28240564465522766, |
|
"eval_rewards/margins": 0.09710400551557541, |
|
"eval_rewards/rejected": 0.18530163168907166, |
|
"eval_runtime": 834.416, |
|
"eval_samples_per_second": 14.922, |
|
"eval_steps_per_second": 0.467, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.4659247559278366, |
|
"learning_rate": 3.7109335432805e-07, |
|
"logits/chosen": -1.9180772304534912, |
|
"logits/rejected": -1.9245834350585938, |
|
"logps/chosen": -758.48876953125, |
|
"logps/rejected": -619.541748046875, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3083231747150421, |
|
"rewards/margins": 0.11108432710170746, |
|
"rewards/rejected": 0.19723883271217346, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 2.620943282000028, |
|
"learning_rate": 3.667069230872155e-07, |
|
"logits/chosen": -1.853564977645874, |
|
"logits/rejected": -1.833913803100586, |
|
"logps/chosen": -760.102783203125, |
|
"logps/rejected": -648.6535034179688, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.33468401432037354, |
|
"rewards/margins": 0.09708525240421295, |
|
"rewards/rejected": 0.23759880661964417, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.2579820514508264, |
|
"learning_rate": 3.6227405947664003e-07, |
|
"logits/chosen": -1.9256470203399658, |
|
"logits/rejected": -1.885286569595337, |
|
"logps/chosen": -790.8487548828125, |
|
"logps/rejected": -700.6820678710938, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.38141968846321106, |
|
"rewards/margins": 0.15458762645721436, |
|
"rewards/rejected": 0.22683203220367432, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 2.536381576442856, |
|
"learning_rate": 3.577965271308771e-07, |
|
"logits/chosen": -1.9556684494018555, |
|
"logits/rejected": -1.9257959127426147, |
|
"logps/chosen": -735.0540161132812, |
|
"logps/rejected": -654.5272216796875, |
|
"loss": 0.6585, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.3390718102455139, |
|
"rewards/margins": 0.1305239498615265, |
|
"rewards/rejected": 0.20854787528514862, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.3455105370717737, |
|
"learning_rate": 3.5327610745613546e-07, |
|
"logits/chosen": -1.9934170246124268, |
|
"logits/rejected": -1.9228801727294922, |
|
"logps/chosen": -700.4527587890625, |
|
"logps/rejected": -634.6886596679688, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.32780513167381287, |
|
"rewards/margins": 0.12030024826526642, |
|
"rewards/rejected": 0.20750489830970764, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 2.5681519746805845, |
|
"learning_rate": 3.487145989215391e-07, |
|
"logits/chosen": -1.9263842105865479, |
|
"logits/rejected": -1.8778858184814453, |
|
"logps/chosen": -745.1176147460938, |
|
"logps/rejected": -652.140625, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.34303581714630127, |
|
"rewards/margins": 0.13984538614749908, |
|
"rewards/rejected": 0.203190416097641, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 2.8084666872092705, |
|
"learning_rate": 3.4411381634359796e-07, |
|
"logits/chosen": -1.9643045663833618, |
|
"logits/rejected": -1.9424177408218384, |
|
"logps/chosen": -738.8779907226562, |
|
"logps/rejected": -653.993408203125, |
|
"loss": 0.6513, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.3104199767112732, |
|
"rewards/margins": 0.0740085169672966, |
|
"rewards/rejected": 0.236411452293396, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.5437521131160805, |
|
"learning_rate": 3.3947559016417554e-07, |
|
"logits/chosen": -2.015496253967285, |
|
"logits/rejected": -1.9664264917373657, |
|
"logps/chosen": -730.8230590820312, |
|
"logps/rejected": -708.7545776367188, |
|
"loss": 0.6735, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.3310191333293915, |
|
"rewards/margins": 0.07294587790966034, |
|
"rewards/rejected": 0.2580733001232147, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 3.253152159173173, |
|
"learning_rate": 3.3480176572223885e-07, |
|
"logits/chosen": -2.022552728652954, |
|
"logits/rejected": -1.94183349609375, |
|
"logps/chosen": -637.3079223632812, |
|
"logps/rejected": -579.6361083984375, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.3080602288246155, |
|
"rewards/margins": 0.15390750765800476, |
|
"rewards/rejected": 0.15415272116661072, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.4835060692710464, |
|
"learning_rate": 3.300942025196824e-07, |
|
"logits/chosen": -2.063943862915039, |
|
"logits/rejected": -2.0479979515075684, |
|
"logps/chosen": -713.1132202148438, |
|
"logps/rejected": -639.5491943359375, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.389708936214447, |
|
"rewards/margins": 0.15992382168769836, |
|
"rewards/rejected": 0.22978505492210388, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_logits/chosen": -2.0598788261413574, |
|
"eval_logits/rejected": -2.0029659271240234, |
|
"eval_logps/chosen": -688.9520874023438, |
|
"eval_logps/rejected": -629.170654296875, |
|
"eval_loss": 0.658465564250946, |
|
"eval_rewards/accuracies": 0.6070512533187866, |
|
"eval_rewards/chosen": 0.3476024270057678, |
|
"eval_rewards/margins": 0.11022823303937912, |
|
"eval_rewards/rejected": 0.2373741716146469, |
|
"eval_runtime": 832.2648, |
|
"eval_samples_per_second": 14.96, |
|
"eval_steps_per_second": 0.469, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 2.4337446470444024, |
|
"learning_rate": 3.253547734815173e-07, |
|
"logits/chosen": -2.0798535346984863, |
|
"logits/rejected": -2.061521530151367, |
|
"logps/chosen": -715.1067504882812, |
|
"logps/rejected": -732.9197998046875, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4086131453514099, |
|
"rewards/margins": 0.1309557557106018, |
|
"rewards/rejected": 0.2776573598384857, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.2842891498898275, |
|
"learning_rate": 3.2058536421071914e-07, |
|
"logits/chosen": -2.098947048187256, |
|
"logits/rejected": -2.087313652038574, |
|
"logps/chosen": -768.4358520507812, |
|
"logps/rejected": -626.5296630859375, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.4160265028476715, |
|
"rewards/margins": 0.11269855499267578, |
|
"rewards/rejected": 0.3033279478549957, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 2.3495187388609238, |
|
"learning_rate": 3.1578787223803294e-07, |
|
"logits/chosen": -1.9842846393585205, |
|
"logits/rejected": -1.9742408990859985, |
|
"logps/chosen": -700.257568359375, |
|
"logps/rejected": -584.7598876953125, |
|
"loss": 0.662, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3193957507610321, |
|
"rewards/margins": 0.058227695524692535, |
|
"rewards/rejected": 0.26116806268692017, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 2.268979187768203, |
|
"learning_rate": 3.109642062670318e-07, |
|
"logits/chosen": -2.0449118614196777, |
|
"logits/rejected": -1.9595324993133545, |
|
"logps/chosen": -633.672607421875, |
|
"logps/rejected": -602.7111206054688, |
|
"loss": 0.6523, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.37880054116249084, |
|
"rewards/margins": 0.11559071391820908, |
|
"rewards/rejected": 0.26320984959602356, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.445431457272807, |
|
"learning_rate": 3.0611628541473074e-07, |
|
"logits/chosen": -2.0798251628875732, |
|
"logits/rejected": -2.0806803703308105, |
|
"logps/chosen": -776.8604125976562, |
|
"logps/rejected": -754.062255859375, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.4384000301361084, |
|
"rewards/margins": 0.1085997074842453, |
|
"rewards/rejected": 0.3298003077507019, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.4068587077954597, |
|
"learning_rate": 3.012460384480576e-07, |
|
"logits/chosen": -2.0535523891448975, |
|
"logits/rejected": -2.003754138946533, |
|
"logps/chosen": -708.5001220703125, |
|
"logps/rejected": -663.521240234375, |
|
"loss": 0.6466, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4629546105861664, |
|
"rewards/margins": 0.14931170642375946, |
|
"rewards/rejected": 0.3136429190635681, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.540723241212297, |
|
"learning_rate": 2.963554030164842e-07, |
|
"logits/chosen": -1.893160104751587, |
|
"logits/rejected": -1.9689306020736694, |
|
"logps/chosen": -726.8738403320312, |
|
"logps/rejected": -671.0678100585938, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.4015054702758789, |
|
"rewards/margins": 0.07470430433750153, |
|
"rewards/rejected": 0.3268011808395386, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.0009975628742174, |
|
"learning_rate": 2.9144632488112416e-07, |
|
"logits/chosen": -2.020155429840088, |
|
"logits/rejected": -2.016500473022461, |
|
"logps/chosen": -721.1326904296875, |
|
"logps/rejected": -660.15966796875, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.38422372937202454, |
|
"rewards/margins": 0.16717380285263062, |
|
"rewards/rejected": 0.21704988181591034, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.651435689363449, |
|
"learning_rate": 2.865207571406029e-07, |
|
"logits/chosen": -2.0237433910369873, |
|
"logits/rejected": -1.9571411609649658, |
|
"logps/chosen": -692.2257690429688, |
|
"logps/rejected": -613.42431640625, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.34494125843048096, |
|
"rewards/margins": 0.0645814761519432, |
|
"rewards/rejected": 0.28035980463027954, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 2.5848228401368334, |
|
"learning_rate": 2.815806594540089e-07, |
|
"logits/chosen": -2.0166923999786377, |
|
"logits/rejected": -2.052598476409912, |
|
"logps/chosen": -720.5625, |
|
"logps/rejected": -678.0027465820312, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.40049290657043457, |
|
"rewards/margins": 0.06527121365070343, |
|
"rewards/rejected": 0.33522164821624756, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -2.0439984798431396, |
|
"eval_logits/rejected": -1.9872162342071533, |
|
"eval_logps/chosen": -688.0209350585938, |
|
"eval_logps/rejected": -628.6378784179688, |
|
"eval_loss": 0.6572163701057434, |
|
"eval_rewards/accuracies": 0.6118589639663696, |
|
"eval_rewards/chosen": 0.3569147288799286, |
|
"eval_rewards/margins": 0.11421282589435577, |
|
"eval_rewards/rejected": 0.24270187318325043, |
|
"eval_runtime": 811.7757, |
|
"eval_samples_per_second": 15.338, |
|
"eval_steps_per_second": 0.48, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 2.8783002475743547, |
|
"learning_rate": 2.7662799726123454e-07, |
|
"logits/chosen": -1.9933090209960938, |
|
"logits/rejected": -1.9611231088638306, |
|
"logps/chosen": -637.7457275390625, |
|
"logps/rejected": -599.6551513671875, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.28943049907684326, |
|
"rewards/margins": 0.0467146560549736, |
|
"rewards/rejected": 0.24271583557128906, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.137998102462479, |
|
"learning_rate": 2.7166474100101674e-07, |
|
"logits/chosen": -1.9679996967315674, |
|
"logits/rejected": -1.9321043491363525, |
|
"logps/chosen": -665.1378173828125, |
|
"logps/rejected": -610.6767578125, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3013032078742981, |
|
"rewards/margins": 0.11742081493139267, |
|
"rewards/rejected": 0.18388235569000244, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.5841977136958727, |
|
"learning_rate": 2.6669286532698964e-07, |
|
"logits/chosen": -1.9860007762908936, |
|
"logits/rejected": -1.9256454706192017, |
|
"logps/chosen": -844.6925659179688, |
|
"logps/rejected": -707.6446533203125, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.38923513889312744, |
|
"rewards/margins": 0.14900648593902588, |
|
"rewards/rejected": 0.24022865295410156, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.7091636904536887, |
|
"learning_rate": 2.6171434832205953e-07, |
|
"logits/chosen": -1.9356151819229126, |
|
"logits/rejected": -1.9506133794784546, |
|
"logps/chosen": -715.4945068359375, |
|
"logps/rejected": -654.3660278320312, |
|
"loss": 0.6441, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.35090070962905884, |
|
"rewards/margins": 0.0896492600440979, |
|
"rewards/rejected": 0.26125144958496094, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 2.724306235445107, |
|
"learning_rate": 2.567311707114157e-07, |
|
"logits/chosen": -2.0761585235595703, |
|
"logits/rejected": -2.057460069656372, |
|
"logps/chosen": -638.9573364257812, |
|
"logps/rejected": -622.14697265625, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.3293986916542053, |
|
"rewards/margins": 0.06820010393857956, |
|
"rewards/rejected": 0.26119858026504517, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.752955939468096, |
|
"learning_rate": 2.5174531507449037e-07, |
|
"logits/chosen": -2.070046901702881, |
|
"logits/rejected": -1.9810707569122314, |
|
"logps/chosen": -616.99169921875, |
|
"logps/rejected": -543.088134765625, |
|
"loss": 0.657, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3693603575229645, |
|
"rewards/margins": 0.14052534103393555, |
|
"rewards/rejected": 0.22883501648902893, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 2.4658663047675575, |
|
"learning_rate": 2.4675876505618045e-07, |
|
"logits/chosen": -2.007403612136841, |
|
"logits/rejected": -2.010920763015747, |
|
"logps/chosen": -762.5975952148438, |
|
"logps/rejected": -657.7164306640625, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3691919445991516, |
|
"rewards/margins": 0.13456907868385315, |
|
"rewards/rejected": 0.23462291061878204, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 2.8093493901482645, |
|
"learning_rate": 2.4177350457764527e-07, |
|
"logits/chosen": -1.9829814434051514, |
|
"logits/rejected": -1.9325615167617798, |
|
"logps/chosen": -760.7388305664062, |
|
"logps/rejected": -746.89697265625, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3807603120803833, |
|
"rewards/margins": 0.1183190792798996, |
|
"rewards/rejected": 0.2624412178993225, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.2352313043894214, |
|
"learning_rate": 2.367915170469956e-07, |
|
"logits/chosen": -2.02712345123291, |
|
"logits/rejected": -1.865962028503418, |
|
"logps/chosen": -726.9395751953125, |
|
"logps/rejected": -638.2818603515625, |
|
"loss": 0.658, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.3744068741798401, |
|
"rewards/margins": 0.17671848833560944, |
|
"rewards/rejected": 0.19768838584423065, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 3.0201719958390263, |
|
"learning_rate": 2.3181478457018475e-07, |
|
"logits/chosen": -2.0808539390563965, |
|
"logits/rejected": -2.021824359893799, |
|
"logps/chosen": -777.3084106445312, |
|
"logps/rejected": -668.2219848632812, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3560071587562561, |
|
"rewards/margins": 0.17518427968025208, |
|
"rewards/rejected": 0.18082287907600403, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -2.018136501312256, |
|
"eval_logits/rejected": -1.9617843627929688, |
|
"eval_logps/chosen": -694.4999389648438, |
|
"eval_logps/rejected": -635.5912475585938, |
|
"eval_loss": 0.6557932496070862, |
|
"eval_rewards/accuracies": 0.6096153855323792, |
|
"eval_rewards/chosen": 0.29212433099746704, |
|
"eval_rewards/margins": 0.11895613372325897, |
|
"eval_rewards/rejected": 0.17316819727420807, |
|
"eval_runtime": 831.0231, |
|
"eval_samples_per_second": 14.983, |
|
"eval_steps_per_second": 0.469, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.5885677640784626, |
|
"learning_rate": 2.2684528716241968e-07, |
|
"logits/chosen": -1.9237916469573975, |
|
"logits/rejected": -1.9125349521636963, |
|
"logps/chosen": -777.046142578125, |
|
"logps/rejected": -745.3409423828125, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.3333626985549927, |
|
"rewards/margins": 0.11125592142343521, |
|
"rewards/rejected": 0.22210678458213806, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 2.5438960725196296, |
|
"learning_rate": 2.218850019604028e-07, |
|
"logits/chosen": -2.0038065910339355, |
|
"logits/rejected": -1.925611138343811, |
|
"logps/chosen": -680.6759033203125, |
|
"logps/rejected": -662.7098388671875, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.2969500422477722, |
|
"rewards/margins": 0.14483226835727692, |
|
"rewards/rejected": 0.1521177738904953, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.8410942952474136, |
|
"learning_rate": 2.1693590243571935e-07, |
|
"logits/chosen": -2.0043246746063232, |
|
"logits/rejected": -1.9441509246826172, |
|
"logps/chosen": -804.6784057617188, |
|
"logps/rejected": -714.288330078125, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.36427271366119385, |
|
"rewards/margins": 0.061019230633974075, |
|
"rewards/rejected": 0.3032534718513489, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 2.611147125105862, |
|
"learning_rate": 2.119999576096832e-07, |
|
"logits/chosen": -2.049757480621338, |
|
"logits/rejected": -1.9409106969833374, |
|
"logps/chosen": -737.8204345703125, |
|
"logps/rejected": -626.43701171875, |
|
"loss": 0.6571, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3496776223182678, |
|
"rewards/margins": 0.1673753708600998, |
|
"rewards/rejected": 0.18230223655700684, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 2.289802791317009, |
|
"learning_rate": 2.0707913126995244e-07, |
|
"logits/chosen": -1.958674669265747, |
|
"logits/rejected": -1.9054616689682007, |
|
"logps/chosen": -794.5623779296875, |
|
"logps/rejected": -620.9688720703125, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.39125892519950867, |
|
"rewards/margins": 0.23107214272022247, |
|
"rewards/rejected": 0.1601867973804474, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.640295515988177, |
|
"learning_rate": 2.021753811892281e-07, |
|
"logits/chosen": -1.9902336597442627, |
|
"logits/rejected": -2.0175299644470215, |
|
"logps/chosen": -729.5810546875, |
|
"logps/rejected": -599.620361328125, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.37484925985336304, |
|
"rewards/margins": 0.14430688321590424, |
|
"rewards/rejected": 0.2305423766374588, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 2.5023450766367965, |
|
"learning_rate": 1.9729065834634528e-07, |
|
"logits/chosen": -2.086310863494873, |
|
"logits/rejected": -2.0709755420684814, |
|
"logps/chosen": -746.2198486328125, |
|
"logps/rejected": -680.5553588867188, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.39980658888816833, |
|
"rewards/margins": 0.16294142603874207, |
|
"rewards/rejected": 0.23686519265174866, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.62481893045694, |
|
"learning_rate": 1.924269061500678e-07, |
|
"logits/chosen": -2.0319840908050537, |
|
"logits/rejected": -2.07069730758667, |
|
"logps/chosen": -641.7462768554688, |
|
"logps/rejected": -621.5814819335938, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": 0.2810671627521515, |
|
"rewards/margins": 0.05967780202627182, |
|
"rewards/rejected": 0.22138938307762146, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 2.405136558965165, |
|
"learning_rate": 1.8758605966589434e-07, |
|
"logits/chosen": -2.0415592193603516, |
|
"logits/rejected": -1.9661998748779297, |
|
"logps/chosen": -858.5528564453125, |
|
"logps/rejected": -739.1558837890625, |
|
"loss": 0.6549, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3966771960258484, |
|
"rewards/margins": 0.10974051058292389, |
|
"rewards/rejected": 0.2869366705417633, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.3922492394829504, |
|
"learning_rate": 1.8277004484618357e-07, |
|
"logits/chosen": -1.9904890060424805, |
|
"logits/rejected": -1.9474655389785767, |
|
"logps/chosen": -736.5589599609375, |
|
"logps/rejected": -658.781005859375, |
|
"loss": 0.6759, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3667674660682678, |
|
"rewards/margins": 0.1037057489156723, |
|
"rewards/rejected": 0.2630617022514343, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -2.019803047180176, |
|
"eval_logits/rejected": -1.9627211093902588, |
|
"eval_logps/chosen": -689.3489379882812, |
|
"eval_logps/rejected": -631.2626342773438, |
|
"eval_loss": 0.6548019051551819, |
|
"eval_rewards/accuracies": 0.6070512533187866, |
|
"eval_rewards/chosen": 0.3436334729194641, |
|
"eval_rewards/margins": 0.1271788775920868, |
|
"eval_rewards/rejected": 0.21645459532737732, |
|
"eval_runtime": 811.6476, |
|
"eval_samples_per_second": 15.34, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 2.8285528920600167, |
|
"learning_rate": 1.7798077776390603e-07, |
|
"logits/chosen": -1.9690300226211548, |
|
"logits/rejected": -1.923657774925232, |
|
"logps/chosen": -721.0764770507812, |
|
"logps/rejected": -610.729736328125, |
|
"loss": 0.6657, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.3101206123828888, |
|
"rewards/margins": 0.13614089787006378, |
|
"rewards/rejected": 0.17397968471050262, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.086737649194682, |
|
"learning_rate": 1.7322016385032536e-07, |
|
"logits/chosen": -2.0291314125061035, |
|
"logits/rejected": -1.9643595218658447, |
|
"logps/chosen": -809.5503540039062, |
|
"logps/rejected": -699.6201782226562, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4089950621128082, |
|
"rewards/margins": 0.21505212783813477, |
|
"rewards/rejected": 0.19394293427467346, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.170259473129296, |
|
"learning_rate": 1.6849009713691454e-07, |
|
"logits/chosen": -1.9695608615875244, |
|
"logits/rejected": -1.958207130432129, |
|
"logps/chosen": -752.0731201171875, |
|
"logps/rejected": -697.3651733398438, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.29536861181259155, |
|
"rewards/margins": 0.09291869401931763, |
|
"rewards/rejected": 0.20244988799095154, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.6277921924177963, |
|
"learning_rate": 1.6379245950180666e-07, |
|
"logits/chosen": -1.9219013452529907, |
|
"logits/rejected": -1.9105087518692017, |
|
"logps/chosen": -740.3037109375, |
|
"logps/rejected": -609.6246948242188, |
|
"loss": 0.633, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.35257434844970703, |
|
"rewards/margins": 0.16397470235824585, |
|
"rewards/rejected": 0.18859967589378357, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.5332783060270123, |
|
"learning_rate": 1.591291199210813e-07, |
|
"logits/chosen": -1.9232860803604126, |
|
"logits/rejected": -1.9083904027938843, |
|
"logps/chosen": -665.9305419921875, |
|
"logps/rejected": -630.5094604492188, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.28770583868026733, |
|
"rewards/margins": 0.05961765721440315, |
|
"rewards/rejected": 0.22808821499347687, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 2.3275797683615105, |
|
"learning_rate": 1.5450193372518438e-07, |
|
"logits/chosen": -1.9562702178955078, |
|
"logits/rejected": -1.8471969366073608, |
|
"logps/chosen": -663.4168090820312, |
|
"logps/rejected": -641.2056884765625, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.3817751407623291, |
|
"rewards/margins": 0.15303686261177063, |
|
"rewards/rejected": 0.22873827815055847, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.782849328804318, |
|
"learning_rate": 1.4991274186077628e-07, |
|
"logits/chosen": -1.9461126327514648, |
|
"logits/rejected": -1.8650624752044678, |
|
"logps/chosen": -717.2433471679688, |
|
"logps/rejected": -608.2086181640625, |
|
"loss": 0.6518, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.38884252309799194, |
|
"rewards/margins": 0.1517229527235031, |
|
"rewards/rejected": 0.23711958527565002, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 2.514896941566138, |
|
"learning_rate": 1.4536337015830323e-07, |
|
"logits/chosen": -1.964003562927246, |
|
"logits/rejected": -1.9508689641952515, |
|
"logps/chosen": -717.3407592773438, |
|
"logps/rejected": -673.0404663085938, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3360191583633423, |
|
"rewards/margins": 0.10252443701028824, |
|
"rewards/rejected": 0.23349472880363464, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.6031614387547606, |
|
"learning_rate": 1.4085562860558253e-07, |
|
"logits/chosen": -1.92083740234375, |
|
"logits/rejected": -1.8824331760406494, |
|
"logps/chosen": -742.9560546875, |
|
"logps/rejected": -657.2242431640625, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3182393014431, |
|
"rewards/margins": 0.14292821288108826, |
|
"rewards/rejected": 0.17531108856201172, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.4953278158305303, |
|
"learning_rate": 1.3639131062769075e-07, |
|
"logits/chosen": -1.9272648096084595, |
|
"logits/rejected": -1.9646079540252686, |
|
"logps/chosen": -699.5802001953125, |
|
"logps/rejected": -622.5048217773438, |
|
"loss": 0.6679, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3422418236732483, |
|
"rewards/margins": 0.12412543594837189, |
|
"rewards/rejected": 0.2181164026260376, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -1.959836721420288, |
|
"eval_logits/rejected": -1.9057732820510864, |
|
"eval_logps/chosen": -688.3819580078125, |
|
"eval_logps/rejected": -630.7877807617188, |
|
"eval_loss": 0.6542019844055176, |
|
"eval_rewards/accuracies": 0.607692301273346, |
|
"eval_rewards/chosen": 0.35330361127853394, |
|
"eval_rewards/margins": 0.1320999264717102, |
|
"eval_rewards/rejected": 0.22120368480682373, |
|
"eval_runtime": 829.7168, |
|
"eval_samples_per_second": 15.006, |
|
"eval_steps_per_second": 0.47, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 2.9526761996356634, |
|
"learning_rate": 1.3197219237344155e-07, |
|
"logits/chosen": -1.979193925857544, |
|
"logits/rejected": -1.9904626607894897, |
|
"logps/chosen": -770.7225341796875, |
|
"logps/rejected": -613.1958618164062, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4239157736301422, |
|
"rewards/margins": 0.21235182881355286, |
|
"rewards/rejected": 0.21156391501426697, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.314858281629497, |
|
"learning_rate": 1.2760003200873697e-07, |
|
"logits/chosen": -1.9512065649032593, |
|
"logits/rejected": -1.902094841003418, |
|
"logps/chosen": -712.6738891601562, |
|
"logps/rejected": -621.5015869140625, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.3171978294849396, |
|
"rewards/margins": 0.13169512152671814, |
|
"rewards/rejected": 0.18550269305706024, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.884626651322277, |
|
"learning_rate": 1.2327656901707338e-07, |
|
"logits/chosen": -1.8530919551849365, |
|
"logits/rejected": -1.8408454656600952, |
|
"logps/chosen": -772.9168701171875, |
|
"logps/rejected": -740.7579345703125, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.3601977229118347, |
|
"rewards/margins": 0.16855189204216003, |
|
"rewards/rejected": 0.1916457712650299, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 3.5188391499904563, |
|
"learning_rate": 1.1900352350748024e-07, |
|
"logits/chosen": -1.907289743423462, |
|
"logits/rejected": -1.9461581707000732, |
|
"logps/chosen": -801.3639526367188, |
|
"logps/rejected": -783.6478271484375, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.376972496509552, |
|
"rewards/margins": 0.02767539583146572, |
|
"rewards/rejected": 0.34929707646369934, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.665457863564576, |
|
"learning_rate": 1.147825955301668e-07, |
|
"logits/chosen": -1.9209585189819336, |
|
"logits/rejected": -1.8920555114746094, |
|
"logps/chosen": -636.7376708984375, |
|
"logps/rejected": -571.822021484375, |
|
"loss": 0.6563, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3121941089630127, |
|
"rewards/margins": 0.13192227482795715, |
|
"rewards/rejected": 0.18027189373970032, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 2.885847358162881, |
|
"learning_rate": 1.1061546440014963e-07, |
|
"logits/chosen": -1.9306738376617432, |
|
"logits/rejected": -1.9074119329452515, |
|
"logps/chosen": -690.8770141601562, |
|
"logps/rejected": -606.33447265625, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.32359814643859863, |
|
"rewards/margins": 0.07375864684581757, |
|
"rewards/rejected": 0.24983951449394226, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.3702768828862864, |
|
"learning_rate": 1.0650378802913007e-07, |
|
"logits/chosen": -2.008939743041992, |
|
"logits/rejected": -1.970020055770874, |
|
"logps/chosen": -635.5045776367188, |
|
"logps/rejected": -561.628173828125, |
|
"loss": 0.652, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.31716689467430115, |
|
"rewards/margins": 0.16777855157852173, |
|
"rewards/rejected": 0.14938834309577942, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.600006257784113, |
|
"learning_rate": 1.0244920226588597e-07, |
|
"logits/chosen": -1.9430663585662842, |
|
"logits/rejected": -1.941568374633789, |
|
"logps/chosen": -684.0612182617188, |
|
"logps/rejected": -652.2069091796875, |
|
"loss": 0.6688, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.2591310143470764, |
|
"rewards/margins": 0.06264869123697281, |
|
"rewards/rejected": 0.1964823305606842, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.598645248672983, |
|
"learning_rate": 9.845332024544245e-08, |
|
"logits/chosen": -1.965990662574768, |
|
"logits/rejected": -1.998303771018982, |
|
"logps/chosen": -738.8136596679688, |
|
"logps/rejected": -617.0636596679688, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.31749868392944336, |
|
"rewards/margins": 0.15900316834449768, |
|
"rewards/rejected": 0.1584954857826233, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 2.600416268107498, |
|
"learning_rate": 9.451773174727873e-08, |
|
"logits/chosen": -1.9850645065307617, |
|
"logits/rejected": -1.9690158367156982, |
|
"logps/chosen": -758.544921875, |
|
"logps/rejected": -651.073486328125, |
|
"loss": 0.6358, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.35706740617752075, |
|
"rewards/margins": 0.15773825347423553, |
|
"rewards/rejected": 0.1993291676044464, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_logits/chosen": -2.0015413761138916, |
|
"eval_logits/rejected": -1.9446992874145508, |
|
"eval_logps/chosen": -690.077880859375, |
|
"eval_logps/rejected": -632.544921875, |
|
"eval_loss": 0.6533253192901611, |
|
"eval_rewards/accuracies": 0.6073718070983887, |
|
"eval_rewards/chosen": 0.33634501695632935, |
|
"eval_rewards/margins": 0.13271409273147583, |
|
"eval_rewards/rejected": 0.2036309689283371, |
|
"eval_runtime": 833.2952, |
|
"eval_samples_per_second": 14.942, |
|
"eval_steps_per_second": 0.468, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.4248781446367054, |
|
"learning_rate": 9.064400256282755e-08, |
|
"logits/chosen": -1.9554182291030884, |
|
"logits/rejected": -1.8983566761016846, |
|
"logps/chosen": -636.9713134765625, |
|
"logps/rejected": -603.3074951171875, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.26512694358825684, |
|
"rewards/margins": 0.08946672827005386, |
|
"rewards/rejected": 0.17566025257110596, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.090428157345016, |
|
"learning_rate": 8.68336738725177e-08, |
|
"logits/chosen": -2.043494939804077, |
|
"logits/rejected": -1.9082825183868408, |
|
"logps/chosen": -733.5816650390625, |
|
"logps/rejected": -669.7770385742188, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.37750545144081116, |
|
"rewards/margins": 0.14476510882377625, |
|
"rewards/rejected": 0.23274032771587372, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.1532123855322376, |
|
"learning_rate": 8.308826163260852e-08, |
|
"logits/chosen": -1.9862855672836304, |
|
"logits/rejected": -1.9692624807357788, |
|
"logps/chosen": -690.51708984375, |
|
"logps/rejected": -675.4782104492188, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.3360420763492584, |
|
"rewards/margins": 0.157058984041214, |
|
"rewards/rejected": 0.17898306250572205, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.629746362223527, |
|
"learning_rate": 7.940925597206052e-08, |
|
"logits/chosen": -2.007011890411377, |
|
"logits/rejected": -1.9624780416488647, |
|
"logps/chosen": -647.2005615234375, |
|
"logps/rejected": -650.79052734375, |
|
"loss": 0.6582, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.35882705450057983, |
|
"rewards/margins": 0.11791452020406723, |
|
"rewards/rejected": 0.24091258645057678, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.7296472904697513, |
|
"learning_rate": 7.579812059968014e-08, |
|
"logits/chosen": -2.0613789558410645, |
|
"logits/rejected": -2.0162453651428223, |
|
"logps/chosen": -620.8883666992188, |
|
"logps/rejected": -658.7117309570312, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.3870480954647064, |
|
"rewards/margins": 0.126990407705307, |
|
"rewards/rejected": 0.2600576877593994, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.763125897839002, |
|
"learning_rate": 7.225629222177715e-08, |
|
"logits/chosen": -1.943429946899414, |
|
"logits/rejected": -1.9874225854873657, |
|
"logps/chosen": -738.5694580078125, |
|
"logps/rejected": -762.2120361328125, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.3255593180656433, |
|
"rewards/margins": 0.08112556487321854, |
|
"rewards/rejected": 0.24443373084068298, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 2.5446108148638498, |
|
"learning_rate": 6.878517997056457e-08, |
|
"logits/chosen": -1.9231475591659546, |
|
"logits/rejected": -1.9031331539154053, |
|
"logps/chosen": -625.0635986328125, |
|
"logps/rejected": -623.573486328125, |
|
"loss": 0.6469, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.32645702362060547, |
|
"rewards/margins": 0.11162126064300537, |
|
"rewards/rejected": 0.21483579277992249, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 3.5073757146061677, |
|
"learning_rate": 6.538616484352902e-08, |
|
"logits/chosen": -1.910323143005371, |
|
"logits/rejected": -1.9373595714569092, |
|
"logps/chosen": -732.39697265625, |
|
"logps/rejected": -635.9688110351562, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.3115167021751404, |
|
"rewards/margins": 0.10787830501794815, |
|
"rewards/rejected": 0.20363843441009521, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 2.859757387740054, |
|
"learning_rate": 6.206059915399495e-08, |
|
"logits/chosen": -2.0179154872894287, |
|
"logits/rejected": -1.9656012058258057, |
|
"logps/chosen": -815.8009033203125, |
|
"logps/rejected": -609.6783447265625, |
|
"loss": 0.6424, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.3753044605255127, |
|
"rewards/margins": 0.12460635602474213, |
|
"rewards/rejected": 0.25069814920425415, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.8109306032090764, |
|
"learning_rate": 5.88098059931004e-08, |
|
"logits/chosen": -1.9886192083358765, |
|
"logits/rejected": -1.8791803121566772, |
|
"logps/chosen": -670.867919921875, |
|
"logps/rejected": -652.6749877929688, |
|
"loss": 0.6473, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.31432944536209106, |
|
"rewards/margins": 0.17133426666259766, |
|
"rewards/rejected": 0.1429951936006546, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -1.9620791673660278, |
|
"eval_logits/rejected": -1.9071754217147827, |
|
"eval_logps/chosen": -689.9299926757812, |
|
"eval_logps/rejected": -632.6980590820312, |
|
"eval_loss": 0.6527881622314453, |
|
"eval_rewards/accuracies": 0.6080127954483032, |
|
"eval_rewards/chosen": 0.337823748588562, |
|
"eval_rewards/margins": 0.1357237845659256, |
|
"eval_rewards/rejected": 0.2020999938249588, |
|
"eval_runtime": 811.1011, |
|
"eval_samples_per_second": 15.351, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.9339811794947375, |
|
"learning_rate": 5.563507870339962e-08, |
|
"logits/chosen": -1.9552795886993408, |
|
"logits/rejected": -1.885912537574768, |
|
"logps/chosen": -820.3865356445312, |
|
"logps/rejected": -724.3788452148438, |
|
"loss": 0.6536, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.40686970949172974, |
|
"rewards/margins": 0.22154918313026428, |
|
"rewards/rejected": 0.18532049655914307, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.0504864373903873, |
|
"learning_rate": 5.25376803643007e-08, |
|
"logits/chosen": -2.018120050430298, |
|
"logits/rejected": -1.973238229751587, |
|
"logps/chosen": -761.1993408203125, |
|
"logps/rejected": -684.3026733398438, |
|
"loss": 0.6609, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": 0.32909178733825684, |
|
"rewards/margins": 0.12167789787054062, |
|
"rewards/rejected": 0.20741388201713562, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 3.300789834284616, |
|
"learning_rate": 4.9518843289544007e-08, |
|
"logits/chosen": -1.9833561182022095, |
|
"logits/rejected": -1.9334217309951782, |
|
"logps/chosen": -674.072265625, |
|
"logps/rejected": -636.847412109375, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3223080039024353, |
|
"rewards/margins": 0.14498993754386902, |
|
"rewards/rejected": 0.17731806635856628, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 2.6410110326405314, |
|
"learning_rate": 4.6579768536920695e-08, |
|
"logits/chosen": -2.024742603302002, |
|
"logits/rejected": -1.9354236125946045, |
|
"logps/chosen": -717.5321655273438, |
|
"logps/rejected": -710.6961669921875, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.36842697858810425, |
|
"rewards/margins": 0.11848391592502594, |
|
"rewards/rejected": 0.2499430626630783, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 3.2793314877461306, |
|
"learning_rate": 4.372162543042623e-08, |
|
"logits/chosen": -2.0009231567382812, |
|
"logits/rejected": -1.942253828048706, |
|
"logps/chosen": -727.9117431640625, |
|
"logps/rejected": -638.6812744140625, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.40108004212379456, |
|
"rewards/margins": 0.1539073884487152, |
|
"rewards/rejected": 0.24717263877391815, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 2.7921989940115157, |
|
"learning_rate": 4.094555109503983e-08, |
|
"logits/chosen": -1.9503885507583618, |
|
"logits/rejected": -1.8943992853164673, |
|
"logps/chosen": -737.1885375976562, |
|
"logps/rejected": -671.646484375, |
|
"loss": 0.6543, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3580954670906067, |
|
"rewards/margins": 0.12970301508903503, |
|
"rewards/rejected": 0.22839240729808807, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 2.7639464701150445, |
|
"learning_rate": 3.825265000431424e-08, |
|
"logits/chosen": -1.8646436929702759, |
|
"logits/rejected": -1.8763723373413086, |
|
"logps/chosen": -699.7142333984375, |
|
"logps/rejected": -632.5223388671875, |
|
"loss": 0.6615, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.37401726841926575, |
|
"rewards/margins": 0.1618153154850006, |
|
"rewards/rejected": 0.21220192313194275, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.995425337211788, |
|
"learning_rate": 3.5643993540955844e-08, |
|
"logits/chosen": -1.9095637798309326, |
|
"logits/rejected": -1.8548505306243896, |
|
"logps/chosen": -750.3823852539062, |
|
"logps/rejected": -683.6522216796875, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.37580543756484985, |
|
"rewards/margins": 0.0922856256365776, |
|
"rewards/rejected": 0.28351980447769165, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.6756247012610697, |
|
"learning_rate": 3.312061957057061e-08, |
|
"logits/chosen": -1.9792400598526, |
|
"logits/rejected": -1.9158741235733032, |
|
"logps/chosen": -642.9052124023438, |
|
"logps/rejected": -682.2804565429688, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2730144262313843, |
|
"rewards/margins": 0.13844364881515503, |
|
"rewards/rejected": 0.13457079231739044, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.5306381537983658, |
|
"learning_rate": 3.0683532028744184e-08, |
|
"logits/chosen": -1.9723201990127563, |
|
"logits/rejected": -1.8639072179794312, |
|
"logps/chosen": -649.2698364257812, |
|
"logps/rejected": -595.0311889648438, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3062586486339569, |
|
"rewards/margins": 0.15073780715465546, |
|
"rewards/rejected": 0.15552084147930145, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.9781183004379272, |
|
"eval_logits/rejected": -1.9226415157318115, |
|
"eval_logps/chosen": -691.5005493164062, |
|
"eval_logps/rejected": -634.215576171875, |
|
"eval_loss": 0.6525518298149109, |
|
"eval_rewards/accuracies": 0.6080127954483032, |
|
"eval_rewards/chosen": 0.3221181333065033, |
|
"eval_rewards/margins": 0.13519282639026642, |
|
"eval_rewards/rejected": 0.18692529201507568, |
|
"eval_runtime": 833.3431, |
|
"eval_samples_per_second": 14.941, |
|
"eval_steps_per_second": 0.468, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 2.4794478716281474, |
|
"learning_rate": 2.8333700521621955e-08, |
|
"logits/chosen": -1.9369399547576904, |
|
"logits/rejected": -1.915985345840454, |
|
"logps/chosen": -723.750732421875, |
|
"logps/rejected": -571.1014404296875, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.34681373834609985, |
|
"rewards/margins": 0.15279236435890198, |
|
"rewards/rejected": 0.1940213292837143, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 2.9576634978157657, |
|
"learning_rate": 2.6072059940146772e-08, |
|
"logits/chosen": -2.0135951042175293, |
|
"logits/rejected": -1.9804418087005615, |
|
"logps/chosen": -681.3564453125, |
|
"logps/rejected": -626.4763793945312, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.3405224680900574, |
|
"rewards/margins": 0.15179835259914398, |
|
"rewards/rejected": 0.1887241005897522, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.126710724613424, |
|
"learning_rate": 2.3899510088108587e-08, |
|
"logits/chosen": -1.9243135452270508, |
|
"logits/rejected": -1.8939940929412842, |
|
"logps/chosen": -654.2572021484375, |
|
"logps/rejected": -603.611328125, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.2771258056163788, |
|
"rewards/margins": 0.08931798487901688, |
|
"rewards/rejected": 0.1878078281879425, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 3.3520779361226216, |
|
"learning_rate": 2.1816915324153334e-08, |
|
"logits/chosen": -2.021639347076416, |
|
"logits/rejected": -1.9558565616607666, |
|
"logps/chosen": -772.6861572265625, |
|
"logps/rejected": -712.33544921875, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.3145074248313904, |
|
"rewards/margins": 0.17986731231212616, |
|
"rewards/rejected": 0.13464009761810303, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.8164037709731296, |
|
"learning_rate": 1.9825104217894018e-08, |
|
"logits/chosen": -1.9654910564422607, |
|
"logits/rejected": -1.8914296627044678, |
|
"logps/chosen": -652.0030517578125, |
|
"logps/rejected": -626.3556518554688, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.3254419267177582, |
|
"rewards/margins": 0.10867112874984741, |
|
"rewards/rejected": 0.21677079796791077, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 2.6118898735650125, |
|
"learning_rate": 1.7924869220260626e-08, |
|
"logits/chosen": -1.9689853191375732, |
|
"logits/rejected": -1.9209016561508179, |
|
"logps/chosen": -692.149658203125, |
|
"logps/rejected": -639.4564819335938, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.3543575406074524, |
|
"rewards/margins": 0.1561117321252823, |
|
"rewards/rejected": 0.1982457935810089, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.704102602095005, |
|
"learning_rate": 1.6116966348220046e-08, |
|
"logits/chosen": -1.9350318908691406, |
|
"logits/rejected": -1.9500646591186523, |
|
"logps/chosen": -677.9044189453125, |
|
"logps/rejected": -582.033447265625, |
|
"loss": 0.6493, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.40810251235961914, |
|
"rewards/margins": 0.22535303235054016, |
|
"rewards/rejected": 0.18274952471256256, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.0946434953536768, |
|
"learning_rate": 1.4402114883991318e-08, |
|
"logits/chosen": -2.010521411895752, |
|
"logits/rejected": -1.9158645868301392, |
|
"logps/chosen": -664.4788208007812, |
|
"logps/rejected": -566.4072875976562, |
|
"loss": 0.6531, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.2843988537788391, |
|
"rewards/margins": 0.14584381878376007, |
|
"rewards/rejected": 0.13855502009391785, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.9882114113782885, |
|
"learning_rate": 1.2780997088875866e-08, |
|
"logits/chosen": -1.9896526336669922, |
|
"logits/rejected": -1.9169244766235352, |
|
"logps/chosen": -723.8212280273438, |
|
"logps/rejected": -595.9112548828125, |
|
"loss": 0.6595, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.31389322876930237, |
|
"rewards/margins": 0.14465804398059845, |
|
"rewards/rejected": 0.16923515498638153, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.597069789857251, |
|
"learning_rate": 1.125425793181703e-08, |
|
"logits/chosen": -2.011854410171509, |
|
"logits/rejected": -1.9551817178726196, |
|
"logps/chosen": -690.0392456054688, |
|
"logps/rejected": -658.7679443359375, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3487124741077423, |
|
"rewards/margins": 0.15366964042186737, |
|
"rewards/rejected": 0.19504281878471375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/chosen": -1.9683998823165894, |
|
"eval_logits/rejected": -1.913440465927124, |
|
"eval_logps/chosen": -690.6824340820312, |
|
"eval_logps/rejected": -633.5018310546875, |
|
"eval_loss": 0.6524997353553772, |
|
"eval_rewards/accuracies": 0.6073718070983887, |
|
"eval_rewards/chosen": 0.3302985429763794, |
|
"eval_rewards/margins": 0.13623538613319397, |
|
"eval_rewards/rejected": 0.19406315684318542, |
|
"eval_runtime": 811.6062, |
|
"eval_samples_per_second": 15.341, |
|
"eval_steps_per_second": 0.481, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 2.686772952442727, |
|
"learning_rate": 9.822504832796036e-09, |
|
"logits/chosen": -1.9362843036651611, |
|
"logits/rejected": -1.8773858547210693, |
|
"logps/chosen": -756.9738159179688, |
|
"logps/rejected": -635.9459838867188, |
|
"loss": 0.6479, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3329898416996002, |
|
"rewards/margins": 0.18678471446037292, |
|
"rewards/rejected": 0.1462051421403885, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.6447110973275665, |
|
"learning_rate": 8.48630742116746e-09, |
|
"logits/chosen": -1.9655792713165283, |
|
"logits/rejected": -1.8795562982559204, |
|
"logps/chosen": -771.013671875, |
|
"logps/rejected": -664.7561645507812, |
|
"loss": 0.6623, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.33320263028144836, |
|
"rewards/margins": 0.15516476333141327, |
|
"rewards/rejected": 0.1780378520488739, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 2.8021224333165144, |
|
"learning_rate": 7.246197309029617e-09, |
|
"logits/chosen": -1.9644882678985596, |
|
"logits/rejected": -1.9270439147949219, |
|
"logps/chosen": -812.21337890625, |
|
"logps/rejected": -742.98095703125, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.3916359543800354, |
|
"rewards/margins": 0.15279091894626617, |
|
"rewards/rejected": 0.2388450652360916, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 2.7924440856518578, |
|
"learning_rate": 6.102667879720164e-09, |
|
"logits/chosen": -2.016839027404785, |
|
"logits/rejected": -1.9747447967529297, |
|
"logps/chosen": -696.322265625, |
|
"logps/rejected": -629.0064697265625, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.310738742351532, |
|
"rewards/margins": 0.11902445554733276, |
|
"rewards/rejected": 0.19171428680419922, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.1884351048854946, |
|
"learning_rate": 5.056174091521509e-09, |
|
"logits/chosen": -2.0210442543029785, |
|
"logits/rejected": -1.9764583110809326, |
|
"logps/chosen": -786.7182006835938, |
|
"logps/rejected": -743.8095703125, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.35495525598526, |
|
"rewards/margins": 0.17099758982658386, |
|
"rewards/rejected": 0.18395769596099854, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.8161297313177394, |
|
"learning_rate": 4.107132296653548e-09, |
|
"logits/chosen": -1.9870693683624268, |
|
"logits/rejected": -1.916751503944397, |
|
"logps/chosen": -682.517822265625, |
|
"logps/rejected": -588.7493896484375, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.37071341276168823, |
|
"rewards/margins": 0.136337548494339, |
|
"rewards/rejected": 0.23437583446502686, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.6636522324368266, |
|
"learning_rate": 3.255920075626084e-09, |
|
"logits/chosen": -1.8954880237579346, |
|
"logits/rejected": -1.9022176265716553, |
|
"logps/chosen": -623.636962890625, |
|
"logps/rejected": -596.3826293945312, |
|
"loss": 0.6712, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": 0.3262075185775757, |
|
"rewards/margins": 0.11597733199596405, |
|
"rewards/rejected": 0.21023023128509521, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 2.7200883225079395, |
|
"learning_rate": 2.5028760870168253e-09, |
|
"logits/chosen": -1.9352178573608398, |
|
"logits/rejected": -1.9500812292099, |
|
"logps/chosen": -690.5252685546875, |
|
"logps/rejected": -673.8720703125, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.28286224603652954, |
|
"rewards/margins": 0.084191232919693, |
|
"rewards/rejected": 0.19867102801799774, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 4.497441891242338, |
|
"learning_rate": 1.8482999327343597e-09, |
|
"logits/chosen": -2.021484613418579, |
|
"logits/rejected": -1.9103187322616577, |
|
"logps/chosen": -767.1332397460938, |
|
"logps/rejected": -708.3283081054688, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.40307435393333435, |
|
"rewards/margins": 0.1727377474308014, |
|
"rewards/rejected": 0.23033663630485535, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 3.084782840541729, |
|
"learning_rate": 1.2924520388204462e-09, |
|
"logits/chosen": -1.9084007740020752, |
|
"logits/rejected": -1.9263513088226318, |
|
"logps/chosen": -614.9078369140625, |
|
"logps/rejected": -623.568603515625, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.26621872186660767, |
|
"rewards/margins": 0.04896958917379379, |
|
"rewards/rejected": 0.21724911034107208, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_logits/chosen": -1.9645206928253174, |
|
"eval_logits/rejected": -1.9097665548324585, |
|
"eval_logps/chosen": -690.5892333984375, |
|
"eval_logps/rejected": -633.4114990234375, |
|
"eval_loss": 0.6524692177772522, |
|
"eval_rewards/accuracies": 0.6073718070983887, |
|
"eval_rewards/chosen": 0.33123117685317993, |
|
"eval_rewards/margins": 0.13626568019390106, |
|
"eval_rewards/rejected": 0.19496554136276245, |
|
"eval_runtime": 831.8192, |
|
"eval_samples_per_second": 14.968, |
|
"eval_steps_per_second": 0.469, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.668345364222901, |
|
"learning_rate": 8.355535518381751e-10, |
|
"logits/chosen": -1.9214885234832764, |
|
"logits/rejected": -1.947447419166565, |
|
"logps/chosen": -736.5780029296875, |
|
"logps/rejected": -594.7259521484375, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.33075276017189026, |
|
"rewards/margins": 0.11735783517360687, |
|
"rewards/rejected": 0.2133948802947998, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 3.4262928984719894, |
|
"learning_rate": 4.77786250887846e-10, |
|
"logits/chosen": -1.8850606679916382, |
|
"logits/rejected": -1.882394552230835, |
|
"logps/chosen": -706.0784912109375, |
|
"logps/rejected": -678.0963134765625, |
|
"loss": 0.6542, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.3584180772304535, |
|
"rewards/margins": 0.1582474410533905, |
|
"rewards/rejected": 0.20017066597938538, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.9771754271252857, |
|
"learning_rate": 2.1929247528540418e-10, |
|
"logits/chosen": -2.0322813987731934, |
|
"logits/rejected": -1.9513972997665405, |
|
"logps/chosen": -745.3729248046875, |
|
"logps/rejected": -708.0712280273438, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.36603301763534546, |
|
"rewards/margins": 0.15421339869499207, |
|
"rewards/rejected": 0.21181955933570862, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.8018132635690183, |
|
"learning_rate": 6.017506793212779e-11, |
|
"logits/chosen": -1.9503310918807983, |
|
"logits/rejected": -1.9407823085784912, |
|
"logps/chosen": -713.3302612304688, |
|
"logps/rejected": -686.0525512695312, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.31906238198280334, |
|
"rewards/margins": 0.11456944793462753, |
|
"rewards/rejected": 0.20449292659759521, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.3199203561320076, |
|
"learning_rate": 4.973343980252398e-13, |
|
"logits/chosen": -1.9441578388214111, |
|
"logits/rejected": -1.8932502269744873, |
|
"logps/chosen": -728.4362182617188, |
|
"logps/rejected": -602.8780517578125, |
|
"loss": 0.6488, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2982894778251648, |
|
"rewards/margins": 0.17425718903541565, |
|
"rewards/rejected": 0.12403228133916855, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1751, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6642669952984608, |
|
"train_runtime": 38056.9659, |
|
"train_samples_per_second": 2.944, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1751, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|