{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997810698387214,
"eval_steps": 100,
"global_step": 3425,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00029190688170473617,
"grad_norm": 0.013427734375,
"learning_rate": 1.457725947521866e-08,
"logits/chosen": -2.4752657413482666,
"logits/rejected": -2.4752657413482666,
"logps/chosen": -328.9035949707031,
"logps/rejected": -328.9035949707031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.002919068817047362,
"grad_norm": 0.0147705078125,
"learning_rate": 1.457725947521866e-07,
"logits/chosen": -2.395798683166504,
"logits/rejected": -2.395798683166504,
"logps/chosen": -317.85565185546875,
"logps/rejected": -317.85565185546875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0003186435205861926,
"rewards/margins": 0.0,
"rewards/rejected": -0.0003186435205861926,
"step": 10
},
{
"epoch": 0.005838137634094724,
"grad_norm": 0.01318359375,
"learning_rate": 2.915451895043732e-07,
"logits/chosen": -2.4440758228302,
"logits/rejected": -2.4440758228302,
"logps/chosen": -301.12921142578125,
"logps/rejected": -301.12921142578125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -7.847430242691189e-05,
"rewards/margins": 0.0,
"rewards/rejected": -7.847430242691189e-05,
"step": 20
},
{
"epoch": 0.008757206451142086,
"grad_norm": 0.01177978515625,
"learning_rate": 4.373177842565598e-07,
"logits/chosen": -2.441359519958496,
"logits/rejected": -2.441359519958496,
"logps/chosen": -317.1576843261719,
"logps/rejected": -317.1576843261719,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00025945488596335053,
"rewards/margins": 0.0,
"rewards/rejected": -0.00025945488596335053,
"step": 30
},
{
"epoch": 0.011676275268189448,
"grad_norm": 0.0167236328125,
"learning_rate": 5.830903790087464e-07,
"logits/chosen": -2.455430269241333,
"logits/rejected": -2.455430269241333,
"logps/chosen": -328.7832946777344,
"logps/rejected": -328.7832946777344,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00034936360316351056,
"rewards/margins": 0.0,
"rewards/rejected": -0.00034936360316351056,
"step": 40
},
{
"epoch": 0.014595344085236809,
"grad_norm": 0.012939453125,
"learning_rate": 7.288629737609331e-07,
"logits/chosen": -2.406463384628296,
"logits/rejected": -2.406463384628296,
"logps/chosen": -303.563232421875,
"logps/rejected": -303.563232421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0001031260471791029,
"rewards/margins": 0.0,
"rewards/rejected": 0.0001031260471791029,
"step": 50
},
{
"epoch": 0.01751441290228417,
"grad_norm": 0.016357421875,
"learning_rate": 8.746355685131196e-07,
"logits/chosen": -2.4401960372924805,
"logits/rejected": -2.4401960372924805,
"logps/chosen": -284.1253967285156,
"logps/rejected": -284.1253967285156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00043843849562108517,
"rewards/margins": 0.0,
"rewards/rejected": -0.00043843849562108517,
"step": 60
},
{
"epoch": 0.02043348171933153,
"grad_norm": 0.01153564453125,
"learning_rate": 1.0204081632653063e-06,
"logits/chosen": -2.423875093460083,
"logits/rejected": -2.423875093460083,
"logps/chosen": -280.09442138671875,
"logps/rejected": -280.09442138671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00031216375646181405,
"rewards/margins": 0.0,
"rewards/rejected": -0.00031216375646181405,
"step": 70
},
{
"epoch": 0.023352550536378896,
"grad_norm": 0.01214599609375,
"learning_rate": 1.1661807580174927e-06,
"logits/chosen": -2.404435396194458,
"logits/rejected": -2.404435396194458,
"logps/chosen": -267.2549743652344,
"logps/rejected": -267.2549743652344,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0006922121392562985,
"rewards/margins": 0.0,
"rewards/rejected": 0.0006922121392562985,
"step": 80
},
{
"epoch": 0.026271619353426257,
"grad_norm": 0.0146484375,
"learning_rate": 1.3119533527696792e-06,
"logits/chosen": -2.416917324066162,
"logits/rejected": -2.416917324066162,
"logps/chosen": -333.58563232421875,
"logps/rejected": -333.58563232421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0005137195694260299,
"rewards/margins": 0.0,
"rewards/rejected": 0.0005137195694260299,
"step": 90
},
{
"epoch": 0.029190688170473617,
"grad_norm": 0.0189208984375,
"learning_rate": 1.4577259475218661e-06,
"logits/chosen": -2.4351730346679688,
"logits/rejected": -2.4351730346679688,
"logps/chosen": -339.3778381347656,
"logps/rejected": -339.3778381347656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0005722829955630004,
"rewards/margins": 0.0,
"rewards/rejected": 0.0005722829955630004,
"step": 100
},
{
"epoch": 0.029190688170473617,
"eval_logits/chosen": -2.394068479537964,
"eval_logits/rejected": -2.394068479537964,
"eval_logps/chosen": -306.389892578125,
"eval_logps/rejected": -306.389892578125,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0008870832389220595,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0008870832389220595,
"eval_runtime": 2666.9983,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 100
},
{
"epoch": 0.03210975698752098,
"grad_norm": 0.015869140625,
"learning_rate": 1.6034985422740526e-06,
"logits/chosen": -2.420276165008545,
"logits/rejected": -2.420276165008545,
"logps/chosen": -306.0760803222656,
"logps/rejected": -306.0760803222656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0014700460014864802,
"rewards/margins": 0.0,
"rewards/rejected": 0.0014700460014864802,
"step": 110
},
{
"epoch": 0.03502882580456834,
"grad_norm": 0.01544189453125,
"learning_rate": 1.7492711370262391e-06,
"logits/chosen": -2.4616119861602783,
"logits/rejected": -2.4616119861602783,
"logps/chosen": -328.64129638671875,
"logps/rejected": -328.64129638671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.001054848893545568,
"rewards/margins": 0.0,
"rewards/rejected": 0.001054848893545568,
"step": 120
},
{
"epoch": 0.037947894621615706,
"grad_norm": 0.0250244140625,
"learning_rate": 1.895043731778426e-06,
"logits/chosen": -2.404423236846924,
"logits/rejected": -2.404423236846924,
"logps/chosen": -339.0644836425781,
"logps/rejected": -339.0644836425781,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0002889078459702432,
"rewards/margins": 0.0,
"rewards/rejected": 0.0002889078459702432,
"step": 130
},
{
"epoch": 0.04086696343866306,
"grad_norm": 0.0137939453125,
"learning_rate": 2.0408163265306125e-06,
"logits/chosen": -2.4294090270996094,
"logits/rejected": -2.4294090270996094,
"logps/chosen": -299.0234375,
"logps/rejected": -299.0234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0003939162997994572,
"rewards/margins": 0.0,
"rewards/rejected": 0.0003939162997994572,
"step": 140
},
{
"epoch": 0.04378603225571043,
"grad_norm": 0.01470947265625,
"learning_rate": 2.1865889212827988e-06,
"logits/chosen": -2.4415223598480225,
"logits/rejected": -2.4415223598480225,
"logps/chosen": -317.4403991699219,
"logps/rejected": -317.4403991699219,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0011137222172692418,
"rewards/margins": 0.0,
"rewards/rejected": 0.0011137222172692418,
"step": 150
},
{
"epoch": 0.04670510107275779,
"grad_norm": 0.01202392578125,
"learning_rate": 2.3323615160349855e-06,
"logits/chosen": -2.433961868286133,
"logits/rejected": -2.433961868286133,
"logps/chosen": -315.8016662597656,
"logps/rejected": -315.8016662597656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.00034690109896473587,
"rewards/margins": 0.0,
"rewards/rejected": 0.00034690109896473587,
"step": 160
},
{
"epoch": 0.04962416988980515,
"grad_norm": 0.01226806640625,
"learning_rate": 2.478134110787172e-06,
"logits/chosen": -2.4214272499084473,
"logits/rejected": -2.4214272499084473,
"logps/chosen": -304.0071105957031,
"logps/rejected": -304.0071105957031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.00021128072694409639,
"rewards/margins": 0.0,
"rewards/rejected": 0.00021128072694409639,
"step": 170
},
{
"epoch": 0.05254323870685251,
"grad_norm": 0.01318359375,
"learning_rate": 2.6239067055393585e-06,
"logits/chosen": -2.410125255584717,
"logits/rejected": -2.410125255584717,
"logps/chosen": -329.052978515625,
"logps/rejected": -329.052978515625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -8.667710062582046e-05,
"rewards/margins": 0.0,
"rewards/rejected": -8.667710062582046e-05,
"step": 180
},
{
"epoch": 0.05546230752389988,
"grad_norm": 0.0111083984375,
"learning_rate": 2.7696793002915456e-06,
"logits/chosen": -2.412470579147339,
"logits/rejected": -2.412470579147339,
"logps/chosen": -302.9618225097656,
"logps/rejected": -302.9618225097656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0005764733068645,
"rewards/margins": 0.0,
"rewards/rejected": 0.0005764733068645,
"step": 190
},
{
"epoch": 0.058381376340947234,
"grad_norm": 0.013671875,
"learning_rate": 2.9154518950437323e-06,
"logits/chosen": -2.3948373794555664,
"logits/rejected": -2.3948373794555664,
"logps/chosen": -312.7694091796875,
"logps/rejected": -312.7694091796875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0006709109293296933,
"rewards/margins": 0.0,
"rewards/rejected": -0.0006709109293296933,
"step": 200
},
{
"epoch": 0.058381376340947234,
"eval_logits/chosen": -2.3945627212524414,
"eval_logits/rejected": -2.3945627212524414,
"eval_logps/chosen": -306.5539245605469,
"eval_logps/rejected": -306.5539245605469,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.0007532919407822192,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.0007532919407822192,
"eval_runtime": 2667.9395,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 200
},
{
"epoch": 0.0613004451579946,
"grad_norm": 0.0120849609375,
"learning_rate": 3.0612244897959185e-06,
"logits/chosen": -2.445885181427002,
"logits/rejected": -2.445885181427002,
"logps/chosen": -316.7839050292969,
"logps/rejected": -316.7839050292969,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0010994909098371863,
"rewards/margins": 0.0,
"rewards/rejected": -0.0010994909098371863,
"step": 210
},
{
"epoch": 0.06421951397504196,
"grad_norm": 0.011962890625,
"learning_rate": 3.2069970845481052e-06,
"logits/chosen": -2.4333603382110596,
"logits/rejected": -2.4333603382110596,
"logps/chosen": -277.94915771484375,
"logps/rejected": -277.94915771484375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00041724619222804904,
"rewards/margins": 0.0,
"rewards/rejected": -0.00041724619222804904,
"step": 220
},
{
"epoch": 0.06713858279208933,
"grad_norm": 0.0145263671875,
"learning_rate": 3.352769679300292e-06,
"logits/chosen": -2.4338879585266113,
"logits/rejected": -2.4338879585266113,
"logps/chosen": -325.23455810546875,
"logps/rejected": -325.23455810546875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0012703577522188425,
"rewards/margins": 0.0,
"rewards/rejected": -0.0012703577522188425,
"step": 230
},
{
"epoch": 0.07005765160913668,
"grad_norm": 0.0157470703125,
"learning_rate": 3.4985422740524782e-06,
"logits/chosen": -2.413400173187256,
"logits/rejected": -2.413400173187256,
"logps/chosen": -309.69403076171875,
"logps/rejected": -309.69403076171875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.001970961457118392,
"rewards/margins": 0.0,
"rewards/rejected": -0.001970961457118392,
"step": 240
},
{
"epoch": 0.07297672042618404,
"grad_norm": 0.01422119140625,
"learning_rate": 3.644314868804665e-06,
"logits/chosen": -2.4458959102630615,
"logits/rejected": -2.4458959102630615,
"logps/chosen": -304.130615234375,
"logps/rejected": -304.130615234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0047234781086444855,
"rewards/margins": 0.0,
"rewards/rejected": -0.0047234781086444855,
"step": 250
},
{
"epoch": 0.07589578924323141,
"grad_norm": 0.01324462890625,
"learning_rate": 3.790087463556852e-06,
"logits/chosen": -2.4266982078552246,
"logits/rejected": -2.4266982078552246,
"logps/chosen": -286.97076416015625,
"logps/rejected": -286.97076416015625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.006642763502895832,
"rewards/margins": 0.0,
"rewards/rejected": -0.006642763502895832,
"step": 260
},
{
"epoch": 0.07881485806027877,
"grad_norm": 0.015625,
"learning_rate": 3.935860058309039e-06,
"logits/chosen": -2.436506748199463,
"logits/rejected": -2.436506748199463,
"logps/chosen": -310.330322265625,
"logps/rejected": -310.330322265625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.007435324601829052,
"rewards/margins": 0.0,
"rewards/rejected": -0.007435324601829052,
"step": 270
},
{
"epoch": 0.08173392687732613,
"grad_norm": 0.01495361328125,
"learning_rate": 4.081632653061225e-06,
"logits/chosen": -2.394254446029663,
"logits/rejected": -2.394254446029663,
"logps/chosen": -304.8192443847656,
"logps/rejected": -304.8192443847656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00737042585387826,
"rewards/margins": 0.0,
"rewards/rejected": -0.00737042585387826,
"step": 280
},
{
"epoch": 0.0846529956943735,
"grad_norm": 0.0130615234375,
"learning_rate": 4.227405247813411e-06,
"logits/chosen": -2.4005939960479736,
"logits/rejected": -2.4005939960479736,
"logps/chosen": -288.9790954589844,
"logps/rejected": -288.9790954589844,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0066338046453893185,
"rewards/margins": 0.0,
"rewards/rejected": -0.0066338046453893185,
"step": 290
},
{
"epoch": 0.08757206451142086,
"grad_norm": 0.01458740234375,
"learning_rate": 4.3731778425655976e-06,
"logits/chosen": -2.4416656494140625,
"logits/rejected": -2.4416656494140625,
"logps/chosen": -288.1855773925781,
"logps/rejected": -288.1855773925781,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0070955632254481316,
"rewards/margins": 0.0,
"rewards/rejected": -0.0070955632254481316,
"step": 300
},
{
"epoch": 0.08757206451142086,
"eval_logits/chosen": -2.3941876888275146,
"eval_logits/rejected": -2.3941876888275146,
"eval_logps/chosen": -307.0490417480469,
"eval_logps/rejected": -307.0490417480469,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.005704815499484539,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.005704815499484539,
"eval_runtime": 2667.7916,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 300
},
{
"epoch": 0.09049113332846821,
"grad_norm": 0.01153564453125,
"learning_rate": 4.518950437317785e-06,
"logits/chosen": -2.420503854751587,
"logits/rejected": -2.420503854751587,
"logps/chosen": -276.64093017578125,
"logps/rejected": -276.64093017578125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.007363935001194477,
"rewards/margins": 0.0,
"rewards/rejected": -0.007363935001194477,
"step": 310
},
{
"epoch": 0.09341020214551558,
"grad_norm": 0.0185546875,
"learning_rate": 4.664723032069971e-06,
"logits/chosen": -2.4066500663757324,
"logits/rejected": -2.4066500663757324,
"logps/chosen": -315.653076171875,
"logps/rejected": -315.653076171875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.006720393896102905,
"rewards/margins": 0.0,
"rewards/rejected": -0.006720393896102905,
"step": 320
},
{
"epoch": 0.09632927096256294,
"grad_norm": 0.015625,
"learning_rate": 4.810495626822158e-06,
"logits/chosen": -2.445965528488159,
"logits/rejected": -2.445965528488159,
"logps/chosen": -324.6703796386719,
"logps/rejected": -324.6703796386719,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.007294761948287487,
"rewards/margins": 0.0,
"rewards/rejected": -0.007294761948287487,
"step": 330
},
{
"epoch": 0.0992483397796103,
"grad_norm": 0.01446533203125,
"learning_rate": 4.956268221574344e-06,
"logits/chosen": -2.4288485050201416,
"logits/rejected": -2.4288485050201416,
"logps/chosen": -323.6286926269531,
"logps/rejected": -323.6286926269531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009048479609191418,
"rewards/margins": 0.0,
"rewards/rejected": -0.009048479609191418,
"step": 340
},
{
"epoch": 0.10216740859665767,
"grad_norm": 0.01458740234375,
"learning_rate": 4.999936358746211e-06,
"logits/chosen": -2.4309639930725098,
"logits/rejected": -2.4309639930725098,
"logps/chosen": -271.655029296875,
"logps/rejected": -271.655029296875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009400355629622936,
"rewards/margins": 0.0,
"rewards/rejected": -0.009400355629622936,
"step": 350
},
{
"epoch": 0.10508647741370503,
"grad_norm": 0.0152587890625,
"learning_rate": 4.99962465428288e-06,
"logits/chosen": -2.4447290897369385,
"logits/rejected": -2.4447290897369385,
"logps/chosen": -303.4416198730469,
"logps/rejected": -303.4416198730469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.013941009528934956,
"rewards/margins": 0.0,
"rewards/rejected": -0.013941009528934956,
"step": 360
},
{
"epoch": 0.10800554623075238,
"grad_norm": 0.0185546875,
"learning_rate": 4.999053229746866e-06,
"logits/chosen": -2.440117359161377,
"logits/rejected": -2.440117359161377,
"logps/chosen": -290.806884765625,
"logps/rejected": -290.806884765625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01759205386042595,
"rewards/margins": 0.0,
"rewards/rejected": -0.01759205386042595,
"step": 370
},
{
"epoch": 0.11092461504779975,
"grad_norm": 0.01263427734375,
"learning_rate": 4.9982221445112535e-06,
"logits/chosen": -2.4275150299072266,
"logits/rejected": -2.4275150299072266,
"logps/chosen": -320.67938232421875,
"logps/rejected": -320.67938232421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.018790820613503456,
"rewards/margins": 0.0,
"rewards/rejected": -0.018790820613503456,
"step": 380
},
{
"epoch": 0.11384368386484711,
"grad_norm": 0.01397705078125,
"learning_rate": 4.997131484928813e-06,
"logits/chosen": -2.414685010910034,
"logits/rejected": -2.414685010910034,
"logps/chosen": -301.1441650390625,
"logps/rejected": -301.1441650390625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.015089067630469799,
"rewards/margins": 0.0,
"rewards/rejected": -0.015089067630469799,
"step": 390
},
{
"epoch": 0.11676275268189447,
"grad_norm": 0.01458740234375,
"learning_rate": 4.995781364323035e-06,
"logits/chosen": -2.391239643096924,
"logits/rejected": -2.391239643096924,
"logps/chosen": -285.70941162109375,
"logps/rejected": -285.70941162109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010374903678894043,
"rewards/margins": 0.0,
"rewards/rejected": -0.010374903678894043,
"step": 400
},
{
"epoch": 0.11676275268189447,
"eval_logits/chosen": -2.393982172012329,
"eval_logits/rejected": -2.393982172012329,
"eval_logps/chosen": -307.3796081542969,
"eval_logps/rejected": -307.3796081542969,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.009010241366922855,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.009010241366922855,
"eval_runtime": 2667.3233,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 400
},
{
"epoch": 0.11968182149894184,
"grad_norm": 0.01300048828125,
"learning_rate": 4.994171922976349e-06,
"logits/chosen": -2.4642019271850586,
"logits/rejected": -2.4642019271850586,
"logps/chosen": -298.46978759765625,
"logps/rejected": -298.46978759765625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009510824456810951,
"rewards/margins": 0.0,
"rewards/rejected": -0.009510824456810951,
"step": 410
},
{
"epoch": 0.1226008903159892,
"grad_norm": 0.0159912109375,
"learning_rate": 4.992303328115551e-06,
"logits/chosen": -2.420297145843506,
"logits/rejected": -2.420297145843506,
"logps/chosen": -306.69610595703125,
"logps/rejected": -306.69610595703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0014959282707422972,
"rewards/margins": 0.0,
"rewards/rejected": -0.0014959282707422972,
"step": 420
},
{
"epoch": 0.12551995913303657,
"grad_norm": 0.0159912109375,
"learning_rate": 4.990175773894428e-06,
"logits/chosen": -2.46386981010437,
"logits/rejected": -2.46386981010437,
"logps/chosen": -281.81097412109375,
"logps/rejected": -281.81097412109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008724676445126534,
"rewards/margins": 0.0,
"rewards/rejected": -0.008724676445126534,
"step": 430
},
{
"epoch": 0.1284390279500839,
"grad_norm": 0.01287841796875,
"learning_rate": 4.987789481373586e-06,
"logits/chosen": -2.406324625015259,
"logits/rejected": -2.406324625015259,
"logps/chosen": -297.7574157714844,
"logps/rejected": -297.7574157714844,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.006952273193746805,
"rewards/margins": 0.0,
"rewards/rejected": -0.006952273193746805,
"step": 440
},
{
"epoch": 0.13135809676713128,
"grad_norm": 0.015869140625,
"learning_rate": 4.985144698497477e-06,
"logits/chosen": -2.4094862937927246,
"logits/rejected": -2.4094862937927246,
"logps/chosen": -294.4402160644531,
"logps/rejected": -294.4402160644531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009783747605979443,
"rewards/margins": 0.0,
"rewards/rejected": -0.009783747605979443,
"step": 450
},
{
"epoch": 0.13427716558417865,
"grad_norm": 0.015625,
"learning_rate": 4.982241700068639e-06,
"logits/chosen": -2.448880434036255,
"logits/rejected": -2.448880434036255,
"logps/chosen": -312.9103088378906,
"logps/rejected": -312.9103088378906,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010099256411194801,
"rewards/margins": 0.0,
"rewards/rejected": -0.010099256411194801,
"step": 460
},
{
"epoch": 0.137196234401226,
"grad_norm": 0.014404296875,
"learning_rate": 4.979080787719144e-06,
"logits/chosen": -2.4513556957244873,
"logits/rejected": -2.4513556957244873,
"logps/chosen": -330.3889465332031,
"logps/rejected": -330.3889465332031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012815428897738457,
"rewards/margins": 0.0,
"rewards/rejected": -0.012815428897738457,
"step": 470
},
{
"epoch": 0.14011530321827337,
"grad_norm": 0.013427734375,
"learning_rate": 4.975662289879257e-06,
"logits/chosen": -2.3824195861816406,
"logits/rejected": -2.3824195861816406,
"logps/chosen": -324.45654296875,
"logps/rejected": -324.45654296875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010385606437921524,
"rewards/margins": 0.0,
"rewards/rejected": -0.010385606437921524,
"step": 480
},
{
"epoch": 0.14303437203532074,
"grad_norm": 0.016845703125,
"learning_rate": 4.971986561743308e-06,
"logits/chosen": -2.388378620147705,
"logits/rejected": -2.388378620147705,
"logps/chosen": -292.9872131347656,
"logps/rejected": -292.9872131347656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00819515809416771,
"rewards/margins": 0.0,
"rewards/rejected": -0.00819515809416771,
"step": 490
},
{
"epoch": 0.14595344085236808,
"grad_norm": 0.01348876953125,
"learning_rate": 4.96805398523279e-06,
"logits/chosen": -2.438722610473633,
"logits/rejected": -2.438722610473633,
"logps/chosen": -333.7470397949219,
"logps/rejected": -333.7470397949219,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.007836517877876759,
"rewards/margins": 0.0,
"rewards/rejected": -0.007836517877876759,
"step": 500
},
{
"epoch": 0.14595344085236808,
"eval_logits/chosen": -2.3937265872955322,
"eval_logits/rejected": -2.3937265872955322,
"eval_logps/chosen": -307.1580505371094,
"eval_logps/rejected": -307.1580505371094,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.006794503424316645,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.006794503424316645,
"eval_runtime": 2668.7964,
"eval_samples_per_second": 2.282,
"eval_steps_per_second": 0.286,
"step": 500
},
{
"epoch": 0.14887250966941545,
"grad_norm": 0.0146484375,
"learning_rate": 4.963864968956674e-06,
"logits/chosen": -2.4363291263580322,
"logits/rejected": -2.4363291263580322,
"logps/chosen": -295.4735412597656,
"logps/rejected": -295.4735412597656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008334552869200706,
"rewards/margins": 0.0,
"rewards/rejected": -0.008334552869200706,
"step": 510
},
{
"epoch": 0.15179157848646282,
"grad_norm": 0.0113525390625,
"learning_rate": 4.959419948168952e-06,
"logits/chosen": -2.4209957122802734,
"logits/rejected": -2.4209957122802734,
"logps/chosen": -252.09475708007812,
"logps/rejected": -252.09475708007812,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.005244333762675524,
"rewards/margins": 0.0,
"rewards/rejected": -0.005244333762675524,
"step": 520
},
{
"epoch": 0.15471064730351017,
"grad_norm": 0.011962890625,
"learning_rate": 4.954719384723416e-06,
"logits/chosen": -2.4421539306640625,
"logits/rejected": -2.4421539306640625,
"logps/chosen": -290.62939453125,
"logps/rejected": -290.62939453125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.006143758539110422,
"rewards/margins": 0.0,
"rewards/rejected": -0.006143758539110422,
"step": 530
},
{
"epoch": 0.15762971612055754,
"grad_norm": 0.0155029296875,
"learning_rate": 4.949763767025665e-06,
"logits/chosen": -2.433292865753174,
"logits/rejected": -2.433292865753174,
"logps/chosen": -301.56488037109375,
"logps/rejected": -301.56488037109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.007085380610078573,
"rewards/margins": 0.0,
"rewards/rejected": -0.007085380610078573,
"step": 540
},
{
"epoch": 0.1605487849376049,
"grad_norm": 0.01513671875,
"learning_rate": 4.944553609982363e-06,
"logits/chosen": -2.397106647491455,
"logits/rejected": -2.397106647491455,
"logps/chosen": -274.3099670410156,
"logps/rejected": -274.3099670410156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.002214896958321333,
"rewards/margins": 0.0,
"rewards/rejected": -0.002214896958321333,
"step": 550
},
{
"epoch": 0.16346785375465225,
"grad_norm": 0.0152587890625,
"learning_rate": 4.939089454947734e-06,
"logits/chosen": -2.417797088623047,
"logits/rejected": -2.417797088623047,
"logps/chosen": -299.5130615234375,
"logps/rejected": -299.5130615234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.005161653272807598,
"rewards/margins": 0.0,
"rewards/rejected": -0.005161653272807598,
"step": 560
},
{
"epoch": 0.16638692257169962,
"grad_norm": 0.01507568359375,
"learning_rate": 4.933371869667315e-06,
"logits/chosen": -2.4109036922454834,
"logits/rejected": -2.4109036922454834,
"logps/chosen": -279.4015808105469,
"logps/rejected": -279.4015808105469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0024168032687157393,
"rewards/margins": 0.0,
"rewards/rejected": -0.0024168032687157393,
"step": 570
},
{
"epoch": 0.169305991388747,
"grad_norm": 0.00885009765625,
"learning_rate": 4.9274014482189654e-06,
"logits/chosen": -2.4315690994262695,
"logits/rejected": -2.4315690994262695,
"logps/chosen": -309.34234619140625,
"logps/rejected": -309.34234619140625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.004016817547380924,
"rewards/margins": 0.0,
"rewards/rejected": -0.004016817547380924,
"step": 580
},
{
"epoch": 0.17222506020579434,
"grad_norm": 0.017578125,
"learning_rate": 4.9211788109511405e-06,
"logits/chosen": -2.460508108139038,
"logits/rejected": -2.460508108139038,
"logps/chosen": -334.00933837890625,
"logps/rejected": -334.00933837890625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.005641533527523279,
"rewards/margins": 0.0,
"rewards/rejected": -0.005641533527523279,
"step": 590
},
{
"epoch": 0.1751441290228417,
"grad_norm": 0.016845703125,
"learning_rate": 4.914704604418435e-06,
"logits/chosen": -2.4566855430603027,
"logits/rejected": -2.4566855430603027,
"logps/chosen": -307.21331787109375,
"logps/rejected": -307.21331787109375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0077440254390239716,
"rewards/margins": 0.0,
"rewards/rejected": -0.0077440254390239716,
"step": 600
},
{
"epoch": 0.1751441290228417,
"eval_logits/chosen": -2.394993782043457,
"eval_logits/rejected": -2.394993782043457,
"eval_logps/chosen": -306.9631042480469,
"eval_logps/rejected": -306.9631042480469,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.004845078103244305,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.004845078103244305,
"eval_runtime": 2667.3075,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 600
},
{
"epoch": 0.17806319783988908,
"grad_norm": 0.01312255859375,
"learning_rate": 4.907979501314402e-06,
"logits/chosen": -2.452761173248291,
"logits/rejected": -2.452761173248291,
"logps/chosen": -293.330078125,
"logps/rejected": -293.330078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.005413960665464401,
"rewards/margins": 0.0,
"rewards/rejected": -0.005413960665464401,
"step": 610
},
{
"epoch": 0.18098226665693642,
"grad_norm": 0.013427734375,
"learning_rate": 4.901004200401659e-06,
"logits/chosen": -2.415590763092041,
"logits/rejected": -2.415590763092041,
"logps/chosen": -316.59185791015625,
"logps/rejected": -316.59185791015625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009168794378638268,
"rewards/margins": 0.0,
"rewards/rejected": -0.009168794378638268,
"step": 620
},
{
"epoch": 0.1839013354739838,
"grad_norm": 0.017333984375,
"learning_rate": 4.893779426439285e-06,
"logits/chosen": -2.4269957542419434,
"logits/rejected": -2.4269957542419434,
"logps/chosen": -330.297607421875,
"logps/rejected": -330.297607421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008635496720671654,
"rewards/margins": 0.0,
"rewards/rejected": -0.008635496720671654,
"step": 630
},
{
"epoch": 0.18682040429103117,
"grad_norm": 0.0137939453125,
"learning_rate": 4.886305930107512e-06,
"logits/chosen": -2.4132332801818848,
"logits/rejected": -2.4132332801818848,
"logps/chosen": -334.0628967285156,
"logps/rejected": -334.0628967285156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.00843154825270176,
"rewards/margins": 0.0,
"rewards/rejected": -0.00843154825270176,
"step": 640
},
{
"epoch": 0.1897394731080785,
"grad_norm": 0.0162353515625,
"learning_rate": 4.878584487929731e-06,
"logits/chosen": -2.393531084060669,
"logits/rejected": -2.393531084060669,
"logps/chosen": -312.2678527832031,
"logps/rejected": -312.2678527832031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008157333359122276,
"rewards/margins": 0.0,
"rewards/rejected": -0.008157333359122276,
"step": 650
},
{
"epoch": 0.19265854192512588,
"grad_norm": 0.01141357421875,
"learning_rate": 4.8706159021918046e-06,
"logits/chosen": -2.4334394931793213,
"logits/rejected": -2.4334394931793213,
"logps/chosen": -313.9178466796875,
"logps/rejected": -313.9178466796875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010157248005270958,
"rewards/margins": 0.0,
"rewards/rejected": -0.010157248005270958,
"step": 660
},
{
"epoch": 0.19557761074217325,
"grad_norm": 0.01446533203125,
"learning_rate": 4.86240100085871e-06,
"logits/chosen": -2.4123024940490723,
"logits/rejected": -2.4123024940490723,
"logps/chosen": -330.71856689453125,
"logps/rejected": -330.71856689453125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01049681194126606,
"rewards/margins": 0.0,
"rewards/rejected": -0.01049681194126606,
"step": 670
},
{
"epoch": 0.1984966795592206,
"grad_norm": 0.0145263671875,
"learning_rate": 4.853940637488505e-06,
"logits/chosen": -2.4219470024108887,
"logits/rejected": -2.4219470024108887,
"logps/chosen": -347.1614990234375,
"logps/rejected": -347.1614990234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010124921798706055,
"rewards/margins": 0.0,
"rewards/rejected": -0.010124921798706055,
"step": 680
},
{
"epoch": 0.20141574837626797,
"grad_norm": 0.0140380859375,
"learning_rate": 4.84523569114365e-06,
"logits/chosen": -2.441845417022705,
"logits/rejected": -2.441845417022705,
"logps/chosen": -268.2397766113281,
"logps/rejected": -268.2397766113281,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.013552245683968067,
"rewards/margins": 0.0,
"rewards/rejected": -0.013552245683968067,
"step": 690
},
{
"epoch": 0.20433481719331534,
"grad_norm": 0.020751953125,
"learning_rate": 4.8362870662996574e-06,
"logits/chosen": -2.408205509185791,
"logits/rejected": -2.408205509185791,
"logps/chosen": -313.0887756347656,
"logps/rejected": -313.0887756347656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01138794980943203,
"rewards/margins": 0.0,
"rewards/rejected": -0.01138794980943203,
"step": 700
},
{
"epoch": 0.20433481719331534,
"eval_logits/chosen": -2.394869565963745,
"eval_logits/rejected": -2.394869565963745,
"eval_logps/chosen": -307.6349182128906,
"eval_logps/rejected": -307.6349182128906,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.011563203297555447,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.011563203297555447,
"eval_runtime": 2685.1829,
"eval_samples_per_second": 2.268,
"eval_steps_per_second": 0.284,
"step": 700
},
{
"epoch": 0.20725388601036268,
"grad_norm": 0.015380859375,
"learning_rate": 4.827095692751124e-06,
"logits/chosen": -2.4306788444519043,
"logits/rejected": -2.4306788444519043,
"logps/chosen": -295.8254089355469,
"logps/rejected": -295.8254089355469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009687040001153946,
"rewards/margins": 0.0,
"rewards/rejected": -0.009687040001153946,
"step": 710
},
{
"epoch": 0.21017295482741005,
"grad_norm": 0.0135498046875,
"learning_rate": 4.817662525515116e-06,
"logits/chosen": -2.399963855743408,
"logits/rejected": -2.399963855743408,
"logps/chosen": -285.0207824707031,
"logps/rejected": -285.0207824707031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010509965009987354,
"rewards/margins": 0.0,
"rewards/rejected": -0.010509965009987354,
"step": 720
},
{
"epoch": 0.21309202364445742,
"grad_norm": 0.01275634765625,
"learning_rate": 4.807988544731944e-06,
"logits/chosen": -2.4015610218048096,
"logits/rejected": -2.4015610218048096,
"logps/chosen": -301.6191711425781,
"logps/rejected": -301.6191711425781,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0023958988022059202,
"rewards/margins": 0.0,
"rewards/rejected": -0.0023958988022059202,
"step": 730
},
{
"epoch": 0.21601109246150477,
"grad_norm": 0.0120849609375,
"learning_rate": 4.7980747555633174e-06,
"logits/chosen": -2.421522617340088,
"logits/rejected": -2.421522617340088,
"logps/chosen": -300.5765380859375,
"logps/rejected": -300.5765380859375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0066505610011518,
"rewards/margins": 0.0,
"rewards/rejected": -0.0066505610011518,
"step": 740
},
{
"epoch": 0.21893016127855214,
"grad_norm": 0.0167236328125,
"learning_rate": 4.787922188087907e-06,
"logits/chosen": -2.3898696899414062,
"logits/rejected": -2.3898696899414062,
"logps/chosen": -312.099853515625,
"logps/rejected": -312.099853515625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009563307277858257,
"rewards/margins": 0.0,
"rewards/rejected": -0.009563307277858257,
"step": 750
},
{
"epoch": 0.2218492300955995,
"grad_norm": 0.0185546875,
"learning_rate": 4.7775318971943165e-06,
"logits/chosen": -2.368053674697876,
"logits/rejected": -2.368053674697876,
"logps/chosen": -280.77703857421875,
"logps/rejected": -280.77703857421875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008711813017725945,
"rewards/margins": 0.0,
"rewards/rejected": -0.008711813017725945,
"step": 760
},
{
"epoch": 0.22476829891264685,
"grad_norm": 0.01434326171875,
"learning_rate": 4.766904962471477e-06,
"logits/chosen": -2.428321361541748,
"logits/rejected": -2.428321361541748,
"logps/chosen": -283.40704345703125,
"logps/rejected": -283.40704345703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.0074463835917413235,
"rewards/margins": 0.0,
"rewards/rejected": -0.0074463835917413235,
"step": 770
},
{
"epoch": 0.22768736772969422,
"grad_norm": 0.020751953125,
"learning_rate": 4.756042488096472e-06,
"logits/chosen": -2.421441078186035,
"logits/rejected": -2.421441078186035,
"logps/chosen": -283.1347961425781,
"logps/rejected": -283.1347961425781,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009277506731450558,
"rewards/margins": 0.0,
"rewards/rejected": -0.009277506731450558,
"step": 780
},
{
"epoch": 0.2306064365467416,
"grad_norm": 0.0169677734375,
"learning_rate": 4.744945602719806e-06,
"logits/chosen": -2.4225807189941406,
"logits/rejected": -2.4225807189941406,
"logps/chosen": -296.5173645019531,
"logps/rejected": -296.5173645019531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009408360347151756,
"rewards/margins": 0.0,
"rewards/rejected": -0.009408360347151756,
"step": 790
},
{
"epoch": 0.23352550536378894,
"grad_norm": 0.01495361328125,
"learning_rate": 4.733615459348143e-06,
"logits/chosen": -2.3777918815612793,
"logits/rejected": -2.3777918815612793,
"logps/chosen": -337.0318298339844,
"logps/rejected": -337.0318298339844,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012588550336658955,
"rewards/margins": 0.0,
"rewards/rejected": -0.012588550336658955,
"step": 800
},
{
"epoch": 0.23352550536378894,
"eval_logits/chosen": -2.394713878631592,
"eval_logits/rejected": -2.394713878631592,
"eval_logps/chosen": -307.6956787109375,
"eval_logps/rejected": -307.6956787109375,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.012170875445008278,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.012170875445008278,
"eval_runtime": 2762.1462,
"eval_samples_per_second": 2.205,
"eval_steps_per_second": 0.276,
"step": 800
},
{
"epoch": 0.2364445741808363,
"grad_norm": 0.0145263671875,
"learning_rate": 4.722053235224495e-06,
"logits/chosen": -2.4402616024017334,
"logits/rejected": -2.4402616024017334,
"logps/chosen": -333.5353698730469,
"logps/rejected": -333.5353698730469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008296088315546513,
"rewards/margins": 0.0,
"rewards/rejected": -0.008296088315546513,
"step": 810
},
{
"epoch": 0.23936364299788368,
"grad_norm": 0.0128173828125,
"learning_rate": 4.710260131705908e-06,
"logits/chosen": -2.411567211151123,
"logits/rejected": -2.411567211151123,
"logps/chosen": -274.9350280761719,
"logps/rejected": -274.9350280761719,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.015997527167201042,
"rewards/margins": 0.0,
"rewards/rejected": -0.015997527167201042,
"step": 820
},
{
"epoch": 0.24228271181493102,
"grad_norm": 0.01531982421875,
"learning_rate": 4.698237374138634e-06,
"logits/chosen": -2.420203447341919,
"logits/rejected": -2.420203447341919,
"logps/chosen": -312.3550720214844,
"logps/rejected": -312.3550720214844,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.015846502035856247,
"rewards/margins": 0.0,
"rewards/rejected": -0.015846502035856247,
"step": 830
},
{
"epoch": 0.2452017806319784,
"grad_norm": 0.01513671875,
"learning_rate": 4.685986211730816e-06,
"logits/chosen": -2.3960068225860596,
"logits/rejected": -2.3960068225860596,
"logps/chosen": -331.6641845703125,
"logps/rejected": -331.6641845703125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01894356682896614,
"rewards/margins": 0.0,
"rewards/rejected": -0.01894356682896614,
"step": 840
},
{
"epoch": 0.24812084944902577,
"grad_norm": 0.01165771484375,
"learning_rate": 4.6735079174226864e-06,
"logits/chosen": -2.408433198928833,
"logits/rejected": -2.408433198928833,
"logps/chosen": -269.3624572753906,
"logps/rejected": -269.3624572753906,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.009970271959900856,
"rewards/margins": 0.0,
"rewards/rejected": -0.009970271959900856,
"step": 850
},
{
"epoch": 0.25103991826607314,
"grad_norm": 0.01483154296875,
"learning_rate": 4.660803787754306e-06,
"logits/chosen": -2.416790723800659,
"logits/rejected": -2.416790723800659,
"logps/chosen": -302.0819396972656,
"logps/rejected": -302.0819396972656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010707431472837925,
"rewards/margins": 0.0,
"rewards/rejected": -0.010707431472837925,
"step": 860
},
{
"epoch": 0.2539589870831205,
"grad_norm": 0.0157470703125,
"learning_rate": 4.647875142730853e-06,
"logits/chosen": -2.3868987560272217,
"logits/rejected": -2.3868987560272217,
"logps/chosen": -299.74444580078125,
"logps/rejected": -299.74444580078125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012594198808073997,
"rewards/margins": 0.0,
"rewards/rejected": -0.012594198808073997,
"step": 870
},
{
"epoch": 0.2568780559001678,
"grad_norm": 0.0140380859375,
"learning_rate": 4.634723325685462e-06,
"logits/chosen": -2.442610263824463,
"logits/rejected": -2.442610263824463,
"logps/chosen": -308.396240234375,
"logps/rejected": -308.396240234375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.011057281866669655,
"rewards/margins": 0.0,
"rewards/rejected": -0.011057281866669655,
"step": 880
},
{
"epoch": 0.2597971247172152,
"grad_norm": 0.0157470703125,
"learning_rate": 4.621349703139651e-06,
"logits/chosen": -2.4502758979797363,
"logits/rejected": -2.4502758979797363,
"logps/chosen": -327.5845031738281,
"logps/rejected": -327.5845031738281,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012428502552211285,
"rewards/margins": 0.0,
"rewards/rejected": -0.012428502552211285,
"step": 890
},
{
"epoch": 0.26271619353426257,
"grad_norm": 0.01519775390625,
"learning_rate": 4.6077556646613365e-06,
"logits/chosen": -2.4429335594177246,
"logits/rejected": -2.4429335594177246,
"logps/chosen": -309.44598388671875,
"logps/rejected": -309.44598388671875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008300786837935448,
"rewards/margins": 0.0,
"rewards/rejected": -0.008300786837935448,
"step": 900
},
{
"epoch": 0.26271619353426257,
"eval_logits/chosen": -2.396768093109131,
"eval_logits/rejected": -2.396768093109131,
"eval_logps/chosen": -307.1708068847656,
"eval_logps/rejected": -307.1708068847656,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.0069224112667143345,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.0069224112667143345,
"eval_runtime": 2667.1988,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 900
},
{
"epoch": 0.2656352623513099,
"grad_norm": 0.0150146484375,
"learning_rate": 4.593942622720449e-06,
"logits/chosen": -2.431570529937744,
"logits/rejected": -2.431570529937744,
"logps/chosen": -333.9033203125,
"logps/rejected": -333.9033203125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.008790754713118076,
"rewards/margins": 0.0,
"rewards/rejected": -0.008790754713118076,
"step": 910
},
{
"epoch": 0.2685543311683573,
"grad_norm": 0.011474609375,
"learning_rate": 4.579912012542172e-06,
"logits/chosen": -2.4538259506225586,
"logits/rejected": -2.4538259506225586,
"logps/chosen": -330.14776611328125,
"logps/rejected": -330.14776611328125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010161884129047394,
"rewards/margins": 0.0,
"rewards/rejected": -0.010161884129047394,
"step": 920
},
{
"epoch": 0.27147339998540465,
"grad_norm": 0.0164794921875,
"learning_rate": 4.565665291957821e-06,
"logits/chosen": -2.412051200866699,
"logits/rejected": -2.412051200866699,
"logps/chosen": -300.0600891113281,
"logps/rejected": -300.0600891113281,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.010114507749676704,
"rewards/margins": 0.0,
"rewards/rejected": -0.010114507749676704,
"step": 930
},
{
"epoch": 0.274392468802452,
"grad_norm": 0.0125732421875,
"learning_rate": 4.551203941253367e-06,
"logits/chosen": -2.4353108406066895,
"logits/rejected": -2.4353108406066895,
"logps/chosen": -288.15032958984375,
"logps/rejected": -288.15032958984375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01036372222006321,
"rewards/margins": 0.0,
"rewards/rejected": -0.01036372222006321,
"step": 940
},
{
"epoch": 0.2773115376194994,
"grad_norm": 0.01434326171875,
"learning_rate": 4.5365294630156264e-06,
"logits/chosen": -2.4350383281707764,
"logits/rejected": -2.4350383281707764,
"logps/chosen": -319.06195068359375,
"logps/rejected": -319.06195068359375,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.011402562260627747,
"rewards/margins": 0.0,
"rewards/rejected": -0.011402562260627747,
"step": 950
},
{
"epoch": 0.28023060643654674,
"grad_norm": 0.012451171875,
"learning_rate": 4.521643381976142e-06,
"logits/chosen": -2.428330898284912,
"logits/rejected": -2.428330898284912,
"logps/chosen": -322.0547790527344,
"logps/rejected": -322.0547790527344,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012878289446234703,
"rewards/margins": 0.0,
"rewards/rejected": -0.012878289446234703,
"step": 960
},
{
"epoch": 0.2831496752535941,
"grad_norm": 0.013671875,
"learning_rate": 4.506547244852756e-06,
"logits/chosen": -2.4220213890075684,
"logits/rejected": -2.4220213890075684,
"logps/chosen": -298.77056884765625,
"logps/rejected": -298.77056884765625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.012091143056750298,
"rewards/margins": 0.0,
"rewards/rejected": -0.012091143056750298,
"step": 970
},
{
"epoch": 0.2860687440706415,
"grad_norm": 0.0145263671875,
"learning_rate": 4.491242620188898e-06,
"logits/chosen": -2.400778293609619,
"logits/rejected": -2.400778293609619,
"logps/chosen": -302.7762756347656,
"logps/rejected": -302.7762756347656,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01696743816137314,
"rewards/margins": 0.0,
"rewards/rejected": -0.01696743816137314,
"step": 980
},
{
"epoch": 0.2889878128876888,
"grad_norm": 0.012451171875,
"learning_rate": 4.475731098190611e-06,
"logits/chosen": -2.4159862995147705,
"logits/rejected": -2.4159862995147705,
"logps/chosen": -278.34356689453125,
"logps/rejected": -278.34356689453125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.014010600745677948,
"rewards/margins": 0.0,
"rewards/rejected": -0.014010600745677948,
"step": 990
},
{
"epoch": 0.29190688170473617,
"grad_norm": 0.0145263671875,
"learning_rate": 4.4600142905613216e-06,
"logits/chosen": -2.416891098022461,
"logits/rejected": -2.416891098022461,
"logps/chosen": -310.4523620605469,
"logps/rejected": -310.4523620605469,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01909947767853737,
"rewards/margins": 0.0,
"rewards/rejected": -0.01909947767853737,
"step": 1000
},
{
"epoch": 0.29190688170473617,
"eval_logits/chosen": -2.3967111110687256,
"eval_logits/rejected": -2.3967111110687256,
"eval_logps/chosen": -308.2130432128906,
"eval_logps/rejected": -308.2130432128906,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.017344659194350243,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.017344659194350243,
"eval_runtime": 2668.0913,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 1000
},
{
"epoch": 0.29482595052178356,
"grad_norm": 0.029296875,
"learning_rate": 4.444093830334381e-06,
"logits/chosen": -2.395017147064209,
"logits/rejected": -2.395017147064209,
"logps/chosen": -330.1224670410156,
"logps/rejected": -330.1224670410156,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.013958754017949104,
"rewards/margins": 0.0,
"rewards/rejected": -0.013958754017949104,
"step": 1010
},
{
"epoch": 0.2977450193388309,
"grad_norm": 0.01611328125,
"learning_rate": 4.427971371703378e-06,
"logits/chosen": -2.4404492378234863,
"logits/rejected": -2.4404492378234863,
"logps/chosen": -314.79888916015625,
"logps/rejected": -314.79888916015625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.027685949578881264,
"rewards/margins": 0.0,
"rewards/rejected": -0.027685949578881264,
"step": 1020
},
{
"epoch": 0.30066408815587825,
"grad_norm": 0.01263427734375,
"learning_rate": 4.411648589850276e-06,
"logits/chosen": -2.4368889331817627,
"logits/rejected": -2.4368889331817627,
"logps/chosen": -299.6970520019531,
"logps/rejected": -299.6970520019531,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01648426428437233,
"rewards/margins": 0.0,
"rewards/rejected": -0.01648426428437233,
"step": 1030
},
{
"epoch": 0.30358315697292565,
"grad_norm": 0.01416015625,
"learning_rate": 4.395127180771342e-06,
"logits/chosen": -2.4541175365448,
"logits/rejected": -2.4541175365448,
"logps/chosen": -326.87841796875,
"logps/rejected": -326.87841796875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.020237499848008156,
"rewards/margins": 0.0,
"rewards/rejected": -0.020237499848008156,
"step": 1040
},
{
"epoch": 0.306502225789973,
"grad_norm": 0.01318359375,
"learning_rate": 4.378408861100937e-06,
"logits/chosen": -2.415283203125,
"logits/rejected": -2.415283203125,
"logps/chosen": -261.1552429199219,
"logps/rejected": -261.1552429199219,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.01741962879896164,
"rewards/margins": 0.0,
"rewards/rejected": -0.01741962879896164,
"step": 1050
},
{
"epoch": 0.30942129460702034,
"grad_norm": 0.01416015625,
"learning_rate": 4.361495367933144e-06,
"logits/chosen": -2.396031141281128,
"logits/rejected": -2.396031141281128,
"logps/chosen": -322.30377197265625,
"logps/rejected": -322.30377197265625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.014474359340965748,
"rewards/margins": 0.0,
"rewards/rejected": -0.014474359340965748,
"step": 1060
},
{
"epoch": 0.31234036342406774,
"grad_norm": 0.0181884765625,
"learning_rate": 4.344388458641283e-06,
"logits/chosen": -2.4288814067840576,
"logits/rejected": -2.4288814067840576,
"logps/chosen": -324.64501953125,
"logps/rejected": -324.64501953125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.025701653212308884,
"rewards/margins": 0.0,
"rewards/rejected": -0.025701653212308884,
"step": 1070
},
{
"epoch": 0.3152594322411151,
"grad_norm": 0.0164794921875,
"learning_rate": 4.32708991069531e-06,
"logits/chosen": -2.411003589630127,
"logits/rejected": -2.411003589630127,
"logps/chosen": -318.289794921875,
"logps/rejected": -318.289794921875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.02725202962756157,
"rewards/margins": 0.0,
"rewards/rejected": -0.02725202962756157,
"step": 1080
},
{
"epoch": 0.3181785010581624,
"grad_norm": 0.01312255859375,
"learning_rate": 4.309601521477134e-06,
"logits/chosen": -2.437730550765991,
"logits/rejected": -2.437730550765991,
"logps/chosen": -318.1125793457031,
"logps/rejected": -318.1125793457031,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.035508617758750916,
"rewards/margins": 0.0,
"rewards/rejected": -0.035508617758750916,
"step": 1090
},
{
"epoch": 0.3210975698752098,
"grad_norm": 0.01373291015625,
"learning_rate": 4.291925108093856e-06,
"logits/chosen": -2.4134514331817627,
"logits/rejected": -2.4134514331817627,
"logps/chosen": -306.98712158203125,
"logps/rejected": -306.98712158203125,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": -0.02741456963121891,
"rewards/margins": 0.0,
"rewards/rejected": -0.02741456963121891,
"step": 1100
},
{
"epoch": 0.3210975698752098,
"eval_logits/chosen": -2.3970751762390137,
"eval_logits/rejected": -2.3970751762390137,
"eval_logps/chosen": -309.472412109375,
"eval_logps/rejected": -309.472412109375,
"eval_loss": 0.6931472420692444,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": -0.029938040301203728,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": -0.029938040301203728,
"eval_runtime": 2667.8688,
"eval_samples_per_second": 2.283,
"eval_steps_per_second": 0.286,
"step": 1100
},
|
{ |
|
"epoch": 0.32401663869225716, |
|
"grad_norm": 0.0120849609375, |
|
"learning_rate": 4.274062507188978e-06, |
|
"logits/chosen": -2.413846492767334, |
|
"logits/rejected": -2.413846492767334, |
|
"logps/chosen": -319.53887939453125, |
|
"logps/rejected": -319.53887939453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03637847676873207, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03637847676873207, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3269357075093045, |
|
"grad_norm": 0.0130615234375, |
|
"learning_rate": 4.256015574751555e-06, |
|
"logits/chosen": -2.443239212036133, |
|
"logits/rejected": -2.443239212036133, |
|
"logps/chosen": -302.9671630859375, |
|
"logps/rejected": -302.9671630859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.032804206013679504, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.032804206013679504, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3298547763263519, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 4.2377861859233604e-06, |
|
"logits/chosen": -2.4368813037872314, |
|
"logits/rejected": -2.4368813037872314, |
|
"logps/chosen": -277.4005126953125, |
|
"logps/rejected": -277.4005126953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.030909573659300804, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.030909573659300804, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.33277384514339925, |
|
"grad_norm": 0.01263427734375, |
|
"learning_rate": 4.219376234804047e-06, |
|
"logits/chosen": -2.4358789920806885, |
|
"logits/rejected": -2.4358789920806885, |
|
"logps/chosen": -294.87567138671875, |
|
"logps/rejected": -294.87567138671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.033531349152326584, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.033531349152326584, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3356929139604466, |
|
"grad_norm": 0.01519775390625, |
|
"learning_rate": 4.200787634254345e-06, |
|
"logits/chosen": -2.458458662033081, |
|
"logits/rejected": -2.458458662033081, |
|
"logps/chosen": -284.5567321777344, |
|
"logps/rejected": -284.5567321777344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.029438916593790054, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.029438916593790054, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.338611982777494, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 4.18202231569731e-06, |
|
"logits/chosen": -2.465770721435547, |
|
"logits/rejected": -2.465770721435547, |
|
"logps/chosen": -325.60443115234375, |
|
"logps/rejected": -325.60443115234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03488076478242874, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03488076478242874, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.34153105159454133, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 4.163082228917639e-06, |
|
"logits/chosen": -2.42230224609375, |
|
"logits/rejected": -2.42230224609375, |
|
"logps/chosen": -332.96807861328125, |
|
"logps/rejected": -332.96807861328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03761008754372597, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03761008754372597, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3444501204115887, |
|
"grad_norm": 0.01519775390625, |
|
"learning_rate": 4.143969341859083e-06, |
|
"logits/chosen": -2.4006218910217285, |
|
"logits/rejected": -2.4006218910217285, |
|
"logps/chosen": -298.38372802734375, |
|
"logps/rejected": -298.38372802734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.027944693341851234, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.027944693341851234, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3473691892286361, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 4.124685640419967e-06, |
|
"logits/chosen": -2.4376044273376465, |
|
"logits/rejected": -2.4376044273376465, |
|
"logps/chosen": -339.3370666503906, |
|
"logps/rejected": -339.3370666503906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04471305012702942, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04471305012702942, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.3502882580456834, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 4.105233128246849e-06, |
|
"logits/chosen": -2.4307379722595215, |
|
"logits/rejected": -2.4307379722595215, |
|
"logps/chosen": -314.7157287597656, |
|
"logps/rejected": -314.7157287597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04377968981862068, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04377968981862068, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3502882580456834, |
|
"eval_logits/chosen": -2.3975985050201416, |
|
"eval_logits/rejected": -2.3975985050201416, |
|
"eval_logps/chosen": -310.0194091796875, |
|
"eval_logps/rejected": -310.0194091796875, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.035407647490501404, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.035407647490501404, |
|
"eval_runtime": 2667.8638, |
|
"eval_samples_per_second": 2.283, |
|
"eval_steps_per_second": 0.286, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.35320732686273076, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 4.085613826526338e-06, |
|
"logits/chosen": -2.4104952812194824, |
|
"logits/rejected": -2.4104952812194824, |
|
"logps/chosen": -307.89056396484375, |
|
"logps/rejected": -307.89056396484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.034878071397542953, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.034878071397542953, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.35612639567977816, |
|
"grad_norm": 0.0130615234375, |
|
"learning_rate": 4.065829773775082e-06, |
|
"logits/chosen": -2.454697847366333, |
|
"logits/rejected": -2.454697847366333, |
|
"logps/chosen": -331.95556640625, |
|
"logps/rejected": -331.95556640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035688284784555435, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035688284784555435, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.3590454644968255, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 4.045883025627957e-06, |
|
"logits/chosen": -2.416503429412842, |
|
"logits/rejected": -2.416503429412842, |
|
"logps/chosen": -317.5516662597656, |
|
"logps/rejected": -317.5516662597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.036794569343328476, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.036794569343328476, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.36196453331387285, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 4.025775654624481e-06, |
|
"logits/chosen": -2.431762218475342, |
|
"logits/rejected": -2.431762218475342, |
|
"logps/chosen": -286.4144592285156, |
|
"logps/rejected": -286.4144592285156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0327475443482399, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0327475443482399, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.36488360213092025, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 4.005509749993471e-06, |
|
"logits/chosen": -2.4348835945129395, |
|
"logits/rejected": -2.4348835945129395, |
|
"logps/chosen": -264.43670654296875, |
|
"logps/rejected": -264.43670654296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03447514772415161, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03447514772415161, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.3678026709479676, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 3.985087417435964e-06, |
|
"logits/chosen": -2.4379494190216064, |
|
"logits/rejected": -2.4379494190216064, |
|
"logps/chosen": -306.0783386230469, |
|
"logps/rejected": -306.0783386230469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03243451565504074, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03243451565504074, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.37072173976501493, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 3.964510778906425e-06, |
|
"logits/chosen": -2.434380292892456, |
|
"logits/rejected": -2.434380292892456, |
|
"logps/chosen": -316.9388427734375, |
|
"logps/rejected": -316.9388427734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.038867734372615814, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.038867734372615814, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.37364080858206233, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 3.943781972392269e-06, |
|
"logits/chosen": -2.4212710857391357, |
|
"logits/rejected": -2.4212710857391357, |
|
"logps/chosen": -326.74237060546875, |
|
"logps/rejected": -326.74237060546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.038525618612766266, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.038525618612766266, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.3765598773991097, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 3.922903151691716e-06, |
|
"logits/chosen": -2.450032949447632, |
|
"logits/rejected": -2.450032949447632, |
|
"logps/chosen": -329.82073974609375, |
|
"logps/rejected": -329.82073974609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.037473224103450775, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.037473224103450775, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.379478946216157, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 3.901876486190008e-06, |
|
"logits/chosen": -2.4351401329040527, |
|
"logits/rejected": -2.4351401329040527, |
|
"logps/chosen": -315.5516662597656, |
|
"logps/rejected": -315.5516662597656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03452508896589279, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03452508896589279, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.379478946216157, |
|
"eval_logits/chosen": -2.3963370323181152, |
|
"eval_logits/rejected": -2.3963370323181152, |
|
"eval_logps/chosen": -309.5113525390625, |
|
"eval_logps/rejected": -309.5113525390625, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.030327608808875084, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.030327608808875084, |
|
"eval_runtime": 2666.8225, |
|
"eval_samples_per_second": 2.284, |
|
"eval_steps_per_second": 0.286, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3823980150332044, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 3.880704160633995e-06, |
|
"logits/chosen": -2.4444994926452637, |
|
"logits/rejected": -2.4444994926452637, |
|
"logps/chosen": -295.88348388671875, |
|
"logps/rejected": -295.88348388671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.036420173943042755, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.036420173943042755, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.38531708385025176, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 3.859388374905136e-06, |
|
"logits/chosen": -2.41549015045166, |
|
"logits/rejected": -2.41549015045166, |
|
"logps/chosen": -291.2346496582031, |
|
"logps/rejected": -291.2346496582031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03046022728085518, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03046022728085518, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3882361526672991, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 3.837931343790924e-06, |
|
"logits/chosen": -2.4401891231536865, |
|
"logits/rejected": -2.4401891231536865, |
|
"logps/chosen": -297.060791015625, |
|
"logps/rejected": -297.060791015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02374189719557762, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02374189719557762, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.3911552214843465, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 3.8163352967547575e-06, |
|
"logits/chosen": -2.4282491207122803, |
|
"logits/rejected": -2.4282491207122803, |
|
"logps/chosen": -350.7884216308594, |
|
"logps/rejected": -350.7884216308594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.031809043139219284, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.031809043139219284, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.39407429030139385, |
|
"grad_norm": 0.01190185546875, |
|
"learning_rate": 3.7946024777042974e-06, |
|
"logits/chosen": -2.423346996307373, |
|
"logits/rejected": -2.423346996307373, |
|
"logps/chosen": -300.26800537109375, |
|
"logps/rejected": -300.26800537109375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.029370862990617752, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.029370862990617752, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3969933591184412, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 3.7727351447583095e-06, |
|
"logits/chosen": -2.397026538848877, |
|
"logits/rejected": -2.397026538848877, |
|
"logps/chosen": -318.9501647949219, |
|
"logps/rejected": -318.9501647949219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.030634000897407532, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.030634000897407532, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.3999124279354886, |
|
"grad_norm": 0.01385498046875, |
|
"learning_rate": 3.750735570012043e-06, |
|
"logits/chosen": -2.438441276550293, |
|
"logits/rejected": -2.438441276550293, |
|
"logps/chosen": -330.5710754394531, |
|
"logps/rejected": -330.5710754394531, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03722671791911125, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03722671791911125, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.40283149675253593, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 3.7286060393011513e-06, |
|
"logits/chosen": -2.419067144393921, |
|
"logits/rejected": -2.419067144393921, |
|
"logps/chosen": -314.528564453125, |
|
"logps/rejected": -314.528564453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.032639987766742706, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.032639987766742706, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.4057505655695833, |
|
"grad_norm": 0.01904296875, |
|
"learning_rate": 3.7063488519641825e-06, |
|
"logits/chosen": -2.4223015308380127, |
|
"logits/rejected": -2.4223015308380127, |
|
"logps/chosen": -329.4114685058594, |
|
"logps/rejected": -329.4114685058594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03504698723554611, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03504698723554611, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.4086696343866307, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 3.6839663206036715e-06, |
|
"logits/chosen": -2.4432168006896973, |
|
"logits/rejected": -2.4432168006896973, |
|
"logps/chosen": -293.8369445800781, |
|
"logps/rejected": -293.8369445800781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.031177738681435585, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.031177738681435585, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.4086696343866307, |
|
"eval_logits/chosen": -2.395508050918579, |
|
"eval_logits/rejected": -2.395508050918579, |
|
"eval_logps/chosen": -309.2061462402344, |
|
"eval_logps/rejected": -309.2061462402344, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.02727527543902397, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.02727527543902397, |
|
"eval_runtime": 2667.2175, |
|
"eval_samples_per_second": 2.283, |
|
"eval_steps_per_second": 0.286, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.411588703203678, |
|
"grad_norm": 0.01239013671875, |
|
"learning_rate": 3.6614607708458532e-06, |
|
"logits/chosen": -2.418804883956909, |
|
"logits/rejected": -2.418804883956909, |
|
"logps/chosen": -295.696533203125, |
|
"logps/rejected": -295.696533203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.024944758042693138, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.024944758042693138, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.41450777202072536, |
|
"grad_norm": 0.0146484375, |
|
"learning_rate": 3.6388345410990195e-06, |
|
"logits/chosen": -2.4199652671813965, |
|
"logits/rejected": -2.4199652671813965, |
|
"logps/chosen": -341.0202331542969, |
|
"logps/rejected": -341.0202331542969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.030235985293984413, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.030235985293984413, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.41742684083777276, |
|
"grad_norm": 0.01141357421875, |
|
"learning_rate": 3.6160899823105518e-06, |
|
"logits/chosen": -2.4291069507598877, |
|
"logits/rejected": -2.4291069507598877, |
|
"logps/chosen": -287.2336730957031, |
|
"logps/rejected": -287.2336730957031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0277925543487072, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0277925543487072, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.4203459096548201, |
|
"grad_norm": 0.0140380859375, |
|
"learning_rate": 3.5932294577226468e-06, |
|
"logits/chosen": -2.440561532974243, |
|
"logits/rejected": -2.440561532974243, |
|
"logps/chosen": -276.7684020996094, |
|
"logps/rejected": -276.7684020996094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.017870336771011353, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.017870336771011353, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.42326497847186745, |
|
"grad_norm": 0.0118408203125, |
|
"learning_rate": 3.5702553426267704e-06, |
|
"logits/chosen": -2.449218988418579, |
|
"logits/rejected": -2.449218988418579, |
|
"logps/chosen": -305.78814697265625, |
|
"logps/rejected": -305.78814697265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.024231892079114914, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.024231892079114914, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.42618404728891485, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 3.547170024116854e-06, |
|
"logits/chosen": -2.4015636444091797, |
|
"logits/rejected": -2.4015636444091797, |
|
"logps/chosen": -281.1402893066406, |
|
"logps/rejected": -281.1402893066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.027333328500390053, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.027333328500390053, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.4291031161059622, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 3.5239759008412666e-06, |
|
"logits/chosen": -2.461341381072998, |
|
"logits/rejected": -2.461341381072998, |
|
"logps/chosen": -315.0804443359375, |
|
"logps/rejected": -315.0804443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.026240995153784752, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.026240995153784752, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.43202218492300953, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 3.500675382753588e-06, |
|
"logits/chosen": -2.420381784439087, |
|
"logits/rejected": -2.420381784439087, |
|
"logps/chosen": -310.7515563964844, |
|
"logps/rejected": -310.7515563964844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.023152858018875122, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.023152858018875122, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.43494125374005693, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 3.477270890862204e-06, |
|
"logits/chosen": -2.3881866931915283, |
|
"logits/rejected": -2.3881866931915283, |
|
"logps/chosen": -318.3128356933594, |
|
"logps/rejected": -318.3128356933594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.030725980177521706, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.030725980177521706, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4378603225571043, |
|
"grad_norm": 0.0140380859375, |
|
"learning_rate": 3.453764856978758e-06, |
|
"logits/chosen": -2.409209728240967, |
|
"logits/rejected": -2.409209728240967, |
|
"logps/chosen": -331.4593200683594, |
|
"logps/rejected": -331.4593200683594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.022285277023911476, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.022285277023911476, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4378603225571043, |
|
"eval_logits/chosen": -2.394321918487549, |
|
"eval_logits/rejected": -2.394321918487549, |
|
"eval_logps/chosen": -308.9651794433594, |
|
"eval_logps/rejected": -308.9651794433594, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.024866018444299698, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.024866018444299698, |
|
"eval_runtime": 2666.7789, |
|
"eval_samples_per_second": 2.284, |
|
"eval_steps_per_second": 0.286, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4407793913741516, |
|
"grad_norm": 0.01312255859375, |
|
"learning_rate": 3.4301597234654733e-06, |
|
"logits/chosen": -2.4193215370178223, |
|
"logits/rejected": -2.4193215370178223, |
|
"logps/chosen": -304.951171875, |
|
"logps/rejected": -304.951171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02852988801896572, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02852988801896572, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.443698460191199, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 3.406457942981384e-06, |
|
"logits/chosen": -2.430614948272705, |
|
"logits/rejected": -2.430614948272705, |
|
"logps/chosen": -333.06988525390625, |
|
"logps/rejected": -333.06988525390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.024759288877248764, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.024759288877248764, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.44661752900824636, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 3.3826619782274954e-06, |
|
"logits/chosen": -2.43021559715271, |
|
"logits/rejected": -2.43021559715271, |
|
"logps/chosen": -284.0345153808594, |
|
"logps/rejected": -284.0345153808594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.025433775037527084, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.025433775037527084, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4495365978252937, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 3.3587743016909013e-06, |
|
"logits/chosen": -2.439312219619751, |
|
"logits/rejected": -2.439312219619751, |
|
"logps/chosen": -320.015380859375, |
|
"logps/rejected": -320.015380859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02944205328822136, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02944205328822136, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4524556666423411, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 3.334797395387882e-06, |
|
"logits/chosen": -2.4262938499450684, |
|
"logits/rejected": -2.4262938499450684, |
|
"logps/chosen": -329.60504150390625, |
|
"logps/rejected": -329.60504150390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.027106398716568947, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.027106398716568947, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.45537473545938845, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 3.3107337506060145e-06, |
|
"logits/chosen": -2.4414420127868652, |
|
"logits/rejected": -2.4414420127868652, |
|
"logps/chosen": -289.9877014160156, |
|
"logps/rejected": -289.9877014160156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.028158003464341164, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.028158003464341164, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.4582938042764358, |
|
"grad_norm": 0.0301513671875, |
|
"learning_rate": 3.2865858676453172e-06, |
|
"logits/chosen": -2.434182643890381, |
|
"logits/rejected": -2.434182643890381, |
|
"logps/chosen": -306.0428466796875, |
|
"logps/rejected": -306.0428466796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02475564181804657, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02475564181804657, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4612128730934832, |
|
"grad_norm": 0.0098876953125, |
|
"learning_rate": 3.2623562555584633e-06, |
|
"logits/chosen": -2.430816411972046, |
|
"logits/rejected": -2.430816411972046, |
|
"logps/chosen": -281.2196960449219, |
|
"logps/rejected": -281.2196960449219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02929893136024475, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02929893136024475, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.46413194191053053, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 3.2380474318900766e-06, |
|
"logits/chosen": -2.4165406227111816, |
|
"logits/rejected": -2.4165406227111816, |
|
"logps/chosen": -310.68511962890625, |
|
"logps/rejected": -310.68511962890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03376628831028938, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03376628831028938, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4670510107275779, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 3.2136619224151533e-06, |
|
"logits/chosen": -2.4508678913116455, |
|
"logits/rejected": -2.4508678913116455, |
|
"logps/chosen": -327.84619140625, |
|
"logps/rejected": -327.84619140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03426826745271683, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03426826745271683, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4670510107275779, |
|
"eval_logits/chosen": -2.3953943252563477, |
|
"eval_logits/rejected": -2.3953943252563477, |
|
"eval_logps/chosen": -309.15863037109375, |
|
"eval_logps/rejected": -309.15863037109375, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.026800233870744705, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.026800233870744705, |
|
"eval_runtime": 2666.9806, |
|
"eval_samples_per_second": 2.283, |
|
"eval_steps_per_second": 0.286, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4699700795446253, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 3.1892022608766215e-06, |
|
"logits/chosen": -2.361971378326416, |
|
"logits/rejected": -2.361971378326416, |
|
"logps/chosen": -299.3944396972656, |
|
"logps/rejected": -299.3944396972656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0262086633592844, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0262086633592844, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4728891483616726, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 3.16467098872208e-06, |
|
"logits/chosen": -2.4706971645355225, |
|
"logits/rejected": -2.4706971645355225, |
|
"logps/chosen": -332.5861511230469, |
|
"logps/rejected": -332.5861511230469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.034811943769454956, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.034811943769454956, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.47580821717871996, |
|
"grad_norm": 0.032470703125, |
|
"learning_rate": 3.140070654839728e-06, |
|
"logits/chosen": -2.4026148319244385, |
|
"logits/rejected": -2.4026148319244385, |
|
"logps/chosen": -296.76605224609375, |
|
"logps/rejected": -296.76605224609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02368254028260708, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02368254028260708, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.47872728599576736, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 3.115403815293532e-06, |
|
"logits/chosen": -2.43617582321167, |
|
"logits/rejected": -2.43617582321167, |
|
"logps/chosen": -342.2427062988281, |
|
"logps/rejected": -342.2427062988281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.036105576902627945, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.036105576902627945, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4816463548128147, |
|
"grad_norm": 0.0113525390625, |
|
"learning_rate": 3.0906730330576345e-06, |
|
"logits/chosen": -2.4739155769348145, |
|
"logits/rejected": -2.4739155769348145, |
|
"logps/chosen": -332.26678466796875, |
|
"logps/rejected": -332.26678466796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.028261488303542137, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.028261488303542137, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.48456542362986205, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 3.065880877750059e-06, |
|
"logits/chosen": -2.427436351776123, |
|
"logits/rejected": -2.427436351776123, |
|
"logps/chosen": -304.4495544433594, |
|
"logps/rejected": -304.4495544433594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03417867794632912, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03417867794632912, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.48748449244690945, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 3.041029925365711e-06, |
|
"logits/chosen": -2.4058425426483154, |
|
"logits/rejected": -2.4058425426483154, |
|
"logps/chosen": -308.30072021484375, |
|
"logps/rejected": -308.30072021484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035630594938993454, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035630594938993454, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4904035612639568, |
|
"grad_norm": 0.0126953125, |
|
"learning_rate": 3.0161227580087282e-06, |
|
"logits/chosen": -2.433281421661377, |
|
"logits/rejected": -2.433281421661377, |
|
"logps/chosen": -342.0614013671875, |
|
"logps/rejected": -342.0614013671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03289630264043808, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03289630264043808, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.49332263008100413, |
|
"grad_norm": 0.0123291015625, |
|
"learning_rate": 2.9911619636241862e-06, |
|
"logits/chosen": -2.4333884716033936, |
|
"logits/rejected": -2.4333884716033936, |
|
"logps/chosen": -322.1616516113281, |
|
"logps/rejected": -322.1616516113281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.034327663481235504, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.034327663481235504, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.49624169889805153, |
|
"grad_norm": 0.01275634765625, |
|
"learning_rate": 2.966150135729203e-06, |
|
"logits/chosen": -2.38623046875, |
|
"logits/rejected": -2.38623046875, |
|
"logps/chosen": -335.8984680175781, |
|
"logps/rejected": -335.8984680175781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03050742670893669, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03050742670893669, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.49624169889805153, |
|
"eval_logits/chosen": -2.3913044929504395, |
|
"eval_logits/rejected": -2.3913044929504395, |
|
"eval_logps/chosen": -309.405517578125, |
|
"eval_logps/rejected": -309.405517578125, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.029269486665725708, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.029269486665725708, |
|
"eval_runtime": 2669.612, |
|
"eval_samples_per_second": 2.281, |
|
"eval_steps_per_second": 0.285, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4991607677150989, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 2.9410898731434667e-06, |
|
"logits/chosen": -2.41214919090271, |
|
"logits/rejected": -2.41214919090271, |
|
"logps/chosen": -302.40887451171875, |
|
"logps/rejected": -302.40887451171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.028926188126206398, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.028926188126206398, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.5020798365321463, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 2.9159837797192003e-06, |
|
"logits/chosen": -2.415527820587158, |
|
"logits/rejected": -2.415527820587158, |
|
"logps/chosen": -329.7999267578125, |
|
"logps/rejected": -329.7999267578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03768650442361832, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03768650442361832, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.5049989053491936, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 2.890834464070623e-06, |
|
"logits/chosen": -2.4205574989318848, |
|
"logits/rejected": -2.4205574989318848, |
|
"logps/chosen": -309.94329833984375, |
|
"logps/rejected": -309.94329833984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03702525794506073, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03702525794506073, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.507917974166241, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 2.865644539302896e-06, |
|
"logits/chosen": -2.389092206954956, |
|
"logits/rejected": -2.389092206954956, |
|
"logps/chosen": -339.6660461425781, |
|
"logps/rejected": -339.6660461425781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.029835382476449013, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.029835382476449013, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.5108370429832884, |
|
"grad_norm": 0.01300048828125, |
|
"learning_rate": 2.840416622740617e-06, |
|
"logits/chosen": -2.444392681121826, |
|
"logits/rejected": -2.444392681121826, |
|
"logps/chosen": -318.47296142578125, |
|
"logps/rejected": -318.47296142578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03087993524968624, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03087993524968624, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.5137561118003356, |
|
"grad_norm": 0.01263427734375, |
|
"learning_rate": 2.8151533356558673e-06, |
|
"logits/chosen": -2.4179341793060303, |
|
"logits/rejected": -2.4179341793060303, |
|
"logps/chosen": -295.8548889160156, |
|
"logps/rejected": -295.8548889160156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.032246123999357224, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.032246123999357224, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.516675180617383, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 2.7898573029958563e-06, |
|
"logits/chosen": -2.377382516860962, |
|
"logits/rejected": -2.377382516860962, |
|
"logps/chosen": -305.41656494140625, |
|
"logps/rejected": -305.41656494140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03165289759635925, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03165289759635925, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.5195942494344304, |
|
"grad_norm": 0.0103759765625, |
|
"learning_rate": 2.7645311531101763e-06, |
|
"logits/chosen": -2.412802219390869, |
|
"logits/rejected": -2.412802219390869, |
|
"logps/chosen": -312.50067138671875, |
|
"logps/rejected": -312.50067138671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.034763775765895844, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.034763775765895844, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.5225133182514777, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 2.7391775174777084e-06, |
|
"logits/chosen": -2.419868230819702, |
|
"logits/rejected": -2.419868230819702, |
|
"logps/chosen": -310.26922607421875, |
|
"logps/rejected": -310.26922607421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035530101507902145, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035530101507902145, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.5254323870685251, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 2.713799030433203e-06, |
|
"logits/chosen": -2.423767566680908, |
|
"logits/rejected": -2.423767566680908, |
|
"logps/chosen": -308.0718688964844, |
|
"logps/rejected": -308.0718688964844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03834807127714157, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03834807127714157, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5254323870685251, |
|
"eval_logits/chosen": -2.392709732055664, |
|
"eval_logits/rejected": -2.392709732055664, |
|
"eval_logps/chosen": -310.26434326171875, |
|
"eval_logps/rejected": -310.26434326171875, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.037857454270124435, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.037857454270124435, |
|
"eval_runtime": 2669.2359, |
|
"eval_samples_per_second": 2.282, |
|
"eval_steps_per_second": 0.285, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.5283514558855725, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 2.688398328893561e-06, |
|
"logits/chosen": -2.4216887950897217, |
|
"logits/rejected": -2.4216887950897217, |
|
"logps/chosen": -307.491455078125, |
|
"logps/rejected": -307.491455078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03987189009785652, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03987189009785652, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.5312705247026198, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 2.6629780520838526e-06, |
|
"logits/chosen": -2.389004945755005, |
|
"logits/rejected": -2.389004945755005, |
|
"logps/chosen": -314.912353515625, |
|
"logps/rejected": -314.912353515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03697946295142174, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03697946295142174, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.5341895935196672, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 2.637540841263088e-06, |
|
"logits/chosen": -2.4251251220703125, |
|
"logits/rejected": -2.4251251220703125, |
|
"logps/chosen": -309.82611083984375, |
|
"logps/rejected": -309.82611083984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.041506171226501465, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.041506171226501465, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.5371086623367146, |
|
"grad_norm": 0.0130615234375, |
|
"learning_rate": 2.6120893394497825e-06, |
|
"logits/chosen": -2.4095826148986816, |
|
"logits/rejected": -2.4095826148986816, |
|
"logps/chosen": -290.29876708984375, |
|
"logps/rejected": -290.29876708984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03885159641504288, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03885159641504288, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.5400277311537619, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 2.586626191147337e-06, |
|
"logits/chosen": -2.414461612701416, |
|
"logits/rejected": -2.414461612701416, |
|
"logps/chosen": -298.74444580078125, |
|
"logps/rejected": -298.74444580078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035465486347675323, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035465486347675323, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5429467999708093, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 2.5611540420692666e-06, |
|
"logits/chosen": -2.4189705848693848, |
|
"logits/rejected": -2.4189705848693848, |
|
"logps/chosen": -361.6686706542969, |
|
"logps/rejected": -361.6686706542969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04054202139377594, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04054202139377594, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5458658687878567, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 2.5356755388642973e-06, |
|
"logits/chosen": -2.4053876399993896, |
|
"logits/rejected": -2.4053876399993896, |
|
"logps/chosen": -290.9534606933594, |
|
"logps/rejected": -290.9534606933594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.037081241607666016, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.037081241607666016, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.548784937604904, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 2.510193328841375e-06, |
|
"logits/chosen": -2.4209909439086914, |
|
"logits/rejected": -2.4209909439086914, |
|
"logps/chosen": -304.0765075683594, |
|
"logps/rejected": -304.0765075683594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03415703400969505, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03415703400969505, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5517040064219514, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 2.484710059694594e-06, |
|
"logits/chosen": -2.4459662437438965, |
|
"logits/rejected": -2.4459662437438965, |
|
"logps/chosen": -274.7349548339844, |
|
"logps/rejected": -274.7349548339844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03464942425489426, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03464942425489426, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.5546230752389988, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 2.4592283792280977e-06, |
|
"logits/chosen": -2.384141206741333, |
|
"logits/rejected": -2.384141206741333, |
|
"logps/chosen": -293.96533203125, |
|
"logps/rejected": -293.96533203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04034542292356491, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04034542292356491, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5546230752389988, |
|
"eval_logits/chosen": -2.3927481174468994, |
|
"eval_logits/rejected": -2.3927481174468994, |
|
"eval_logps/chosen": -310.4163818359375, |
|
"eval_logps/rejected": -310.4163818359375, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.03937768191099167, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.03937768191099167, |
|
"eval_runtime": 2717.9911, |
|
"eval_samples_per_second": 2.241, |
|
"eval_steps_per_second": 0.28, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.5575421440560461, |
|
"grad_norm": 0.01123046875, |
|
"learning_rate": 2.433750935080959e-06, |
|
"logits/chosen": -2.438390016555786, |
|
"logits/rejected": -2.438390016555786, |
|
"logps/chosen": -282.78106689453125, |
|
"logps/rejected": -282.78106689453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05149908736348152, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05149908736348152, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5604612128730935, |
|
"grad_norm": 0.011962890625, |
|
"learning_rate": 2.408280374452083e-06, |
|
"logits/chosen": -2.4534342288970947, |
|
"logits/rejected": -2.4534342288970947, |
|
"logps/chosen": -306.63946533203125, |
|
"logps/rejected": -306.63946533203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04204695671796799, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04204695671796799, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5633802816901409, |
|
"grad_norm": 0.01385498046875, |
|
"learning_rate": 2.3828193438251497e-06, |
|
"logits/chosen": -2.4302496910095215, |
|
"logits/rejected": -2.4302496910095215, |
|
"logps/chosen": -328.1105651855469, |
|
"logps/rejected": -328.1105651855469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03839876502752304, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03839876502752304, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5662993505071882, |
|
"grad_norm": 0.01513671875, |
|
"learning_rate": 2.3573704886936414e-06, |
|
"logits/chosen": -2.4566609859466553, |
|
"logits/rejected": -2.4566609859466553, |
|
"logps/chosen": -314.76910400390625, |
|
"logps/rejected": -314.76910400390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04071163386106491, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04071163386106491, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5692184193242356, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 2.331936453285957e-06, |
|
"logits/chosen": -2.414055109024048, |
|
"logits/rejected": -2.414055109024048, |
|
"logps/chosen": -346.7576904296875, |
|
"logps/rejected": -346.7576904296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.036326270550489426, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.036326270550489426, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.572137488141283, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 2.3065198802906767e-06, |
|
"logits/chosen": -2.4286112785339355, |
|
"logits/rejected": -2.4286112785339355, |
|
"logps/chosen": -339.60064697265625, |
|
"logps/rejected": -339.60064697265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04548191279172897, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04548191279172897, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5750565569583302, |
|
"grad_norm": 0.01141357421875, |
|
"learning_rate": 2.2811234105819714e-06, |
|
"logits/chosen": -2.4342637062072754, |
|
"logits/rejected": -2.4342637062072754, |
|
"logps/chosen": -314.4915771484375, |
|
"logps/rejected": -314.4915771484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03697306662797928, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03697306662797928, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5779756257753776, |
|
"grad_norm": 0.01495361328125, |
|
"learning_rate": 2.2557496829452056e-06, |
|
"logits/chosen": -2.387324810028076, |
|
"logits/rejected": -2.387324810028076, |
|
"logps/chosen": -349.37835693359375, |
|
"logps/rejected": -349.37835693359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04250973090529442, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04250973090529442, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.580894694592425, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 2.230401333802763e-06, |
|
"logits/chosen": -2.412137985229492, |
|
"logits/rejected": -2.412137985229492, |
|
"logps/chosen": -310.9895324707031, |
|
"logps/rejected": -310.9895324707031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.040226660668849945, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.040226660668849945, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5838137634094723, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 2.205080996940108e-06, |
|
"logits/chosen": -2.4124810695648193, |
|
"logits/rejected": -2.4124810695648193, |
|
"logps/chosen": -273.5890197753906, |
|
"logps/rejected": -273.5890197753906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04197770729660988, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04197770729660988, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5838137634094723, |
|
"eval_logits/chosen": -2.392037868499756, |
|
"eval_logits/rejected": -2.392037868499756, |
|
"eval_logps/chosen": -310.4427185058594, |
|
"eval_logps/rejected": -310.4427185058594, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.039641354233026505, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.039641354233026505, |
|
"eval_runtime": 2711.551, |
|
"eval_samples_per_second": 2.246, |
|
"eval_steps_per_second": 0.281, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5867328322265197, |
|
"grad_norm": 0.01214599609375, |
|
"learning_rate": 2.1797913032321283e-06, |
|
"logits/chosen": -2.420572519302368, |
|
"logits/rejected": -2.420572519302368, |
|
"logps/chosen": -277.4279479980469, |
|
"logps/rejected": -277.4279479980469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03539072722196579, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03539072722196579, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.5896519010435671, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 2.1545348803697745e-06, |
|
"logits/chosen": -2.4433321952819824, |
|
"logits/rejected": -2.4433321952819824, |
|
"logps/chosen": -281.5128479003906, |
|
"logps/rejected": -281.5128479003906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.016543438658118248, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.016543438658118248, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5925709698606144, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 2.1293143525870396e-06, |
|
"logits/chosen": -2.435228109359741, |
|
"logits/rejected": -2.435228109359741, |
|
"logps/chosen": -315.1198425292969, |
|
"logps/rejected": -315.1198425292969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04324204847216606, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04324204847216606, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.5954900386776618, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 2.1041323403882836e-06, |
|
"logits/chosen": -2.458317995071411, |
|
"logits/rejected": -2.458317995071411, |
|
"logps/chosen": -314.63482666015625, |
|
"logps/rejected": -314.63482666015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.039002105593681335, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.039002105593681335, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5984091074947092, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 2.078991460275958e-06, |
|
"logits/chosen": -2.4496326446533203, |
|
"logits/rejected": -2.4496326446533203, |
|
"logps/chosen": -295.86199951171875, |
|
"logps/rejected": -295.86199951171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03856682404875755, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03856682404875755, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.6013281763117565, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 2.0538943244787452e-06, |
|
"logits/chosen": -2.440256118774414, |
|
"logits/rejected": -2.440256118774414, |
|
"logps/chosen": -302.68463134765625, |
|
"logps/rejected": -302.68463134765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.040991030633449554, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.040991030633449554, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.6042472451288039, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 2.0288435406801293e-06, |
|
"logits/chosen": -2.4207422733306885, |
|
"logits/rejected": -2.4207422733306885, |
|
"logps/chosen": -347.23297119140625, |
|
"logps/rejected": -347.23297119140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03826383873820305, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03826383873820305, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.6071663139458513, |
|
"grad_norm": 0.01275634765625, |
|
"learning_rate": 2.0038417117474574e-06, |
|
"logits/chosen": -2.4277267456054688, |
|
"logits/rejected": -2.4277267456054688, |
|
"logps/chosen": -314.09674072265625, |
|
"logps/rejected": -314.09674072265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05563684552907944, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05563684552907944, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.6100853827628986, |
|
"grad_norm": 0.01251220703125, |
|
"learning_rate": 1.9788914354614853e-06, |
|
"logits/chosen": -2.4430274963378906, |
|
"logits/rejected": -2.4430274963378906, |
|
"logps/chosen": -280.791015625, |
|
"logps/rejected": -280.791015625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.039162371307611465, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.039162371307611465, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.613004451579946, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 1.9539953042464656e-06, |
|
"logits/chosen": -2.4126973152160645, |
|
"logits/rejected": -2.4126973152160645, |
|
"logps/chosen": -341.8514709472656, |
|
"logps/rejected": -341.8514709472656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04444648697972298, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04444648697972298, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.613004451579946, |
|
"eval_logits/chosen": -2.390094041824341, |
|
"eval_logits/rejected": -2.390094041824341, |
|
"eval_logps/chosen": -310.71502685546875, |
|
"eval_logps/rejected": -310.71502685546875, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.042364299297332764, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.042364299297332764, |
|
"eval_runtime": 2698.9127, |
|
"eval_samples_per_second": 2.256, |
|
"eval_steps_per_second": 0.282, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.6159235203969934, |
|
"grad_norm": 0.0126953125, |
|
"learning_rate": 1.929155904900778e-06, |
|
"logits/chosen": -2.442920207977295, |
|
"logits/rejected": -2.442920207977295, |
|
"logps/chosen": -336.13153076171875, |
|
"logps/rejected": -336.13153076171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04605261981487274, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04605261981487274, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.6188425892140407, |
|
"grad_norm": 0.0128173828125, |
|
"learning_rate": 1.9043758183281548e-06, |
|
"logits/chosen": -2.398139476776123, |
|
"logits/rejected": -2.398139476776123, |
|
"logps/chosen": -297.93353271484375, |
|
"logps/rejected": -297.93353271484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03661734238266945, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03661734238266945, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.6217616580310881, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 1.8796576192695198e-06, |
|
"logits/chosen": -2.4115586280822754, |
|
"logits/rejected": -2.4115586280822754, |
|
"logps/chosen": -283.5032653808594, |
|
"logps/rejected": -283.5032653808594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05024952441453934, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05024952441453934, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.6246807268481355, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 1.8550038760354559e-06, |
|
"logits/chosen": -2.4140570163726807, |
|
"logits/rejected": -2.4140570163726807, |
|
"logps/chosen": -328.29241943359375, |
|
"logps/rejected": -328.29241943359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03783569857478142, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03783569857478142, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.6275997956651828, |
|
"grad_norm": 0.01470947265625, |
|
"learning_rate": 1.8304171502393542e-06, |
|
"logits/chosen": -2.4498252868652344, |
|
"logits/rejected": -2.4498252868652344, |
|
"logps/chosen": -333.46807861328125, |
|
"logps/rejected": -333.46807861328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05009561777114868, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05009561777114868, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.6305188644822302, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 1.8058999965312484e-06, |
|
"logits/chosen": -2.3965957164764404, |
|
"logits/rejected": -2.3965957164764404, |
|
"logps/chosen": -306.3211669921875, |
|
"logps/rejected": -306.3211669921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04475449398159981, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04475449398159981, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.6334379332992776, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.7814549623323828e-06, |
|
"logits/chosen": -2.400684356689453, |
|
"logits/rejected": -2.400684356689453, |
|
"logps/chosen": -286.625, |
|
"logps/rejected": -286.625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043368883430957794, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043368883430957794, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.6363570021163248, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 1.7570845875705205e-06, |
|
"logits/chosen": -2.4366753101348877, |
|
"logits/rejected": -2.4366753101348877, |
|
"logps/chosen": -338.27679443359375, |
|
"logps/rejected": -338.27679443359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05562018230557442, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05562018230557442, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.6392760709333722, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 1.7327914044160388e-06, |
|
"logits/chosen": -2.449612617492676, |
|
"logits/rejected": -2.449612617492676, |
|
"logps/chosen": -316.91766357421875, |
|
"logps/rejected": -316.91766357421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04444233328104019, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04444233328104019, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.6421951397504196, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 1.7085779370188276e-06, |
|
"logits/chosen": -2.3980746269226074, |
|
"logits/rejected": -2.3980746269226074, |
|
"logps/chosen": -308.85906982421875, |
|
"logps/rejected": -308.85906982421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0481327660381794, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0481327660381794, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6421951397504196, |
|
"eval_logits/chosen": -2.3910679817199707, |
|
"eval_logits/rejected": -2.3910679817199707, |
|
"eval_logps/chosen": -311.0310363769531, |
|
"eval_logps/rejected": -311.0310363769531, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04552413523197174, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04552413523197174, |
|
"eval_runtime": 2707.3295, |
|
"eval_samples_per_second": 2.249, |
|
"eval_steps_per_second": 0.281, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.6451142085674669, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 1.6844467012460193e-06, |
|
"logits/chosen": -2.429086446762085, |
|
"logits/rejected": -2.429086446762085, |
|
"logps/chosen": -306.8155822753906, |
|
"logps/rejected": -306.8155822753906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04282836988568306, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04282836988568306, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.6480332773845143, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 1.6604002044205825e-06, |
|
"logits/chosen": -2.4325811862945557, |
|
"logits/rejected": -2.4325811862945557, |
|
"logps/chosen": -337.0578308105469, |
|
"logps/rejected": -337.0578308105469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04457175359129906, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04457175359129906, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6509523462015617, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 1.6364409450608018e-06, |
|
"logits/chosen": -2.4428985118865967, |
|
"logits/rejected": -2.4428985118865967, |
|
"logps/chosen": -308.55657958984375, |
|
"logps/rejected": -308.55657958984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04731472209095955, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04731472209095955, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.653871415018609, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 1.6125714126206736e-06, |
|
"logits/chosen": -2.4196009635925293, |
|
"logits/rejected": -2.4196009635925293, |
|
"logps/chosen": -348.8056335449219, |
|
"logps/rejected": -348.8056335449219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.049455009400844574, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.049455009400844574, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6567904838356564, |
|
"grad_norm": 0.01556396484375, |
|
"learning_rate": 1.5887940872312391e-06, |
|
"logits/chosen": -2.4100897312164307, |
|
"logits/rejected": -2.4100897312164307, |
|
"logps/chosen": -320.3233642578125, |
|
"logps/rejected": -320.3233642578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04676957428455353, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04676957428455353, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6597095526527038, |
|
"grad_norm": 0.0147705078125, |
|
"learning_rate": 1.5651114394428955e-06, |
|
"logits/chosen": -2.4624266624450684, |
|
"logits/rejected": -2.4624266624450684, |
|
"logps/chosen": -344.6718444824219, |
|
"logps/rejected": -344.6718444824219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0538489893078804, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0538489893078804, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.6626286214697511, |
|
"grad_norm": 0.01251220703125, |
|
"learning_rate": 1.5415259299686903e-06, |
|
"logits/chosen": -2.4147191047668457, |
|
"logits/rejected": -2.4147191047668457, |
|
"logps/chosen": -316.6529235839844, |
|
"logps/rejected": -316.6529235839844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.041484713554382324, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.041484713554382324, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.6655476902867985, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 1.5180400094286496e-06, |
|
"logits/chosen": -2.440053939819336, |
|
"logits/rejected": -2.440053939819336, |
|
"logps/chosen": -309.5370178222656, |
|
"logps/rejected": -309.5370178222656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04570756107568741, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04570756107568741, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.6684667591038459, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 1.494656118095149e-06, |
|
"logits/chosen": -2.407764434814453, |
|
"logits/rejected": -2.407764434814453, |
|
"logps/chosen": -320.51263427734375, |
|
"logps/rejected": -320.51263427734375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04706931859254837, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04706931859254837, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6713858279208932, |
|
"grad_norm": 0.0120849609375, |
|
"learning_rate": 1.4713766856393557e-06, |
|
"logits/chosen": -2.420919895172119, |
|
"logits/rejected": -2.420919895172119, |
|
"logps/chosen": -295.04547119140625, |
|
"logps/rejected": -295.04547119140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05071335285902023, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05071335285902023, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6713858279208932, |
|
"eval_logits/chosen": -2.391244411468506, |
|
"eval_logits/rejected": -2.391244411468506, |
|
"eval_logps/chosen": -310.7880554199219, |
|
"eval_logps/rejected": -310.7880554199219, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04309455305337906, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04309455305337906, |
|
"eval_runtime": 2670.058, |
|
"eval_samples_per_second": 2.281, |
|
"eval_steps_per_second": 0.285, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6743048967379406, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 1.448204130878785e-06, |
|
"logits/chosen": -2.3968968391418457, |
|
"logits/rejected": -2.3968968391418457, |
|
"logps/chosen": -287.2406005859375, |
|
"logps/rejected": -287.2406005859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04974811524152756, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04974811524152756, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.677223965554988, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 1.425140861525967e-06, |
|
"logits/chosen": -2.407982587814331, |
|
"logits/rejected": -2.407982587814331, |
|
"logps/chosen": -346.8302307128906, |
|
"logps/rejected": -346.8302307128906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.045040082186460495, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.045040082186460495, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6801430343720353, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 1.4021892739382853e-06, |
|
"logits/chosen": -2.4366557598114014, |
|
"logits/rejected": -2.4366557598114014, |
|
"logps/chosen": -315.5507507324219, |
|
"logps/rejected": -315.5507507324219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.053034014999866486, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.053034014999866486, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6830621031890827, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 1.3793517528689804e-06, |
|
"logits/chosen": -2.40993070602417, |
|
"logits/rejected": -2.40993070602417, |
|
"logps/chosen": -322.5754699707031, |
|
"logps/rejected": -322.5754699707031, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04859765246510506, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04859765246510506, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.6859811720061301, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 1.3566306712193704e-06, |
|
"logits/chosen": -2.4204134941101074, |
|
"logits/rejected": -2.4204134941101074, |
|
"logps/chosen": -349.4993896484375, |
|
"logps/rejected": -349.4993896484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05103806406259537, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05103806406259537, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6889002408231774, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 1.3340283897922911e-06, |
|
"logits/chosen": -2.4295237064361572, |
|
"logits/rejected": -2.4295237064361572, |
|
"logps/chosen": -330.99005126953125, |
|
"logps/rejected": -330.99005126953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04879484325647354, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04879484325647354, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6918193096402248, |
|
"grad_norm": 0.0146484375, |
|
"learning_rate": 1.3115472570468058e-06, |
|
"logits/chosen": -2.4285712242126465, |
|
"logits/rejected": -2.4285712242126465, |
|
"logps/chosen": -336.67364501953125, |
|
"logps/rejected": -336.67364501953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04440216347575188, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04440216347575188, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6947383784572722, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 1.2891896088541928e-06, |
|
"logits/chosen": -2.405956745147705, |
|
"logits/rejected": -2.405956745147705, |
|
"logps/chosen": -338.88739013671875, |
|
"logps/rejected": -338.88739013671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.047105275094509125, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.047105275094509125, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6976574472743194, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 1.266957768255232e-06, |
|
"logits/chosen": -2.422194719314575, |
|
"logits/rejected": -2.422194719314575, |
|
"logps/chosen": -318.286865234375, |
|
"logps/rejected": -318.286865234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04666005074977875, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04666005074977875, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.7005765160913668, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 1.2448540452188432e-06, |
|
"logits/chosen": -2.3955206871032715, |
|
"logits/rejected": -2.3955206871032715, |
|
"logps/chosen": -314.3586120605469, |
|
"logps/rejected": -314.3586120605469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04044215753674507, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04044215753674507, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7005765160913668, |
|
"eval_logits/chosen": -2.3899266719818115, |
|
"eval_logits/rejected": -2.3899266719818115, |
|
"eval_logps/chosen": -310.6455078125, |
|
"eval_logps/rejected": -310.6455078125, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04166920483112335, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04166920483112335, |
|
"eval_runtime": 2668.7744, |
|
"eval_samples_per_second": 2.282, |
|
"eval_steps_per_second": 0.286, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.7034955849084142, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 1.2228807364020617e-06, |
|
"logits/chosen": -2.4090027809143066, |
|
"logits/rejected": -2.4090027809143066, |
|
"logps/chosen": -268.48944091796875, |
|
"logps/rejected": -268.48944091796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.037643421441316605, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.037643421441316605, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.7064146537254615, |
|
"grad_norm": 0.012451171875, |
|
"learning_rate": 1.2010401249114166e-06, |
|
"logits/chosen": -2.4060184955596924, |
|
"logits/rejected": -2.4060184955596924, |
|
"logps/chosen": -338.2677001953125, |
|
"logps/rejected": -338.2677001953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035085879266262054, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035085879266262054, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.7093337225425089, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.1793344800656995e-06, |
|
"logits/chosen": -2.3857572078704834, |
|
"logits/rejected": -2.3857572078704834, |
|
"logps/chosen": -325.4837646484375, |
|
"logps/rejected": -325.4837646484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03704181686043739, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03704181686043739, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.7122527913595563, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 1.1577660571601796e-06, |
|
"logits/chosen": -2.396127223968506, |
|
"logits/rejected": -2.396127223968506, |
|
"logps/chosen": -321.38897705078125, |
|
"logps/rejected": -321.38897705078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0452260822057724, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0452260822057724, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.7151718601766036, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 1.1363370972322694e-06, |
|
"logits/chosen": -2.4177489280700684, |
|
"logits/rejected": -2.4177489280700684, |
|
"logps/chosen": -296.6512756347656, |
|
"logps/rejected": -296.6512756347656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04763947054743767, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04763947054743767, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.718090928993651, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 1.115049826828669e-06, |
|
"logits/chosen": -2.4321625232696533, |
|
"logits/rejected": -2.4321625232696533, |
|
"logps/chosen": -306.14141845703125, |
|
"logps/rejected": -306.14141845703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04333222657442093, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04333222657442093, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.7210099978106984, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 1.0939064577740266e-06, |
|
"logits/chosen": -2.4054694175720215, |
|
"logits/rejected": -2.4054694175720215, |
|
"logps/chosen": -301.36334228515625, |
|
"logps/rejected": -301.36334228515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.041240572929382324, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.041240572929382324, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.7239290666277457, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 1.0729091869411137e-06, |
|
"logits/chosen": -2.4020252227783203, |
|
"logits/rejected": -2.4020252227783203, |
|
"logps/chosen": -332.1387023925781, |
|
"logps/rejected": -332.1387023925781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043921731412410736, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043921731412410736, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.7268481354447931, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 1.0520601960225708e-06, |
|
"logits/chosen": -2.421534299850464, |
|
"logits/rejected": -2.421534299850464, |
|
"logps/chosen": -314.00311279296875, |
|
"logps/rejected": -314.00311279296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.01183334831148386, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.01183334831148386, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.7297672042618405, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 1.0313616513042133e-06, |
|
"logits/chosen": -2.4747350215911865, |
|
"logits/rejected": -2.4747350215911865, |
|
"logps/chosen": -319.47918701171875, |
|
"logps/rejected": -319.47918701171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.055976878851652145, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.055976878851652145, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7297672042618405, |
|
"eval_logits/chosen": -2.3914709091186523, |
|
"eval_logits/rejected": -2.3914709091186523, |
|
"eval_logps/chosen": -310.819580078125, |
|
"eval_logps/rejected": -310.819580078125, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04341000318527222, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04341000318527222, |
|
"eval_runtime": 2669.3967, |
|
"eval_samples_per_second": 2.281, |
|
"eval_steps_per_second": 0.285, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.7326862730788878, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 1.0108157034399532e-06, |
|
"logits/chosen": -2.4052977561950684, |
|
"logits/rejected": -2.4052977561950684, |
|
"logps/chosen": -298.67474365234375, |
|
"logps/rejected": -298.67474365234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04686864838004112, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04686864838004112, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.7356053418959352, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 9.90424487228334e-07, |
|
"logits/chosen": -2.411712646484375, |
|
"logits/rejected": -2.411712646484375, |
|
"logps/chosen": -322.70428466796875, |
|
"logps/rejected": -322.70428466796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043312918394804, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043312918394804, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.7385244107129826, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 9.701901213907192e-07, |
|
"logits/chosen": -2.4330382347106934, |
|
"logits/rejected": -2.4330382347106934, |
|
"logps/chosen": -324.5224609375, |
|
"logps/rejected": -324.5224609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05447854846715927, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05447854846715927, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.7414434795300299, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 9.501147083511511e-07, |
|
"logits/chosen": -2.45332407951355, |
|
"logits/rejected": -2.45332407951355, |
|
"logps/chosen": -321.7140808105469, |
|
"logps/rejected": -321.7140808105469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0516216978430748, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0516216978430748, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.7443625483470773, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 9.302003340178962e-07, |
|
"logits/chosen": -2.417236804962158, |
|
"logits/rejected": -2.417236804962158, |
|
"logps/chosen": -333.95574951171875, |
|
"logps/rejected": -333.95574951171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0465201810002327, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0465201810002327, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.7472816171641247, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 9.10449067566718e-07, |
|
"logits/chosen": -2.459394931793213, |
|
"logits/rejected": -2.459394931793213, |
|
"logps/chosen": -303.9725646972656, |
|
"logps/rejected": -303.9725646972656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04736841470003128, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04736841470003128, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.750200685981172, |
|
"grad_norm": 0.01513671875, |
|
"learning_rate": 8.908629612258765e-07, |
|
"logits/chosen": -2.435121774673462, |
|
"logits/rejected": -2.435121774673462, |
|
"logps/chosen": -300.51055908203125, |
|
"logps/rejected": -300.51055908203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04969844967126846, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04969844967126846, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.7531197547982194, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 8.714440500628999e-07, |
|
"logits/chosen": -2.393557071685791, |
|
"logits/rejected": -2.393557071685791, |
|
"logps/chosen": -305.946044921875, |
|
"logps/rejected": -305.946044921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035433102399110794, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035433102399110794, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.7560388236152668, |
|
"grad_norm": 0.01385498046875, |
|
"learning_rate": 8.521943517731276e-07, |
|
"logits/chosen": -2.394944667816162, |
|
"logits/rejected": -2.394944667816162, |
|
"logps/chosen": -329.5417175292969, |
|
"logps/rejected": -329.5417175292969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.029879886656999588, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.029879886656999588, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.758957892432314, |
|
"grad_norm": 0.01513671875, |
|
"learning_rate": 8.33115866470069e-07, |
|
"logits/chosen": -2.3986093997955322, |
|
"logits/rejected": -2.3986093997955322, |
|
"logps/chosen": -297.0606994628906, |
|
"logps/rejected": -297.0606994628906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04173046723008156, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04173046723008156, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.758957892432314, |
|
"eval_logits/chosen": -2.3918919563293457, |
|
"eval_logits/rejected": -2.3918919563293457, |
|
"eval_logps/chosen": -310.8546447753906, |
|
"eval_logps/rejected": -310.8546447753906, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.0437602661550045, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.0437602661550045, |
|
"eval_runtime": 2682.4018, |
|
"eval_samples_per_second": 2.27, |
|
"eval_steps_per_second": 0.284, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7618769612493614, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 8.142105764775824e-07, |
|
"logits/chosen": -2.384005546569824, |
|
"logits/rejected": -2.384005546569824, |
|
"logps/chosen": -327.1615295410156, |
|
"logps/rejected": -327.1615295410156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.051810719072818756, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.051810719072818756, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7647960300664088, |
|
"grad_norm": 0.01458740234375, |
|
"learning_rate": 7.954804461239054e-07, |
|
"logits/chosen": -2.444282054901123, |
|
"logits/rejected": -2.444282054901123, |
|
"logps/chosen": -314.5889587402344, |
|
"logps/rejected": -314.5889587402344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04727676510810852, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04727676510810852, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7677150988834561, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 7.769274215375544e-07, |
|
"logits/chosen": -2.432978391647339, |
|
"logits/rejected": -2.432978391647339, |
|
"logps/chosen": -293.0484924316406, |
|
"logps/rejected": -293.0484924316406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0413900688290596, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0413900688290596, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.7706341677005035, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 7.585534304451103e-07, |
|
"logits/chosen": -2.444913387298584, |
|
"logits/rejected": -2.444913387298584, |
|
"logps/chosen": -330.8976135253906, |
|
"logps/rejected": -330.8976135253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043237775564193726, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043237775564193726, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.7735532365175509, |
|
"grad_norm": 0.01312255859375, |
|
"learning_rate": 7.403603819709288e-07, |
|
"logits/chosen": -2.4194247722625732, |
|
"logits/rejected": -2.4194247722625732, |
|
"logps/chosen": -302.08465576171875, |
|
"logps/rejected": -302.08465576171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04694979637861252, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04694979637861252, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.7764723053345982, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 7.223501664387664e-07, |
|
"logits/chosen": -2.440764904022217, |
|
"logits/rejected": -2.440764904022217, |
|
"logps/chosen": -280.7825622558594, |
|
"logps/rejected": -280.7825622558594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.036083877086639404, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.036083877086639404, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.7793913741516456, |
|
"grad_norm": 0.01458740234375, |
|
"learning_rate": 7.045246551753779e-07, |
|
"logits/chosen": -2.4197888374328613, |
|
"logits/rejected": -2.4197888374328613, |
|
"logps/chosen": -323.67938232421875, |
|
"logps/rejected": -323.67938232421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043979812413454056, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043979812413454056, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.782310442968693, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 6.868857003160709e-07, |
|
"logits/chosen": -2.470567226409912, |
|
"logits/rejected": -2.470567226409912, |
|
"logps/chosen": -356.6578369140625, |
|
"logps/rejected": -356.6578369140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05309978872537613, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05309978872537613, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7852295117857403, |
|
"grad_norm": 0.0150146484375, |
|
"learning_rate": 6.69435134612266e-07, |
|
"logits/chosen": -2.4125561714172363, |
|
"logits/rejected": -2.4125561714172363, |
|
"logps/chosen": -302.1919250488281, |
|
"logps/rejected": -302.1919250488281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04638701677322388, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04638701677322388, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7881485806027877, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 6.521747712410687e-07, |
|
"logits/chosen": -2.431802988052368, |
|
"logits/rejected": -2.431802988052368, |
|
"logps/chosen": -319.6323547363281, |
|
"logps/rejected": -319.6323547363281, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04552530124783516, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04552530124783516, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7881485806027877, |
|
"eval_logits/chosen": -2.3916165828704834, |
|
"eval_logits/rejected": -2.3916165828704834, |
|
"eval_logps/chosen": -310.8406677246094, |
|
"eval_logps/rejected": -310.8406677246094, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04362065717577934, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04362065717577934, |
|
"eval_runtime": 2682.1268, |
|
"eval_samples_per_second": 2.271, |
|
"eval_steps_per_second": 0.284, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7910676494198351, |
|
"grad_norm": 0.0250244140625, |
|
"learning_rate": 6.351064036168708e-07, |
|
"logits/chosen": -2.4238877296447754, |
|
"logits/rejected": -2.4238877296447754, |
|
"logps/chosen": -338.21759033203125, |
|
"logps/rejected": -338.21759033203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.048554692417383194, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.048554692417383194, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7939867182368824, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 6.182318052050102e-07, |
|
"logits/chosen": -2.398974895477295, |
|
"logits/rejected": -2.398974895477295, |
|
"logps/chosen": -329.53106689453125, |
|
"logps/rejected": -329.53106689453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05249527841806412, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05249527841806412, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7969057870539298, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 6.015527293374979e-07, |
|
"logits/chosen": -2.4338581562042236, |
|
"logits/rejected": -2.4338581562042236, |
|
"logps/chosen": -334.1202087402344, |
|
"logps/rejected": -334.1202087402344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04841463267803192, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04841463267803192, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7998248558709772, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 5.850709090308459e-07, |
|
"logits/chosen": -2.4255330562591553, |
|
"logits/rejected": -2.4255330562591553, |
|
"logps/chosen": -295.30523681640625, |
|
"logps/rejected": -295.30523681640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0433431938290596, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0433431938290596, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.8027439246880245, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 5.687880568059961e-07, |
|
"logits/chosen": -2.3997416496276855, |
|
"logits/rejected": -2.3997416496276855, |
|
"logps/chosen": -314.76361083984375, |
|
"logps/rejected": -314.76361083984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04872073233127594, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04872073233127594, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.8056629935050719, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 5.527058645103842e-07, |
|
"logits/chosen": -2.3996376991271973, |
|
"logits/rejected": -2.3996376991271973, |
|
"logps/chosen": -376.6802673339844, |
|
"logps/rejected": -376.6802673339844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0522351935505867, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0522351935505867, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.8085820623221193, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 5.368260031421526e-07, |
|
"logits/chosen": -2.4533755779266357, |
|
"logits/rejected": -2.4533755779266357, |
|
"logps/chosen": -338.7648010253906, |
|
"logps/rejected": -338.7648010253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04105439782142639, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04105439782142639, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.8115011311391666, |
|
"grad_norm": 0.01263427734375, |
|
"learning_rate": 5.211501226765242e-07, |
|
"logits/chosen": -2.43373441696167, |
|
"logits/rejected": -2.43373441696167, |
|
"logps/chosen": -285.7012023925781, |
|
"logps/rejected": -285.7012023925781, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.02375207468867302, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.02375207468867302, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.814420199956214, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 5.056798518943678e-07, |
|
"logits/chosen": -2.4133718013763428, |
|
"logits/rejected": -2.4133718013763428, |
|
"logps/chosen": -315.09210205078125, |
|
"logps/rejected": -315.09210205078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.053018081933259964, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.053018081933259964, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.8173392687732614, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 4.904167982129591e-07, |
|
"logits/chosen": -2.423839569091797, |
|
"logits/rejected": -2.423839569091797, |
|
"logps/chosen": -294.44683837890625, |
|
"logps/rejected": -294.44683837890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04831403121352196, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04831403121352196, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8173392687732614, |
|
"eval_logits/chosen": -2.3914895057678223, |
|
"eval_logits/rejected": -2.3914895057678223, |
|
"eval_logps/chosen": -310.798095703125, |
|
"eval_logps/rejected": -310.798095703125, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04319505766034126, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04319505766034126, |
|
"eval_runtime": 2682.0962, |
|
"eval_samples_per_second": 2.271, |
|
"eval_steps_per_second": 0.284, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.8202583375903086, |
|
"grad_norm": 0.0126953125, |
|
"learning_rate": 4.7536254751896493e-07, |
|
"logits/chosen": -2.4333229064941406, |
|
"logits/rejected": -2.4333229064941406, |
|
"logps/chosen": -315.96234130859375, |
|
"logps/rejected": -315.96234130859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05122748017311096, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05122748017311096, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.823177406407356, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 4.6051866400366354e-07, |
|
"logits/chosen": -2.4289793968200684, |
|
"logits/rejected": -2.4289793968200684, |
|
"logps/chosen": -344.29608154296875, |
|
"logps/rejected": -344.29608154296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04764155298471451, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04764155298471451, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.8260964752244034, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 4.4588669000042133e-07, |
|
"logits/chosen": -2.4046084880828857, |
|
"logits/rejected": -2.4046084880828857, |
|
"logps/chosen": -325.74957275390625, |
|
"logps/rejected": -325.74957275390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.035486068576574326, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.035486068576574326, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.8290155440414507, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 4.3146814582443605e-07, |
|
"logits/chosen": -2.418729066848755, |
|
"logits/rejected": -2.418729066848755, |
|
"logps/chosen": -327.8818359375, |
|
"logps/rejected": -327.8818359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.046850480139255524, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.046850480139255524, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.8319346128584981, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 4.1726452961477147e-07, |
|
"logits/chosen": -2.416329860687256, |
|
"logits/rejected": -2.416329860687256, |
|
"logps/chosen": -319.5370178222656, |
|
"logps/rejected": -319.5370178222656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0506584569811821, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0506584569811821, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.8348536816755455, |
|
"grad_norm": 0.0146484375, |
|
"learning_rate": 4.0327731717869775e-07, |
|
"logits/chosen": -2.4376559257507324, |
|
"logits/rejected": -2.4376559257507324, |
|
"logps/chosen": -272.7819519042969, |
|
"logps/rejected": -272.7819519042969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05310596153140068, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05310596153140068, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.8377727504925928, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 3.8950796183834516e-07, |
|
"logits/chosen": -2.4388468265533447, |
|
"logits/rejected": -2.4388468265533447, |
|
"logps/chosen": -345.3861389160156, |
|
"logps/rejected": -345.3861389160156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05019260570406914, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05019260570406914, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.8406918193096402, |
|
"grad_norm": 0.01495361328125, |
|
"learning_rate": 3.759578942797029e-07, |
|
"logits/chosen": -2.4550201892852783, |
|
"logits/rejected": -2.4550201892852783, |
|
"logps/chosen": -306.2907409667969, |
|
"logps/rejected": -306.2907409667969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.046652454882860184, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.046652454882860184, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.8436108881266876, |
|
"grad_norm": 0.0126953125, |
|
"learning_rate": 3.6262852240396356e-07, |
|
"logits/chosen": -2.446690082550049, |
|
"logits/rejected": -2.446690082550049, |
|
"logps/chosen": -310.69708251953125, |
|
"logps/rejected": -310.69708251953125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04568660259246826, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04568660259246826, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.8465299569437349, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 3.4952123118123735e-07, |
|
"logits/chosen": -2.402627468109131, |
|
"logits/rejected": -2.402627468109131, |
|
"logps/chosen": -312.1624755859375, |
|
"logps/rejected": -312.1624755859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.045014895498752594, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.045014895498752594, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8465299569437349, |
|
"eval_logits/chosen": -2.391954183578491, |
|
"eval_logits/rejected": -2.391954183578491, |
|
"eval_logps/chosen": -310.79425048828125, |
|
"eval_logps/rejected": -310.79425048828125, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.043156567960977554, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.043156567960977554, |
|
"eval_runtime": 2682.3759, |
|
"eval_samples_per_second": 2.27, |
|
"eval_steps_per_second": 0.284, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.8494490257607823, |
|
"grad_norm": 0.01470947265625, |
|
"learning_rate": 3.3663738250664853e-07, |
|
"logits/chosen": -2.416839122772217, |
|
"logits/rejected": -2.416839122772217, |
|
"logps/chosen": -342.91815185546875, |
|
"logps/rejected": -342.91815185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.051371246576309204, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.051371246576309204, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.8523680945778297, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 3.239783150588283e-07, |
|
"logits/chosen": -2.3476662635803223, |
|
"logits/rejected": -2.3476662635803223, |
|
"logps/chosen": -304.71368408203125, |
|
"logps/rejected": -304.71368408203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.005339882802218199, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.005339882802218199, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.855287163394877, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 3.1154534416082573e-07, |
|
"logits/chosen": -2.416965961456299, |
|
"logits/rejected": -2.416965961456299, |
|
"logps/chosen": -299.3199157714844, |
|
"logps/rejected": -299.3199157714844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04180007427930832, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04180007427930832, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.8582062322119244, |
|
"grad_norm": 0.01055908203125, |
|
"learning_rate": 2.9933976164343514e-07, |
|
"logits/chosen": -2.4285387992858887, |
|
"logits/rejected": -2.4285387992858887, |
|
"logps/chosen": -303.32183837890625, |
|
"logps/rejected": -303.32183837890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04410778731107712, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04410778731107712, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.8611253010289718, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 2.873628357109745e-07, |
|
"logits/chosen": -2.4083211421966553, |
|
"logits/rejected": -2.4083211421966553, |
|
"logps/chosen": -326.7142028808594, |
|
"logps/rejected": -326.7142028808594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.046554580330848694, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.046554580330848694, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.8640443698460191, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 2.7561581080951195e-07, |
|
"logits/chosen": -2.4226157665252686, |
|
"logits/rejected": -2.4226157665252686, |
|
"logps/chosen": -292.55767822265625, |
|
"logps/rejected": -292.55767822265625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04246639460325241, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04246639460325241, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.8669634386630665, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 2.640999074975645e-07, |
|
"logits/chosen": -2.43457293510437, |
|
"logits/rejected": -2.43457293510437, |
|
"logps/chosen": -298.2882385253906, |
|
"logps/rejected": -298.2882385253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04169774800539017, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04169774800539017, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.8698825074801139, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 2.5281632231927786e-07, |
|
"logits/chosen": -2.473017930984497, |
|
"logits/rejected": -2.473017930984497, |
|
"logps/chosen": -307.8494567871094, |
|
"logps/rejected": -307.8494567871094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.045904386788606644, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.045904386788606644, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8728015762971612, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 2.417662276800997e-07, |
|
"logits/chosen": -2.4377925395965576, |
|
"logits/rejected": -2.4377925395965576, |
|
"logps/chosen": -329.8043518066406, |
|
"logps/rejected": -329.8043518066406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.052566416561603546, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.052566416561603546, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8757206451142086, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 2.30950771724964e-07, |
|
"logits/chosen": -2.4452061653137207, |
|
"logits/rejected": -2.4452061653137207, |
|
"logps/chosen": -316.7723388671875, |
|
"logps/rejected": -316.7723388671875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.048340607434511185, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.048340607434511185, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8757206451142086, |
|
"eval_logits/chosen": -2.3918232917785645, |
|
"eval_logits/rejected": -2.3918232917785645, |
|
"eval_logps/chosen": -310.78662109375, |
|
"eval_logps/rejected": -310.78662109375, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04308019578456879, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04308019578456879, |
|
"eval_runtime": 2681.8561, |
|
"eval_samples_per_second": 2.271, |
|
"eval_steps_per_second": 0.284, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.878639713931256, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 2.2037107821899272e-07, |
|
"logits/chosen": -2.414727210998535, |
|
"logits/rejected": -2.414727210998535, |
|
"logps/chosen": -343.22796630859375, |
|
"logps/rejected": -343.22796630859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.047485075891017914, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.047485075891017914, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.8815587827483032, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 2.100282464307357e-07, |
|
"logits/chosen": -2.4386258125305176, |
|
"logits/rejected": -2.4386258125305176, |
|
"logps/chosen": -305.25250244140625, |
|
"logps/rejected": -305.25250244140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04185379669070244, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04185379669070244, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.8844778515653506, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.999233510179488e-07, |
|
"logits/chosen": -2.4112370014190674, |
|
"logits/rejected": -2.4112370014190674, |
|
"logps/chosen": -339.65093994140625, |
|
"logps/rejected": -339.65093994140625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.044059764593839645, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.044059764593839645, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.887396920382398, |
|
"grad_norm": 0.012939453125, |
|
"learning_rate": 1.9005744191593678e-07, |
|
"logits/chosen": -2.4179887771606445, |
|
"logits/rejected": -2.4179887771606445, |
|
"logps/chosen": -297.5303649902344, |
|
"logps/rejected": -297.5303649902344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03384246677160263, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03384246677160263, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.8903159891994453, |
|
"grad_norm": 0.0120849609375, |
|
"learning_rate": 1.8043154422845794e-07, |
|
"logits/chosen": -2.4646730422973633, |
|
"logits/rejected": -2.4646730422973633, |
|
"logps/chosen": -295.91790771484375, |
|
"logps/rejected": -295.91790771484375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04882458597421646, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04882458597421646, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8932350580164927, |
|
"grad_norm": 0.0186767578125, |
|
"learning_rate": 1.7104665812121445e-07, |
|
"logits/chosen": -2.423285961151123, |
|
"logits/rejected": -2.423285961151123, |
|
"logps/chosen": -297.9593505859375, |
|
"logps/rejected": -297.9593505859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04218859225511551, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04218859225511551, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8961541268335401, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 1.619037587179309e-07, |
|
"logits/chosen": -2.3985249996185303, |
|
"logits/rejected": -2.3985249996185303, |
|
"logps/chosen": -332.85809326171875, |
|
"logps/rejected": -332.85809326171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.048540227115154266, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.048540227115154266, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8990731956505874, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 1.5300379599903408e-07, |
|
"logits/chosen": -2.4070308208465576, |
|
"logits/rejected": -2.4070308208465576, |
|
"logps/chosen": -310.7314147949219, |
|
"logps/rejected": -310.7314147949219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03895800933241844, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03895800933241844, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.9019922644676348, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 1.44347694702949e-07, |
|
"logits/chosen": -2.3916313648223877, |
|
"logits/rejected": -2.3916313648223877, |
|
"logps/chosen": -288.28106689453125, |
|
"logps/rejected": -288.28106689453125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03293871134519577, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03293871134519577, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.9049113332846822, |
|
"grad_norm": 0.017822265625, |
|
"learning_rate": 1.359363542300124e-07, |
|
"logits/chosen": -2.4147801399230957, |
|
"logits/rejected": -2.4147801399230957, |
|
"logps/chosen": -295.56768798828125, |
|
"logps/rejected": -295.56768798828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04364749416708946, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04364749416708946, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9049113332846822, |
|
"eval_logits/chosen": -2.390821933746338, |
|
"eval_logits/rejected": -2.390821933746338, |
|
"eval_logps/chosen": -310.7793884277344, |
|
"eval_logps/rejected": -310.7793884277344, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04300786182284355, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04300786182284355, |
|
"eval_runtime": 2681.9583, |
|
"eval_samples_per_second": 2.271, |
|
"eval_steps_per_second": 0.284, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.9078304021017295, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 1.2777064854902487e-07, |
|
"logits/chosen": -2.44869065284729, |
|
"logits/rejected": -2.44869065284729, |
|
"logps/chosen": -324.82257080078125, |
|
"logps/rejected": -324.82257080078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04247719421982765, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04247719421982765, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.9107494709187769, |
|
"grad_norm": 0.023681640625, |
|
"learning_rate": 1.1985142610643902e-07, |
|
"logits/chosen": -2.4080257415771484, |
|
"logits/rejected": -2.4080257415771484, |
|
"logps/chosen": -321.1974792480469, |
|
"logps/rejected": -321.1974792480469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05160089209675789, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05160089209675789, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.9136685397358243, |
|
"grad_norm": 0.01275634765625, |
|
"learning_rate": 1.121795097382064e-07, |
|
"logits/chosen": -2.422560691833496, |
|
"logits/rejected": -2.422560691833496, |
|
"logps/chosen": -335.0086975097656, |
|
"logps/rejected": -335.0086975097656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04736005887389183, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04736005887389183, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.9165876085528716, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 1.0475569658427803e-07, |
|
"logits/chosen": -2.438781261444092, |
|
"logits/rejected": -2.438781261444092, |
|
"logps/chosen": -311.33868408203125, |
|
"logps/rejected": -311.33868408203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03844516724348068, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03844516724348068, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.919506677369919, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 9.758075800578193e-08, |
|
"logits/chosen": -2.4374260902404785, |
|
"logits/rejected": -2.4374260902404785, |
|
"logps/chosen": -300.9288635253906, |
|
"logps/rejected": -300.9288635253906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0434752032160759, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0434752032160759, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.9224257461869664, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 9.06554395048742e-08, |
|
"logits/chosen": -2.4104561805725098, |
|
"logits/rejected": -2.4104561805725098, |
|
"logps/chosen": -310.27789306640625, |
|
"logps/rejected": -310.27789306640625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04003220796585083, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04003220796585083, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.9253448150040137, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 8.398046064727855e-08, |
|
"logits/chosen": -2.448122262954712, |
|
"logits/rejected": -2.448122262954712, |
|
"logps/chosen": -303.9940185546875, |
|
"logps/rejected": -303.9940185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04497329518198967, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04497329518198967, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.9282638838210611, |
|
"grad_norm": 0.0140380859375, |
|
"learning_rate": 7.755651498752265e-08, |
|
"logits/chosen": -2.4395852088928223, |
|
"logits/rejected": -2.4395852088928223, |
|
"logps/chosen": -292.140380859375, |
|
"logps/rejected": -292.140380859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04522908851504326, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04522908851504326, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.9311829526381085, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 7.138426999687171e-08, |
|
"logits/chosen": -2.4227964878082275, |
|
"logits/rejected": -2.4227964878082275, |
|
"logps/chosen": -333.205810546875, |
|
"logps/rejected": -333.205810546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04331531375646591, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04331531375646591, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.9341020214551558, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 6.546436699398029e-08, |
|
"logits/chosen": -2.4100470542907715, |
|
"logits/rejected": -2.4100470542907715, |
|
"logps/chosen": -334.2508850097656, |
|
"logps/rejected": -334.2508850097656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.043238040059804916, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.043238040059804916, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9341020214551558, |
|
"eval_logits/chosen": -2.391075849533081, |
|
"eval_logits/rejected": -2.391075849533081, |
|
"eval_logps/chosen": -310.7811584472656, |
|
"eval_logps/rejected": -310.7811584472656, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.043025679886341095, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.043025679886341095, |
|
"eval_runtime": 2682.606, |
|
"eval_samples_per_second": 2.27, |
|
"eval_steps_per_second": 0.284, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.9370210902722031, |
|
"grad_norm": 0.0150146484375, |
|
"learning_rate": 5.979742107825287e-08, |
|
"logits/chosen": -2.3894600868225098, |
|
"logits/rejected": -2.3894600868225098, |
|
"logps/chosen": -313.91131591796875, |
|
"logps/rejected": -313.91131591796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04388252645730972, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04388252645730972, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.9399401590892505, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 5.4384021065936045e-08, |
|
"logits/chosen": -2.408024549484253, |
|
"logits/rejected": -2.408024549484253, |
|
"logps/chosen": -288.5419006347656, |
|
"logps/rejected": -288.5419006347656, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0435100793838501, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0435100793838501, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.9428592279062978, |
|
"grad_norm": 0.033447265625, |
|
"learning_rate": 4.9224729428935806e-08, |
|
"logits/chosen": -2.423318862915039, |
|
"logits/rejected": -2.423318862915039, |
|
"logps/chosen": -309.74176025390625, |
|
"logps/rejected": -309.74176025390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04652589559555054, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04652589559555054, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.9457782967233452, |
|
"grad_norm": 0.011962890625, |
|
"learning_rate": 4.432008223637596e-08, |
|
"logits/chosen": -2.4209766387939453, |
|
"logits/rejected": -2.4209766387939453, |
|
"logps/chosen": -299.3330078125, |
|
"logps/rejected": -299.3330078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04555036872625351, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04555036872625351, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.9486973655403926, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 3.967058909889937e-08, |
|
"logits/chosen": -2.397352457046509, |
|
"logits/rejected": -2.397352457046509, |
|
"logps/chosen": -313.8124694824219, |
|
"logps/rejected": -313.8124694824219, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03916158154606819, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03916158154606819, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.9516164343574399, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 3.5276733115715556e-08, |
|
"logits/chosen": -2.448172092437744, |
|
"logits/rejected": -2.448172092437744, |
|
"logps/chosen": -305.734130859375, |
|
"logps/rejected": -305.734130859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.050101179629564285, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.050101179629564285, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.9545355031744873, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 3.11389708244067e-08, |
|
"logits/chosen": -2.4387991428375244, |
|
"logits/rejected": -2.4387991428375244, |
|
"logps/chosen": -325.77374267578125, |
|
"logps/rejected": -325.77374267578125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04632676765322685, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04632676765322685, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.9574545719915347, |
|
"grad_norm": 0.0130615234375, |
|
"learning_rate": 2.7257732153490313e-08, |
|
"logits/chosen": -2.3997585773468018, |
|
"logits/rejected": -2.3997585773468018, |
|
"logps/chosen": -323.36962890625, |
|
"logps/rejected": -323.36962890625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04246259480714798, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04246259480714798, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.960373640808582, |
|
"grad_norm": 0.01226806640625, |
|
"learning_rate": 2.3633420377749684e-08, |
|
"logits/chosen": -2.404913902282715, |
|
"logits/rejected": -2.404913902282715, |
|
"logps/chosen": -309.89715576171875, |
|
"logps/rejected": -309.89715576171875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.05698453634977341, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.05698453634977341, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.9632927096256294, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 2.0266412076330457e-08, |
|
"logits/chosen": -2.431570529937744, |
|
"logits/rejected": -2.431570529937744, |
|
"logps/chosen": -297.8599548339844, |
|
"logps/rejected": -297.8599548339844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.052738986909389496, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.052738986909389496, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9632927096256294, |
|
"eval_logits/chosen": -2.3914639949798584, |
|
"eval_logits/rejected": -2.3914639949798584, |
|
"eval_logps/chosen": -310.7767333984375, |
|
"eval_logps/rejected": -310.7767333984375, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.0429811105132103, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.0429811105132103, |
|
"eval_runtime": 2682.6978, |
|
"eval_samples_per_second": 2.27, |
|
"eval_steps_per_second": 0.284, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.9662117784426768, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 1.7157057093614704e-08, |
|
"logits/chosen": -2.452519178390503, |
|
"logits/rejected": -2.452519178390503, |
|
"logps/chosen": -296.8190002441406, |
|
"logps/rejected": -296.8190002441406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.041969865560531616, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.041969865560531616, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.9691308472597241, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 1.430567850286807e-08, |
|
"logits/chosen": -2.4390811920166016, |
|
"logits/rejected": -2.4390811920166016, |
|
"logps/chosen": -339.14801025390625, |
|
"logps/rejected": -339.14801025390625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0495796874165535, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0495796874165535, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.9720499160767715, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 1.1712572572674386e-08, |
|
"logits/chosen": -2.3779425621032715, |
|
"logits/rejected": -2.3779425621032715, |
|
"logps/chosen": -342.52117919921875, |
|
"logps/rejected": -342.52117919921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03777734562754631, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03777734562754631, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.9749689848938189, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 9.378008736149746e-09, |
|
"logits/chosen": -2.408357620239258, |
|
"logits/rejected": -2.408357620239258, |
|
"logps/chosen": -321.5648193359375, |
|
"logps/rejected": -321.5648193359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03947510942816734, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03947510942816734, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.9778880537108662, |
|
"grad_norm": 0.01275634765625, |
|
"learning_rate": 7.30222956294907e-09, |
|
"logits/chosen": -2.456228733062744, |
|
"logits/rejected": -2.456228733062744, |
|
"logps/chosen": -322.9805603027344, |
|
"logps/rejected": -322.9805603027344, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04836040362715721, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04836040362715721, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9808071225279136, |
|
"grad_norm": 0.015380859375, |
|
"learning_rate": 5.485450734061259e-09, |
|
"logits/chosen": -2.395473003387451, |
|
"logits/rejected": -2.395473003387451, |
|
"logps/chosen": -292.87994384765625, |
|
"logps/rejected": -292.87994384765625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0445740707218647, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0445740707218647, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.983726191344961, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 3.927861019399903e-09, |
|
"logits/chosen": -2.406294345855713, |
|
"logits/rejected": -2.406294345855713, |
|
"logps/chosen": -288.55987548828125, |
|
"logps/rejected": -288.55987548828125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.03885042294859886, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.03885042294859886, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9866452601620083, |
|
"grad_norm": 0.0155029296875, |
|
"learning_rate": 2.629622258188691e-09, |
|
"logits/chosen": -2.4149577617645264, |
|
"logits/rejected": -2.4149577617645264, |
|
"logps/chosen": -282.56585693359375, |
|
"logps/rejected": -282.56585693359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.0403311550617218, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.0403311550617218, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.9895643289790557, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 1.5908693421465282e-09, |
|
"logits/chosen": -2.4097559452056885, |
|
"logits/rejected": -2.4097559452056885, |
|
"logps/chosen": -284.1174011230469, |
|
"logps/rejected": -284.1174011230469, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04213656857609749, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04213656857609749, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.9924833977961031, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 8.11710201470417e-10, |
|
"logits/chosen": -2.4348714351654053, |
|
"logits/rejected": -2.4348714351654053, |
|
"logps/chosen": -325.41339111328125, |
|
"logps/rejected": -325.41339111328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04575268179178238, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04575268179178238, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9924833977961031, |
|
"eval_logits/chosen": -2.3908708095550537, |
|
"eval_logits/rejected": -2.3908708095550537, |
|
"eval_logps/chosen": -310.7832336425781, |
|
"eval_logps/rejected": -310.7832336425781, |
|
"eval_loss": 0.6931472420692444, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": -0.04304642230272293, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": -0.04304642230272293, |
|
"eval_runtime": 2747.5083, |
|
"eval_samples_per_second": 2.217, |
|
"eval_steps_per_second": 0.277, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.9954024666131503, |
|
"grad_norm": 0.01165771484375, |
|
"learning_rate": 2.922257936230355e-10, |
|
"logits/chosen": -2.409545421600342, |
|
"logits/rejected": -2.409545421600342, |
|
"logps/chosen": -264.7913818359375, |
|
"logps/rejected": -264.7913818359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.038275159895420074, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.038275159895420074, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.9983215354301977, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 3.247009491946784e-11, |
|
"logits/chosen": -2.429719924926758, |
|
"logits/rejected": -2.429719924926758, |
|
"logps/chosen": -340.23126220703125, |
|
"logps/rejected": -340.23126220703125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": -0.04289505258202553, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": -0.04289505258202553, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.9997810698387214, |
|
"step": 3425, |
|
"total_flos": 0.0, |
|
"train_loss": 0.18720042388804636, |
|
"train_runtime": 41876.0871, |
|
"train_samples_per_second": 1.309, |
|
"train_steps_per_second": 0.082 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3425, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|