|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997120644975526, |
|
"eval_steps": 100, |
|
"global_step": 1736, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 10.610388839867777, |
|
"learning_rate": 2.8735632183908045e-09, |
|
"logits/chosen": -2.688382625579834, |
|
"logits/rejected": -2.687504768371582, |
|
"logps/chosen": -154.15142822265625, |
|
"logps/rejected": -119.21998596191406, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 11.287668992561438, |
|
"learning_rate": 2.8735632183908043e-08, |
|
"logits/chosen": -2.693573236465454, |
|
"logits/rejected": -2.7061853408813477, |
|
"logps/chosen": -203.12576293945312, |
|
"logps/rejected": -203.58848571777344, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.0002493205538485199, |
|
"rewards/margins": -0.00013067919644527137, |
|
"rewards/rejected": -0.0001186413355753757, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 10.975446002121831, |
|
"learning_rate": 5.747126436781609e-08, |
|
"logits/chosen": -2.6681714057922363, |
|
"logits/rejected": -2.6636619567871094, |
|
"logps/chosen": -208.20529174804688, |
|
"logps/rejected": -195.71517944335938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 2.0605861209332943e-05, |
|
"rewards/margins": 0.0007079349015839398, |
|
"rewards/rejected": -0.0006873290403746068, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 10.987240036415274, |
|
"learning_rate": 8.620689655172414e-08, |
|
"logits/chosen": -2.6226565837860107, |
|
"logits/rejected": -2.627593755722046, |
|
"logps/chosen": -179.27633666992188, |
|
"logps/rejected": -194.77871704101562, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0009387334575876594, |
|
"rewards/margins": 0.0007720856228843331, |
|
"rewards/rejected": 0.00016664779104758054, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.874024139589977, |
|
"learning_rate": 1.1494252873563217e-07, |
|
"logits/chosen": -2.610243320465088, |
|
"logits/rejected": -2.571385145187378, |
|
"logps/chosen": -208.62820434570312, |
|
"logps/rejected": -187.62649536132812, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.001225657993927598, |
|
"rewards/margins": 0.0014799232594668865, |
|
"rewards/rejected": -0.00025426512002013624, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.535632759826791, |
|
"learning_rate": 1.436781609195402e-07, |
|
"logits/chosen": -2.6413865089416504, |
|
"logits/rejected": -2.665769100189209, |
|
"logps/chosen": -236.5024871826172, |
|
"logps/rejected": -203.89524841308594, |
|
"loss": 0.6923, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0021102039609104395, |
|
"rewards/margins": 0.0023120432160794735, |
|
"rewards/rejected": -0.00020183932792861015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.953283816672645, |
|
"learning_rate": 1.7241379310344828e-07, |
|
"logits/chosen": -2.649590015411377, |
|
"logits/rejected": -2.6609647274017334, |
|
"logps/chosen": -232.6203155517578, |
|
"logps/rejected": -211.6860809326172, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0004964367835782468, |
|
"rewards/margins": 0.003091245424002409, |
|
"rewards/rejected": -0.0025948083493858576, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.344557073712732, |
|
"learning_rate": 2.0114942528735633e-07, |
|
"logits/chosen": -2.6284663677215576, |
|
"logits/rejected": -2.6205639839172363, |
|
"logps/chosen": -203.4170684814453, |
|
"logps/rejected": -206.2279052734375, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.004654805175960064, |
|
"rewards/margins": 0.0051066940650343895, |
|
"rewards/rejected": -0.009761499240994453, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.169957980773157, |
|
"learning_rate": 2.2988505747126435e-07, |
|
"logits/chosen": -2.617027997970581, |
|
"logits/rejected": -2.653088092803955, |
|
"logps/chosen": -176.9120330810547, |
|
"logps/rejected": -186.38589477539062, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.01036100834608078, |
|
"rewards/margins": 0.004531105048954487, |
|
"rewards/rejected": -0.014892111532390118, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.709716340155365, |
|
"learning_rate": 2.586206896551724e-07, |
|
"logits/chosen": -2.5782480239868164, |
|
"logits/rejected": -2.599475622177124, |
|
"logps/chosen": -178.95782470703125, |
|
"logps/rejected": -210.3921661376953, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.019216390326619148, |
|
"rewards/margins": 0.0284399576485157, |
|
"rewards/rejected": -0.0476563461124897, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.076992600659995, |
|
"learning_rate": 2.873563218390804e-07, |
|
"logits/chosen": -2.6024298667907715, |
|
"logits/rejected": -2.603557825088501, |
|
"logps/chosen": -191.04461669921875, |
|
"logps/rejected": -196.60302734375, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.04221532493829727, |
|
"rewards/margins": 0.025046557188034058, |
|
"rewards/rejected": -0.06726188957691193, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_logits/chosen": -2.5392000675201416, |
|
"eval_logits/rejected": -2.5504696369171143, |
|
"eval_logps/chosen": -171.71307373046875, |
|
"eval_logps/rejected": -181.7760467529297, |
|
"eval_loss": 0.6815534234046936, |
|
"eval_rewards/accuracies": 0.6090182662010193, |
|
"eval_rewards/chosen": -0.0895635262131691, |
|
"eval_rewards/margins": 0.024930791929364204, |
|
"eval_rewards/rejected": -0.11449432373046875, |
|
"eval_runtime": 523.8706, |
|
"eval_samples_per_second": 13.362, |
|
"eval_steps_per_second": 0.418, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.892784161636136, |
|
"learning_rate": 3.160919540229885e-07, |
|
"logits/chosen": -2.559643268585205, |
|
"logits/rejected": -2.5869317054748535, |
|
"logps/chosen": -202.63461303710938, |
|
"logps/rejected": -223.0349578857422, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08688319474458694, |
|
"rewards/margins": 0.04590854048728943, |
|
"rewards/rejected": -0.13279172778129578, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 15.318535657417753, |
|
"learning_rate": 3.4482758620689656e-07, |
|
"logits/chosen": -2.5284199714660645, |
|
"logits/rejected": -2.5128540992736816, |
|
"logps/chosen": -199.5592803955078, |
|
"logps/rejected": -214.75119018554688, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17664876580238342, |
|
"rewards/margins": 0.06412236392498016, |
|
"rewards/rejected": -0.24077114462852478, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 22.024086046505637, |
|
"learning_rate": 3.735632183908046e-07, |
|
"logits/chosen": -2.5801522731781006, |
|
"logits/rejected": -2.565929651260376, |
|
"logps/chosen": -245.0824432373047, |
|
"logps/rejected": -247.3890838623047, |
|
"loss": 0.641, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.3728107511997223, |
|
"rewards/margins": 0.1388251781463623, |
|
"rewards/rejected": -0.5116358995437622, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 20.201715650528918, |
|
"learning_rate": 4.0229885057471266e-07, |
|
"logits/chosen": -2.5328726768493652, |
|
"logits/rejected": -2.5208544731140137, |
|
"logps/chosen": -302.12322998046875, |
|
"logps/rejected": -297.0425109863281, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7101386785507202, |
|
"rewards/margins": 0.11017869412899017, |
|
"rewards/rejected": -0.820317268371582, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 21.265576535090425, |
|
"learning_rate": 4.310344827586206e-07, |
|
"logits/chosen": -2.440979480743408, |
|
"logits/rejected": -2.446094512939453, |
|
"logps/chosen": -281.5878601074219, |
|
"logps/rejected": -299.9305419921875, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.7533038258552551, |
|
"rewards/margins": 0.1949019879102707, |
|
"rewards/rejected": -0.9482057690620422, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 24.193407542556805, |
|
"learning_rate": 4.597701149425287e-07, |
|
"logits/chosen": -2.405226707458496, |
|
"logits/rejected": -2.385442018508911, |
|
"logps/chosen": -282.8765563964844, |
|
"logps/rejected": -290.90338134765625, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7150470018386841, |
|
"rewards/margins": 0.3021948039531708, |
|
"rewards/rejected": -1.0172417163848877, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 25.66751117876746, |
|
"learning_rate": 4.885057471264368e-07, |
|
"logits/chosen": -2.428391456604004, |
|
"logits/rejected": -2.4205939769744873, |
|
"logps/chosen": -295.0913391113281, |
|
"logps/rejected": -324.97454833984375, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.996240496635437, |
|
"rewards/margins": 0.40502578020095825, |
|
"rewards/rejected": -1.40126633644104, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 23.464843947505965, |
|
"learning_rate": 4.999817969178237e-07, |
|
"logits/chosen": -2.4013054370880127, |
|
"logits/rejected": -2.398705005645752, |
|
"logps/chosen": -315.08050537109375, |
|
"logps/rejected": -362.9265441894531, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1423838138580322, |
|
"rewards/margins": 0.3166060149669647, |
|
"rewards/rejected": -1.4589898586273193, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 25.400467946109586, |
|
"learning_rate": 4.998705654596034e-07, |
|
"logits/chosen": -2.467696189880371, |
|
"logits/rejected": -2.4567761421203613, |
|
"logps/chosen": -330.1573181152344, |
|
"logps/rejected": -355.02154541015625, |
|
"loss": 0.5809, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1700841188430786, |
|
"rewards/margins": 0.3594915568828583, |
|
"rewards/rejected": -1.5295757055282593, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 29.13043617111363, |
|
"learning_rate": 4.996582603056428e-07, |
|
"logits/chosen": -2.376218557357788, |
|
"logits/rejected": -2.3482134342193604, |
|
"logps/chosen": -332.60443115234375, |
|
"logps/rejected": -390.0224914550781, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.3785903453826904, |
|
"rewards/margins": 0.41944313049316406, |
|
"rewards/rejected": -1.7980334758758545, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_logits/chosen": -2.3656015396118164, |
|
"eval_logits/rejected": -2.356565237045288, |
|
"eval_logps/chosen": -309.8548583984375, |
|
"eval_logps/rejected": -361.9523010253906, |
|
"eval_loss": 0.5905965566635132, |
|
"eval_rewards/accuracies": 0.6843607425689697, |
|
"eval_rewards/chosen": -1.4709811210632324, |
|
"eval_rewards/margins": 0.4452756345272064, |
|
"eval_rewards/rejected": -1.9162570238113403, |
|
"eval_runtime": 536.6296, |
|
"eval_samples_per_second": 13.044, |
|
"eval_steps_per_second": 0.408, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 21.603808432085263, |
|
"learning_rate": 4.993449673342705e-07, |
|
"logits/chosen": -2.4084885120391846, |
|
"logits/rejected": -2.4161148071289062, |
|
"logps/chosen": -323.7695007324219, |
|
"logps/rejected": -387.0673828125, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2521207332611084, |
|
"rewards/margins": 0.4323544502258301, |
|
"rewards/rejected": -1.684475302696228, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 23.37624428964897, |
|
"learning_rate": 4.989308132738126e-07, |
|
"logits/chosen": -2.339341402053833, |
|
"logits/rejected": -2.3030219078063965, |
|
"logps/chosen": -309.7107849121094, |
|
"logps/rejected": -352.9278564453125, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.297975778579712, |
|
"rewards/margins": 0.35226622223854065, |
|
"rewards/rejected": -1.6502418518066406, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 24.10831947448163, |
|
"learning_rate": 4.9841596565133e-07, |
|
"logits/chosen": -2.2944416999816895, |
|
"logits/rejected": -2.2744333744049072, |
|
"logps/chosen": -354.4916076660156, |
|
"logps/rejected": -389.98919677734375, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.5111327171325684, |
|
"rewards/margins": 0.3378602862358093, |
|
"rewards/rejected": -1.848992943763733, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 23.94673097578735, |
|
"learning_rate": 4.978006327248536e-07, |
|
"logits/chosen": -2.4152960777282715, |
|
"logits/rejected": -2.417513370513916, |
|
"logps/chosen": -313.9660949707031, |
|
"logps/rejected": -363.4143981933594, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1127357482910156, |
|
"rewards/margins": 0.48004403710365295, |
|
"rewards/rejected": -1.5927797555923462, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 45.997903240569016, |
|
"learning_rate": 4.970850633991431e-07, |
|
"logits/chosen": -2.3635926246643066, |
|
"logits/rejected": -2.3639185428619385, |
|
"logps/chosen": -357.05181884765625, |
|
"logps/rejected": -428.13134765625, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.6936771869659424, |
|
"rewards/margins": 0.5474244952201843, |
|
"rewards/rejected": -2.2411017417907715, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 26.05750468880025, |
|
"learning_rate": 4.962695471250032e-07, |
|
"logits/chosen": -2.3708977699279785, |
|
"logits/rejected": -2.3599140644073486, |
|
"logps/chosen": -314.1701965332031, |
|
"logps/rejected": -378.3408203125, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.363680124282837, |
|
"rewards/margins": 0.5289397239685059, |
|
"rewards/rejected": -1.8926197290420532, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 30.134203618956438, |
|
"learning_rate": 4.953544137822006e-07, |
|
"logits/chosen": -2.272925615310669, |
|
"logits/rejected": -2.2591726779937744, |
|
"logps/chosen": -352.3068542480469, |
|
"logps/rejected": -409.1640625, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.743584394454956, |
|
"rewards/margins": 0.42208537459373474, |
|
"rewards/rejected": -2.1656696796417236, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 23.199137985460396, |
|
"learning_rate": 4.94340033546025e-07, |
|
"logits/chosen": -2.300412654876709, |
|
"logits/rejected": -2.2782740592956543, |
|
"logps/chosen": -381.15594482421875, |
|
"logps/rejected": -399.106201171875, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.5968300104141235, |
|
"rewards/margins": 0.3864768445491791, |
|
"rewards/rejected": -1.983306884765625, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 30.52404960049098, |
|
"learning_rate": 4.932268167375531e-07, |
|
"logits/chosen": -2.3673739433288574, |
|
"logits/rejected": -2.3496601581573486, |
|
"logps/chosen": -319.85589599609375, |
|
"logps/rejected": -363.55059814453125, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.2304320335388184, |
|
"rewards/margins": 0.38459140062332153, |
|
"rewards/rejected": -1.6150233745574951, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 21.552944683968224, |
|
"learning_rate": 4.920152136576705e-07, |
|
"logits/chosen": -2.301480770111084, |
|
"logits/rejected": -2.286813259124756, |
|
"logps/chosen": -361.3895263671875, |
|
"logps/rejected": -411.3047790527344, |
|
"loss": 0.591, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5672380924224854, |
|
"rewards/margins": 0.5191463232040405, |
|
"rewards/rejected": -2.0863845348358154, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_logits/chosen": -2.2067737579345703, |
|
"eval_logits/rejected": -2.193309783935547, |
|
"eval_logps/chosen": -365.80474853515625, |
|
"eval_logps/rejected": -423.1273498535156, |
|
"eval_loss": 0.5809333324432373, |
|
"eval_rewards/accuracies": 0.6923515796661377, |
|
"eval_rewards/chosen": -2.030480146408081, |
|
"eval_rewards/margins": 0.49752748012542725, |
|
"eval_rewards/rejected": -2.528007984161377, |
|
"eval_runtime": 544.0927, |
|
"eval_samples_per_second": 12.865, |
|
"eval_steps_per_second": 0.403, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 20.57183591795313, |
|
"learning_rate": 4.907057144049243e-07, |
|
"logits/chosen": -2.2187986373901367, |
|
"logits/rejected": -2.2342276573181152, |
|
"logps/chosen": -363.1693420410156, |
|
"logps/rejected": -433.428955078125, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.6798083782196045, |
|
"rewards/margins": 0.5087260007858276, |
|
"rewards/rejected": -2.1885344982147217, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 29.514941076169325, |
|
"learning_rate": 4.892988486772756e-07, |
|
"logits/chosen": -2.145481586456299, |
|
"logits/rejected": -2.149977207183838, |
|
"logps/chosen": -315.6699523925781, |
|
"logps/rejected": -392.2762756347656, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.504184365272522, |
|
"rewards/margins": 0.5998227596282959, |
|
"rewards/rejected": -2.1040072441101074, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 49.892806992923354, |
|
"learning_rate": 4.877951855578342e-07, |
|
"logits/chosen": -2.0608973503112793, |
|
"logits/rejected": -2.0279011726379395, |
|
"logps/chosen": -388.0411376953125, |
|
"logps/rejected": -433.9009704589844, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.8731982707977295, |
|
"rewards/margins": 0.5407770872116089, |
|
"rewards/rejected": -2.413975477218628, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 28.07822983249446, |
|
"learning_rate": 4.861953332846629e-07, |
|
"logits/chosen": -2.0477962493896484, |
|
"logits/rejected": -1.9786545038223267, |
|
"logps/chosen": -350.5347900390625, |
|
"logps/rejected": -404.81390380859375, |
|
"loss": 0.5561, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.5449774265289307, |
|
"rewards/margins": 0.5318618416786194, |
|
"rewards/rejected": -2.0768394470214844, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 31.750069839466466, |
|
"learning_rate": 4.844999390047419e-07, |
|
"logits/chosen": -1.9117634296417236, |
|
"logits/rejected": -1.8637244701385498, |
|
"logps/chosen": -369.7088928222656, |
|
"logps/rejected": -423.8294982910156, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8487409353256226, |
|
"rewards/margins": 0.5574057698249817, |
|
"rewards/rejected": -2.406146764755249, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 40.566376234563315, |
|
"learning_rate": 4.827096885121953e-07, |
|
"logits/chosen": -1.8720242977142334, |
|
"logits/rejected": -1.849880576133728, |
|
"logps/chosen": -453.58563232421875, |
|
"logps/rejected": -510.3387145996094, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2124533653259277, |
|
"rewards/margins": 0.7541533708572388, |
|
"rewards/rejected": -2.966606616973877, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 27.693964794914088, |
|
"learning_rate": 4.808253059708848e-07, |
|
"logits/chosen": -1.9786027669906616, |
|
"logits/rejected": -1.957528829574585, |
|
"logps/chosen": -384.38519287109375, |
|
"logps/rejected": -449.1851501464844, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.7919820547103882, |
|
"rewards/margins": 0.6518365144729614, |
|
"rewards/rejected": -2.4438185691833496, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 26.76769623003568, |
|
"learning_rate": 4.788475536214821e-07, |
|
"logits/chosen": -2.040398120880127, |
|
"logits/rejected": -2.0081913471221924, |
|
"logps/chosen": -372.25213623046875, |
|
"logps/rejected": -443.19451904296875, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7179028987884521, |
|
"rewards/margins": 0.7337791919708252, |
|
"rewards/rejected": -2.4516820907592773, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 38.23522225315786, |
|
"learning_rate": 4.767772314731393e-07, |
|
"logits/chosen": -1.9009816646575928, |
|
"logits/rejected": -1.9371490478515625, |
|
"logps/chosen": -370.54229736328125, |
|
"logps/rejected": -435.6880798339844, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9820528030395508, |
|
"rewards/margins": 0.547071635723114, |
|
"rewards/rejected": -2.5291244983673096, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 32.640987965795105, |
|
"learning_rate": 4.746151769798818e-07, |
|
"logits/chosen": -1.969786286354065, |
|
"logits/rejected": -1.8861439228057861, |
|
"logps/chosen": -388.787353515625, |
|
"logps/rejected": -426.386962890625, |
|
"loss": 0.5437, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.8250181674957275, |
|
"rewards/margins": 0.5650046467781067, |
|
"rewards/rejected": -2.3900225162506104, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -1.9247232675552368, |
|
"eval_logits/rejected": -1.8974039554595947, |
|
"eval_logps/chosen": -343.13470458984375, |
|
"eval_logps/rejected": -406.9888000488281, |
|
"eval_loss": 0.5683532953262329, |
|
"eval_rewards/accuracies": 0.7031963467597961, |
|
"eval_rewards/chosen": -1.80377995967865, |
|
"eval_rewards/margins": 0.5628422498703003, |
|
"eval_rewards/rejected": -2.366621971130371, |
|
"eval_runtime": 547.2464, |
|
"eval_samples_per_second": 12.791, |
|
"eval_steps_per_second": 0.4, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 21.532686706791136, |
|
"learning_rate": 4.72362264701855e-07, |
|
"logits/chosen": -2.114487409591675, |
|
"logits/rejected": -2.0793392658233643, |
|
"logps/chosen": -370.3285217285156, |
|
"logps/rejected": -403.5226135253906, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2717143297195435, |
|
"rewards/margins": 0.5267833471298218, |
|
"rewards/rejected": -1.7984975576400757, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 29.239777552832912, |
|
"learning_rate": 4.7001940595156055e-07, |
|
"logits/chosen": -2.0379366874694824, |
|
"logits/rejected": -1.9628146886825562, |
|
"logps/chosen": -385.35113525390625, |
|
"logps/rejected": -440.34222412109375, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.6135915517807007, |
|
"rewards/margins": 0.6375263333320618, |
|
"rewards/rejected": -2.2511179447174072, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 20.560330978299934, |
|
"learning_rate": 4.6758754842522697e-07, |
|
"logits/chosen": -2.0536270141601562, |
|
"logits/rejected": -1.9932899475097656, |
|
"logps/chosen": -365.8475036621094, |
|
"logps/rejected": -423.611083984375, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.728179931640625, |
|
"rewards/margins": 0.6338831186294556, |
|
"rewards/rejected": -2.362062931060791, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 26.229998665879116, |
|
"learning_rate": 4.650676758194623e-07, |
|
"logits/chosen": -2.07350492477417, |
|
"logits/rejected": -2.022712230682373, |
|
"logps/chosen": -401.141357421875, |
|
"logps/rejected": -436.979248046875, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.8332273960113525, |
|
"rewards/margins": 0.6851301789283752, |
|
"rewards/rejected": -2.518357753753662, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 46.29630215421365, |
|
"learning_rate": 4.6246080743334474e-07, |
|
"logits/chosen": -1.8938102722167969, |
|
"logits/rejected": -1.8106597661972046, |
|
"logps/chosen": -397.90948486328125, |
|
"logps/rejected": -467.4127502441406, |
|
"loss": 0.5466, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9406541585922241, |
|
"rewards/margins": 0.6843216419219971, |
|
"rewards/rejected": -2.6249756813049316, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 26.630018999750448, |
|
"learning_rate": 4.5976799775611215e-07, |
|
"logits/chosen": -1.814541220664978, |
|
"logits/rejected": -1.7524267435073853, |
|
"logps/chosen": -366.3084716796875, |
|
"logps/rejected": -446.58026123046875, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7752193212509155, |
|
"rewards/margins": 0.6405627727508545, |
|
"rewards/rejected": -2.4157819747924805, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 35.44334983652439, |
|
"learning_rate": 4.569903360406162e-07, |
|
"logits/chosen": -1.9025815725326538, |
|
"logits/rejected": -1.8398154973983765, |
|
"logps/chosen": -346.3355407714844, |
|
"logps/rejected": -398.0967102050781, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.503535509109497, |
|
"rewards/margins": 0.6164692640304565, |
|
"rewards/rejected": -2.1200051307678223, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 33.12278527176869, |
|
"learning_rate": 4.5412894586271543e-07, |
|
"logits/chosen": -1.8207648992538452, |
|
"logits/rejected": -1.7967065572738647, |
|
"logps/chosen": -392.82696533203125, |
|
"logps/rejected": -462.015869140625, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.8827365636825562, |
|
"rewards/margins": 0.6777531504631042, |
|
"rewards/rejected": -2.5604898929595947, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 25.558438319253998, |
|
"learning_rate": 4.511849846667839e-07, |
|
"logits/chosen": -1.883180022239685, |
|
"logits/rejected": -1.8137277364730835, |
|
"logps/chosen": -354.247314453125, |
|
"logps/rejected": -436.14556884765625, |
|
"loss": 0.5408, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8229477405548096, |
|
"rewards/margins": 0.7674862742424011, |
|
"rewards/rejected": -2.5904340744018555, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 28.233129557824064, |
|
"learning_rate": 4.481596432975201e-07, |
|
"logits/chosen": -1.9428781270980835, |
|
"logits/rejected": -1.889491081237793, |
|
"logps/chosen": -410.0284729003906, |
|
"logps/rejected": -480.2649841308594, |
|
"loss": 0.5415, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -1.8563823699951172, |
|
"rewards/margins": 0.8543184995651245, |
|
"rewards/rejected": -2.710700750350952, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -1.8125942945480347, |
|
"eval_logits/rejected": -1.7951966524124146, |
|
"eval_logps/chosen": -405.05938720703125, |
|
"eval_logps/rejected": -476.8222961425781, |
|
"eval_loss": 0.5648065209388733, |
|
"eval_rewards/accuracies": 0.706620991230011, |
|
"eval_rewards/chosen": -2.4230268001556396, |
|
"eval_rewards/margins": 0.6419299840927124, |
|
"eval_rewards/rejected": -3.0649566650390625, |
|
"eval_runtime": 536.9406, |
|
"eval_samples_per_second": 13.037, |
|
"eval_steps_per_second": 0.408, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 30.516998297285266, |
|
"learning_rate": 4.450541455182453e-07, |
|
"logits/chosen": -1.8995802402496338, |
|
"logits/rejected": -1.9007337093353271, |
|
"logps/chosen": -408.70635986328125, |
|
"logps/rejected": -487.16387939453125, |
|
"loss": 0.5238, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.09024977684021, |
|
"rewards/margins": 0.7947575449943542, |
|
"rewards/rejected": -2.885007381439209, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 33.722814638920184, |
|
"learning_rate": 4.41869747515886e-07, |
|
"logits/chosen": -1.95028817653656, |
|
"logits/rejected": -1.8546888828277588, |
|
"logps/chosen": -388.6572570800781, |
|
"logps/rejected": -446.74542236328125, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9295694828033447, |
|
"rewards/margins": 0.7291784882545471, |
|
"rewards/rejected": -2.658748149871826, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 25.932688832468305, |
|
"learning_rate": 4.3860773739284126e-07, |
|
"logits/chosen": -1.9748178720474243, |
|
"logits/rejected": -1.9027087688446045, |
|
"logps/chosen": -368.09832763671875, |
|
"logps/rejected": -403.284912109375, |
|
"loss": 0.573, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6870168447494507, |
|
"rewards/margins": 0.5430334806442261, |
|
"rewards/rejected": -2.2300503253936768, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 39.79448640097382, |
|
"learning_rate": 4.352694346459396e-07, |
|
"logits/chosen": -1.9401954412460327, |
|
"logits/rejected": -1.905206322669983, |
|
"logps/chosen": -386.59918212890625, |
|
"logps/rejected": -437.18536376953125, |
|
"loss": 0.571, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.91461181640625, |
|
"rewards/margins": 0.4893025755882263, |
|
"rewards/rejected": -2.403914451599121, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 28.112999261098803, |
|
"learning_rate": 4.318561896326973e-07, |
|
"logits/chosen": -1.959571123123169, |
|
"logits/rejected": -1.9278638362884521, |
|
"logps/chosen": -388.32073974609375, |
|
"logps/rejected": -454.91436767578125, |
|
"loss": 0.5538, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.955959677696228, |
|
"rewards/margins": 0.6656385660171509, |
|
"rewards/rejected": -2.621598482131958, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 26.262637133504416, |
|
"learning_rate": 4.2836938302509256e-07, |
|
"logits/chosen": -2.0025877952575684, |
|
"logits/rejected": -1.9562809467315674, |
|
"logps/chosen": -359.0731201171875, |
|
"logps/rejected": -429.9349060058594, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.6528972387313843, |
|
"rewards/margins": 0.6726639866828918, |
|
"rewards/rejected": -2.325561285018921, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 36.25641292003506, |
|
"learning_rate": 4.248104252510785e-07, |
|
"logits/chosen": -2.134064197540283, |
|
"logits/rejected": -2.1425302028656006, |
|
"logps/chosen": -429.51153564453125, |
|
"logps/rejected": -480.48138427734375, |
|
"loss": 0.544, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.814552664756775, |
|
"rewards/margins": 0.49401578307151794, |
|
"rewards/rejected": -2.3085684776306152, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 21.449511768929142, |
|
"learning_rate": 4.2118075592405874e-07, |
|
"logits/chosen": -1.988585114479065, |
|
"logits/rejected": -2.011026382446289, |
|
"logps/chosen": -405.82305908203125, |
|
"logps/rejected": -488.56451416015625, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -2.087791919708252, |
|
"rewards/margins": 0.7612438201904297, |
|
"rewards/rejected": -2.8490357398986816, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 30.59358168073691, |
|
"learning_rate": 4.174818432605578e-07, |
|
"logits/chosen": -2.0260438919067383, |
|
"logits/rejected": -2.033987522125244, |
|
"logps/chosen": -453.0452575683594, |
|
"logps/rejected": -514.720458984375, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.147684335708618, |
|
"rewards/margins": 0.7406858801841736, |
|
"rewards/rejected": -2.8883700370788574, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 28.138749590258723, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.9616165161132812, |
|
"logits/rejected": -1.972180724143982, |
|
"logps/chosen": -385.138427734375, |
|
"logps/rejected": -473.2599182128906, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.132406234741211, |
|
"rewards/margins": 0.6182124018669128, |
|
"rewards/rejected": -2.7506186962127686, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -1.8847192525863647, |
|
"eval_logits/rejected": -1.8836290836334229, |
|
"eval_logps/chosen": -397.7480773925781, |
|
"eval_logps/rejected": -469.41180419921875, |
|
"eval_loss": 0.5578325390815735, |
|
"eval_rewards/accuracies": 0.7191780805587769, |
|
"eval_rewards/chosen": -2.3499135971069336, |
|
"eval_rewards/margins": 0.6409377455711365, |
|
"eval_rewards/rejected": -2.990851402282715, |
|
"eval_runtime": 544.5307, |
|
"eval_samples_per_second": 12.855, |
|
"eval_steps_per_second": 0.402, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 33.10703086608096, |
|
"learning_rate": 4.098823002310864e-07, |
|
"logits/chosen": -2.044586181640625, |
|
"logits/rejected": -1.9869381189346313, |
|
"logps/chosen": -415.4453125, |
|
"logps/rejected": -474.20526123046875, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.8535633087158203, |
|
"rewards/margins": 0.7316546440124512, |
|
"rewards/rejected": -2.5852179527282715, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 39.38037052781508, |
|
"learning_rate": 4.059847439122671e-07, |
|
"logits/chosen": -1.9577858448028564, |
|
"logits/rejected": -1.904496431350708, |
|
"logps/chosen": -393.66796875, |
|
"logps/rejected": -449.994140625, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8364942073822021, |
|
"rewards/margins": 0.6134520769119263, |
|
"rewards/rejected": -2.449946165084839, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 29.15442393094139, |
|
"learning_rate": 4.020240911078041e-07, |
|
"logits/chosen": -1.8907365798950195, |
|
"logits/rejected": -1.8794755935668945, |
|
"logps/chosen": -393.5573425292969, |
|
"logps/rejected": -469.4529724121094, |
|
"loss": 0.5547, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1041901111602783, |
|
"rewards/margins": 0.8193286657333374, |
|
"rewards/rejected": -2.923518419265747, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 24.710710448776272, |
|
"learning_rate": 3.98001943918432e-07, |
|
"logits/chosen": -1.87062668800354, |
|
"logits/rejected": -1.8511345386505127, |
|
"logps/chosen": -391.0401306152344, |
|
"logps/rejected": -467.5562438964844, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9567807912826538, |
|
"rewards/margins": 0.8031463623046875, |
|
"rewards/rejected": -2.75992751121521, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 28.042405621162647, |
|
"learning_rate": 3.9391992931962304e-07, |
|
"logits/chosen": -1.912502646446228, |
|
"logits/rejected": -1.8945941925048828, |
|
"logps/chosen": -381.6258850097656, |
|
"logps/rejected": -439.37921142578125, |
|
"loss": 0.5279, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6793773174285889, |
|
"rewards/margins": 0.6930197477340698, |
|
"rewards/rejected": -2.3723976612091064, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 64.63037359225194, |
|
"learning_rate": 3.8977969850346866e-07, |
|
"logits/chosen": -1.8362230062484741, |
|
"logits/rejected": -1.827745795249939, |
|
"logps/chosen": -341.99755859375, |
|
"logps/rejected": -415.6537170410156, |
|
"loss": 0.5512, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.6921066045761108, |
|
"rewards/margins": 0.6708263158798218, |
|
"rewards/rejected": -2.3629326820373535, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 68.40563732230615, |
|
"learning_rate": 3.8558292621076526e-07, |
|
"logits/chosen": -1.873615026473999, |
|
"logits/rejected": -1.8472900390625, |
|
"logps/chosen": -422.1318359375, |
|
"logps/rejected": -461.34619140625, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.0594050884246826, |
|
"rewards/margins": 0.5281103253364563, |
|
"rewards/rejected": -2.5875158309936523, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 22.39050226911276, |
|
"learning_rate": 3.8133131005357465e-07, |
|
"logits/chosen": -1.8999011516571045, |
|
"logits/rejected": -1.836851716041565, |
|
"logps/chosen": -397.0812072753906, |
|
"logps/rejected": -480.00823974609375, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.0673575401306152, |
|
"rewards/margins": 0.7980934381484985, |
|
"rewards/rejected": -2.8654510974884033, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 38.649992337166125, |
|
"learning_rate": 3.7702656982853277e-07, |
|
"logits/chosen": -1.810121774673462, |
|
"logits/rejected": -1.793265700340271, |
|
"logps/chosen": -450.671875, |
|
"logps/rejected": -518.1996459960938, |
|
"loss": 0.5696, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.448154926300049, |
|
"rewards/margins": 0.7367699146270752, |
|
"rewards/rejected": -3.184924602508545, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 34.05006479039719, |
|
"learning_rate": 3.7267044682118435e-07, |
|
"logits/chosen": -1.860874891281128, |
|
"logits/rejected": -1.8456264734268188, |
|
"logps/chosen": -409.9309997558594, |
|
"logps/rejected": -486.42376708984375, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.9787782430648804, |
|
"rewards/margins": 0.6770876049995422, |
|
"rewards/rejected": -2.6558656692504883, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.7718605995178223, |
|
"eval_logits/rejected": -1.7858551740646362, |
|
"eval_logps/chosen": -383.05316162109375, |
|
"eval_logps/rejected": -453.5823059082031, |
|
"eval_loss": 0.5597525238990784, |
|
"eval_rewards/accuracies": 0.7031963467597961, |
|
"eval_rewards/chosen": -2.2029640674591064, |
|
"eval_rewards/margins": 0.6295928955078125, |
|
"eval_rewards/rejected": -2.832556962966919, |
|
"eval_runtime": 535.7382, |
|
"eval_samples_per_second": 13.066, |
|
"eval_steps_per_second": 0.409, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 30.516726115650822, |
|
"learning_rate": 3.682647031016264e-07, |
|
"logits/chosen": -1.9329684972763062, |
|
"logits/rejected": -1.940243124961853, |
|
"logps/chosen": -388.291259765625, |
|
"logps/rejected": -434.0372009277344, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -1.6513302326202393, |
|
"rewards/margins": 0.6132162809371948, |
|
"rewards/rejected": -2.2645463943481445, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 38.51345602531556, |
|
"learning_rate": 3.638111208117425e-07, |
|
"logits/chosen": -1.9404680728912354, |
|
"logits/rejected": -1.9298954010009766, |
|
"logps/chosen": -385.8715515136719, |
|
"logps/rejected": -416.53155517578125, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8125269412994385, |
|
"rewards/margins": 0.4822394847869873, |
|
"rewards/rejected": -2.294766426086426, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 36.417406572486875, |
|
"learning_rate": 3.593115014443195e-07, |
|
"logits/chosen": -1.9941285848617554, |
|
"logits/rejected": -1.9894773960113525, |
|
"logps/chosen": -382.0946350097656, |
|
"logps/rejected": -437.18841552734375, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.637915849685669, |
|
"rewards/margins": 0.631543755531311, |
|
"rewards/rejected": -2.2694597244262695, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 23.509926948805322, |
|
"learning_rate": 3.5476766511433605e-07, |
|
"logits/chosen": -1.9100837707519531, |
|
"logits/rejected": -1.857428789138794, |
|
"logps/chosen": -366.06109619140625, |
|
"logps/rejected": -444.9000549316406, |
|
"loss": 0.5376, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.6342054605484009, |
|
"rewards/margins": 0.7001287341117859, |
|
"rewards/rejected": -2.334334373474121, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 28.491603155440426, |
|
"learning_rate": 3.5018144982271806e-07, |
|
"logits/chosen": -1.847013235092163, |
|
"logits/rejected": -1.844740867614746, |
|
"logps/chosen": -387.2216796875, |
|
"logps/rejected": -458.35247802734375, |
|
"loss": 0.5425, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9939115047454834, |
|
"rewards/margins": 0.5967587232589722, |
|
"rewards/rejected": -2.590670585632324, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 21.711577115215622, |
|
"learning_rate": 3.455547107128602e-07, |
|
"logits/chosen": -1.7501156330108643, |
|
"logits/rejected": -1.7191545963287354, |
|
"logps/chosen": -452.614013671875, |
|
"logps/rejected": -517.114501953125, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -2.3392488956451416, |
|
"rewards/margins": 0.8644348978996277, |
|
"rewards/rejected": -3.203683853149414, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 50.17207271612329, |
|
"learning_rate": 3.4088931932021185e-07, |
|
"logits/chosen": -1.8234459161758423, |
|
"logits/rejected": -1.780574083328247, |
|
"logps/chosen": -448.5769958496094, |
|
"logps/rejected": -518.0377197265625, |
|
"loss": 0.5488, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1782760620117188, |
|
"rewards/margins": 0.8133376240730286, |
|
"rewards/rejected": -2.9916136264801025, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 49.301861132325, |
|
"learning_rate": 3.361871628152338e-07, |
|
"logits/chosen": -1.773737907409668, |
|
"logits/rejected": -1.7517740726470947, |
|
"logps/chosen": -440.6595153808594, |
|
"logps/rejected": -493.2332458496094, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.263493299484253, |
|
"rewards/margins": 0.6659582853317261, |
|
"rewards/rejected": -2.9294512271881104, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 30.255792286324436, |
|
"learning_rate": 3.314501432400294e-07, |
|
"logits/chosen": -1.7690521478652954, |
|
"logits/rejected": -1.7298529148101807, |
|
"logps/chosen": -411.845703125, |
|
"logps/rejected": -474.04425048828125, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1643013954162598, |
|
"rewards/margins": 0.6198045015335083, |
|
"rewards/rejected": -2.7841057777404785, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 22.17250118566977, |
|
"learning_rate": 3.2668017673896077e-07, |
|
"logits/chosen": -1.8177188634872437, |
|
"logits/rejected": -1.7350183725357056, |
|
"logps/chosen": -399.64495849609375, |
|
"logps/rejected": -457.10601806640625, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0094985961914062, |
|
"rewards/margins": 0.7035370469093323, |
|
"rewards/rejected": -2.7130355834960938, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_logits/chosen": -1.7061283588409424, |
|
"eval_logits/rejected": -1.7086626291275024, |
|
"eval_logps/chosen": -387.18157958984375, |
|
"eval_logps/rejected": -455.0378723144531, |
|
"eval_loss": 0.558580219745636, |
|
"eval_rewards/accuracies": 0.7163242101669312, |
|
"eval_rewards/chosen": -2.244248390197754, |
|
"eval_rewards/margins": 0.6028640270233154, |
|
"eval_rewards/rejected": -2.8471124172210693, |
|
"eval_runtime": 544.1327, |
|
"eval_samples_per_second": 12.865, |
|
"eval_steps_per_second": 0.402, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 29.19961014949389, |
|
"learning_rate": 3.218791927835602e-07, |
|
"logits/chosen": -1.8107563257217407, |
|
"logits/rejected": -1.7641499042510986, |
|
"logps/chosen": -369.27203369140625, |
|
"logps/rejected": -456.6036682128906, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8051646947860718, |
|
"rewards/margins": 0.7181805968284607, |
|
"rewards/rejected": -2.523345470428467, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 25.26002008872549, |
|
"learning_rate": 3.1704913339205103e-07, |
|
"logits/chosen": -1.8677990436553955, |
|
"logits/rejected": -1.825749158859253, |
|
"logps/chosen": -379.98321533203125, |
|
"logps/rejected": -454.1268005371094, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6397157907485962, |
|
"rewards/margins": 0.8766795992851257, |
|
"rewards/rejected": -2.516395092010498, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 28.6375298855639, |
|
"learning_rate": 3.1219195234379265e-07, |
|
"logits/chosen": -1.6751445531845093, |
|
"logits/rejected": -1.6866257190704346, |
|
"logps/chosen": -346.9654846191406, |
|
"logps/rejected": -451.60498046875, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7691535949707031, |
|
"rewards/margins": 0.7554360628128052, |
|
"rewards/rejected": -2.5245893001556396, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 44.73580525279706, |
|
"learning_rate": 3.0730961438896885e-07, |
|
"logits/chosen": -1.7529224157333374, |
|
"logits/rejected": -1.7129818201065063, |
|
"logps/chosen": -400.9212951660156, |
|
"logps/rejected": -464.69305419921875, |
|
"loss": 0.5584, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.0139617919921875, |
|
"rewards/margins": 0.5847775936126709, |
|
"rewards/rejected": -2.5987396240234375, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 30.284221885120694, |
|
"learning_rate": 3.024040944538383e-07, |
|
"logits/chosen": -1.7323232889175415, |
|
"logits/rejected": -1.7132787704467773, |
|
"logps/chosen": -379.4556579589844, |
|
"logps/rejected": -454.51531982421875, |
|
"loss": 0.5314, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9580036401748657, |
|
"rewards/margins": 0.7366491556167603, |
|
"rewards/rejected": -2.694653034210205, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 27.718050401992414, |
|
"learning_rate": 2.9747737684186795e-07, |
|
"logits/chosen": -1.7737243175506592, |
|
"logits/rejected": -1.7415263652801514, |
|
"logps/chosen": -404.40509033203125, |
|
"logps/rejected": -465.7650451660156, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.0648417472839355, |
|
"rewards/margins": 0.7340750694274902, |
|
"rewards/rejected": -2.798916816711426, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 31.011489118398675, |
|
"learning_rate": 2.925314544310745e-07, |
|
"logits/chosen": -1.745216727256775, |
|
"logits/rejected": -1.727979302406311, |
|
"logps/chosen": -392.7491149902344, |
|
"logps/rejected": -456.2132263183594, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -2.195338487625122, |
|
"rewards/margins": 0.5425236225128174, |
|
"rewards/rejected": -2.7378618717193604, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 35.37211460888614, |
|
"learning_rate": 2.8756832786789663e-07, |
|
"logits/chosen": -1.8434585332870483, |
|
"logits/rejected": -1.8155876398086548, |
|
"logps/chosen": -413.1863708496094, |
|
"logps/rejected": -489.76220703125, |
|
"loss": 0.5608, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -2.009500026702881, |
|
"rewards/margins": 0.6946345567703247, |
|
"rewards/rejected": -2.704134464263916, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 33.27106994315821, |
|
"learning_rate": 2.8259000475792503e-07, |
|
"logits/chosen": -1.876704454421997, |
|
"logits/rejected": -1.7968547344207764, |
|
"logps/chosen": -395.55706787109375, |
|
"logps/rejected": -460.11669921875, |
|
"loss": 0.5543, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7892892360687256, |
|
"rewards/margins": 0.7638824582099915, |
|
"rewards/rejected": -2.5531716346740723, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 31.881562451650627, |
|
"learning_rate": 2.7759849885381747e-07, |
|
"logits/chosen": -1.868417739868164, |
|
"logits/rejected": -1.7971748113632202, |
|
"logps/chosen": -378.93353271484375, |
|
"logps/rejected": -464.1891174316406, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.808215856552124, |
|
"rewards/margins": 0.7741836309432983, |
|
"rewards/rejected": -2.582399368286133, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -1.6767016649246216, |
|
"eval_logits/rejected": -1.6597568988800049, |
|
"eval_logps/chosen": -382.5883483886719, |
|
"eval_logps/rejected": -453.9528503417969, |
|
"eval_loss": 0.5555324554443359, |
|
"eval_rewards/accuracies": 0.7151826620101929, |
|
"eval_rewards/chosen": -2.198316812515259, |
|
"eval_rewards/margins": 0.6379454731941223, |
|
"eval_rewards/rejected": -2.8362622261047363, |
|
"eval_runtime": 537.245, |
|
"eval_samples_per_second": 13.029, |
|
"eval_steps_per_second": 0.408, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 21.961619231813007, |
|
"learning_rate": 2.7259582924072756e-07, |
|
"logits/chosen": -1.8725192546844482, |
|
"logits/rejected": -1.8156566619873047, |
|
"logps/chosen": -350.8863220214844, |
|
"logps/rejected": -413.61993408203125, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7346986532211304, |
|
"rewards/margins": 0.5974160432815552, |
|
"rewards/rejected": -2.3321146965026855, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 35.79222168716502, |
|
"learning_rate": 2.675840195195762e-07, |
|
"logits/chosen": -1.8498157262802124, |
|
"logits/rejected": -1.8300836086273193, |
|
"logps/chosen": -376.912353515625, |
|
"logps/rejected": -438.8692932128906, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.9630987644195557, |
|
"rewards/margins": 0.5869341492652893, |
|
"rewards/rejected": -2.5500330924987793, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 29.90256487232944, |
|
"learning_rate": 2.625650969884965e-07, |
|
"logits/chosen": -1.7971664667129517, |
|
"logits/rejected": -1.7699878215789795, |
|
"logps/chosen": -429.76171875, |
|
"logps/rejected": -510.20550537109375, |
|
"loss": 0.5419, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.1585049629211426, |
|
"rewards/margins": 0.7950173616409302, |
|
"rewards/rejected": -2.953521966934204, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 29.9715777964654, |
|
"learning_rate": 2.575410918227829e-07, |
|
"logits/chosen": -1.8557363748550415, |
|
"logits/rejected": -1.7954918146133423, |
|
"logps/chosen": -443.01092529296875, |
|
"logps/rejected": -511.6332092285156, |
|
"loss": 0.5316, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -2.0825295448303223, |
|
"rewards/margins": 0.8091154098510742, |
|
"rewards/rejected": -2.8916451930999756, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 29.036788981905207, |
|
"learning_rate": 2.525140362536775e-07, |
|
"logits/chosen": -1.7384717464447021, |
|
"logits/rejected": -1.6616607904434204, |
|
"logps/chosen": -384.7867126464844, |
|
"logps/rejected": -460.168212890625, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.1540913581848145, |
|
"rewards/margins": 0.569457471370697, |
|
"rewards/rejected": -2.723548650741577, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 24.40866664439217, |
|
"learning_rate": 2.474859637463226e-07, |
|
"logits/chosen": -1.7090812921524048, |
|
"logits/rejected": -1.6654443740844727, |
|
"logps/chosen": -438.59613037109375, |
|
"logps/rejected": -484.14093017578125, |
|
"loss": 0.5394, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.41066837310791, |
|
"rewards/margins": 0.5415581464767456, |
|
"rewards/rejected": -2.952226400375366, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 28.97604867448642, |
|
"learning_rate": 2.42458908177217e-07, |
|
"logits/chosen": -1.8490597009658813, |
|
"logits/rejected": -1.7891228199005127, |
|
"logps/chosen": -424.61383056640625, |
|
"logps/rejected": -479.585205078125, |
|
"loss": 0.5426, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.024867296218872, |
|
"rewards/margins": 0.7355901002883911, |
|
"rewards/rejected": -2.7604575157165527, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 35.488277243353735, |
|
"learning_rate": 2.3743490301150355e-07, |
|
"logits/chosen": -1.8032734394073486, |
|
"logits/rejected": -1.794163465499878, |
|
"logps/chosen": -417.388671875, |
|
"logps/rejected": -491.72021484375, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.1092441082000732, |
|
"rewards/margins": 0.6570713520050049, |
|
"rewards/rejected": -2.766315460205078, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 22.23777106600426, |
|
"learning_rate": 2.324159804804238e-07, |
|
"logits/chosen": -1.8234403133392334, |
|
"logits/rejected": -1.786786675453186, |
|
"logps/chosen": -404.6798400878906, |
|
"logps/rejected": -463.2445373535156, |
|
"loss": 0.5494, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0632481575012207, |
|
"rewards/margins": 0.6302553415298462, |
|
"rewards/rejected": -2.6935033798217773, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 31.216532615702715, |
|
"learning_rate": 2.274041707592724e-07, |
|
"logits/chosen": -1.9149761199951172, |
|
"logits/rejected": -1.8780314922332764, |
|
"logps/chosen": -430.76617431640625, |
|
"logps/rejected": -501.29132080078125, |
|
"loss": 0.5036, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9554197788238525, |
|
"rewards/margins": 0.8839017748832703, |
|
"rewards/rejected": -2.8393216133117676, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.7254499197006226, |
|
"eval_logits/rejected": -1.7160460948944092, |
|
"eval_logps/chosen": -385.9115295410156, |
|
"eval_logps/rejected": -462.5011291503906, |
|
"eval_loss": 0.5499266982078552, |
|
"eval_rewards/accuracies": 0.7208904027938843, |
|
"eval_rewards/chosen": -2.231548309326172, |
|
"eval_rewards/margins": 0.6901971697807312, |
|
"eval_rewards/rejected": -2.921745777130127, |
|
"eval_runtime": 544.8576, |
|
"eval_samples_per_second": 12.847, |
|
"eval_steps_per_second": 0.402, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 30.350568547131573, |
|
"learning_rate": 2.2240150114618259e-07, |
|
"logits/chosen": -1.8180408477783203, |
|
"logits/rejected": -1.7760928869247437, |
|
"logps/chosen": -416.88525390625, |
|
"logps/rejected": -509.04058837890625, |
|
"loss": 0.5276, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.980337381362915, |
|
"rewards/margins": 0.8953462839126587, |
|
"rewards/rejected": -2.875683546066284, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 31.603328627940357, |
|
"learning_rate": 2.17409995242075e-07, |
|
"logits/chosen": -1.8180592060089111, |
|
"logits/rejected": -1.7379405498504639, |
|
"logps/chosen": -440.83074951171875, |
|
"logps/rejected": -495.69830322265625, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.1939332485198975, |
|
"rewards/margins": 0.9124080538749695, |
|
"rewards/rejected": -3.1063413619995117, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 31.20623945497072, |
|
"learning_rate": 2.1243167213210335e-07, |
|
"logits/chosen": -1.8180633783340454, |
|
"logits/rejected": -1.7436892986297607, |
|
"logps/chosen": -410.88427734375, |
|
"logps/rejected": -483.1456604003906, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.02951717376709, |
|
"rewards/margins": 0.87162846326828, |
|
"rewards/rejected": -2.9011454582214355, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 34.72608405283437, |
|
"learning_rate": 2.0746854556892544e-07, |
|
"logits/chosen": -1.804686188697815, |
|
"logits/rejected": -1.7846415042877197, |
|
"logps/chosen": -387.50067138671875, |
|
"logps/rejected": -457.11505126953125, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.964666724205017, |
|
"rewards/margins": 0.690390944480896, |
|
"rewards/rejected": -2.655057907104492, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 26.9759576683522, |
|
"learning_rate": 2.025226231581321e-07, |
|
"logits/chosen": -1.8315603733062744, |
|
"logits/rejected": -1.7954432964324951, |
|
"logps/chosen": -408.33740234375, |
|
"logps/rejected": -479.91912841796875, |
|
"loss": 0.5286, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -2.079817533493042, |
|
"rewards/margins": 0.8754861950874329, |
|
"rewards/rejected": -2.95530366897583, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 28.45648597029955, |
|
"learning_rate": 1.9759590554616173e-07, |
|
"logits/chosen": -1.8250961303710938, |
|
"logits/rejected": -1.785871148109436, |
|
"logps/chosen": -423.91607666015625, |
|
"logps/rejected": -492.83563232421875, |
|
"loss": 0.5428, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.0802817344665527, |
|
"rewards/margins": 0.7300957441329956, |
|
"rewards/rejected": -2.810377597808838, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 36.3095911676204, |
|
"learning_rate": 1.926903856110311e-07, |
|
"logits/chosen": -1.8510675430297852, |
|
"logits/rejected": -1.7864242792129517, |
|
"logps/chosen": -412.6505432128906, |
|
"logps/rejected": -492.79095458984375, |
|
"loss": 0.53, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.102355480194092, |
|
"rewards/margins": 0.708962082862854, |
|
"rewards/rejected": -2.8113174438476562, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 28.29885030565513, |
|
"learning_rate": 1.8780804765620746e-07, |
|
"logits/chosen": -1.8249950408935547, |
|
"logits/rejected": -1.7665761709213257, |
|
"logps/chosen": -403.99609375, |
|
"logps/rejected": -481.81103515625, |
|
"loss": 0.5048, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.150357961654663, |
|
"rewards/margins": 0.908871054649353, |
|
"rewards/rejected": -3.0592291355133057, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 32.08952273669513, |
|
"learning_rate": 1.82950866607949e-07, |
|
"logits/chosen": -1.87527596950531, |
|
"logits/rejected": -1.8300920724868774, |
|
"logps/chosen": -415.8727111816406, |
|
"logps/rejected": -479.55419921875, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.0913748741149902, |
|
"rewards/margins": 0.8811753988265991, |
|
"rewards/rejected": -2.972550630569458, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 34.73789118478527, |
|
"learning_rate": 1.7812080721643973e-07, |
|
"logits/chosen": -1.8299520015716553, |
|
"logits/rejected": -1.7463247776031494, |
|
"logps/chosen": -407.6546325683594, |
|
"logps/rejected": -461.5155334472656, |
|
"loss": 0.5281, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9930970668792725, |
|
"rewards/margins": 0.8382734060287476, |
|
"rewards/rejected": -2.8313703536987305, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_logits/chosen": -1.7563356161117554, |
|
"eval_logits/rejected": -1.7503989934921265, |
|
"eval_logps/chosen": -391.30999755859375, |
|
"eval_logps/rejected": -466.37115478515625, |
|
"eval_loss": 0.5488813519477844, |
|
"eval_rewards/accuracies": 0.72374427318573, |
|
"eval_rewards/chosen": -2.2855324745178223, |
|
"eval_rewards/margins": 0.674912691116333, |
|
"eval_rewards/rejected": -2.960444927215576, |
|
"eval_runtime": 536.1349, |
|
"eval_samples_per_second": 13.056, |
|
"eval_steps_per_second": 0.408, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 22.04228728782992, |
|
"learning_rate": 1.7331982326103918e-07, |
|
"logits/chosen": -1.9050697088241577, |
|
"logits/rejected": -1.8864399194717407, |
|
"logps/chosen": -400.99151611328125, |
|
"logps/rejected": -456.43377685546875, |
|
"loss": 0.5236, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9019591808319092, |
|
"rewards/margins": 0.7700345516204834, |
|
"rewards/rejected": -2.6719937324523926, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 32.9478891711517, |
|
"learning_rate": 1.6854985675997063e-07, |
|
"logits/chosen": -1.857361078262329, |
|
"logits/rejected": -1.8371422290802002, |
|
"logps/chosen": -456.65582275390625, |
|
"logps/rejected": -527.1624755859375, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -2.3455679416656494, |
|
"rewards/margins": 0.7390109300613403, |
|
"rewards/rejected": -3.0845787525177, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 35.31307694928471, |
|
"learning_rate": 1.638128371847662e-07, |
|
"logits/chosen": -1.8157202005386353, |
|
"logits/rejected": -1.7822942733764648, |
|
"logps/chosen": -413.3636779785156, |
|
"logps/rejected": -507.03338623046875, |
|
"loss": 0.5299, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -2.1275038719177246, |
|
"rewards/margins": 0.8648099899291992, |
|
"rewards/rejected": -2.992314338684082, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 39.68360857124647, |
|
"learning_rate": 1.5911068067978818e-07, |
|
"logits/chosen": -1.8612645864486694, |
|
"logits/rejected": -1.8140894174575806, |
|
"logps/chosen": -447.96697998046875, |
|
"logps/rejected": -535.6785888671875, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.447895050048828, |
|
"rewards/margins": 0.799897313117981, |
|
"rewards/rejected": -3.2477920055389404, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 45.56752540087649, |
|
"learning_rate": 1.5444528928713985e-07, |
|
"logits/chosen": -1.8386377096176147, |
|
"logits/rejected": -1.773667335510254, |
|
"logps/chosen": -397.98663330078125, |
|
"logps/rejected": -473.56329345703125, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -2.0095162391662598, |
|
"rewards/margins": 0.8521126508712769, |
|
"rewards/rejected": -2.861629009246826, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 28.03328574000068, |
|
"learning_rate": 1.4981855017728197e-07, |
|
"logits/chosen": -1.7747135162353516, |
|
"logits/rejected": -1.7615177631378174, |
|
"logps/chosen": -415.66680908203125, |
|
"logps/rejected": -485.73944091796875, |
|
"loss": 0.5243, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.2126426696777344, |
|
"rewards/margins": 0.8091427087783813, |
|
"rewards/rejected": -3.021785259246826, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 29.571461514972317, |
|
"learning_rate": 1.452323348856639e-07, |
|
"logits/chosen": -1.9694970846176147, |
|
"logits/rejected": -1.9434292316436768, |
|
"logps/chosen": -398.3510437011719, |
|
"logps/rejected": -494.0318908691406, |
|
"loss": 0.5583, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8146642446517944, |
|
"rewards/margins": 0.8760486841201782, |
|
"rewards/rejected": -2.6907129287719727, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 24.402984146105567, |
|
"learning_rate": 1.406884985556804e-07, |
|
"logits/chosen": -1.8805034160614014, |
|
"logits/rejected": -1.8436615467071533, |
|
"logps/chosen": -395.9003601074219, |
|
"logps/rejected": -471.02685546875, |
|
"loss": 0.5487, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.027919292449951, |
|
"rewards/margins": 0.7736718058586121, |
|
"rewards/rejected": -2.801591396331787, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 20.687254330852166, |
|
"learning_rate": 1.361888791882575e-07, |
|
"logits/chosen": -1.900747299194336, |
|
"logits/rejected": -1.8584699630737305, |
|
"logps/chosen": -339.09442138671875, |
|
"logps/rejected": -388.8125, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.7120403051376343, |
|
"rewards/margins": 0.5825742483139038, |
|
"rewards/rejected": -2.294614315032959, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 26.083643158531725, |
|
"learning_rate": 1.3173529689837354e-07, |
|
"logits/chosen": -2.0513994693756104, |
|
"logits/rejected": -1.986104965209961, |
|
"logps/chosen": -375.33746337890625, |
|
"logps/rejected": -463.71051025390625, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.7211412191390991, |
|
"rewards/margins": 0.6738361120223999, |
|
"rewards/rejected": -2.394977331161499, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -1.8046900033950806, |
|
"eval_logits/rejected": -1.7967232465744019, |
|
"eval_logps/chosen": -393.0003662109375, |
|
"eval_logps/rejected": -471.0760192871094, |
|
"eval_loss": 0.5447794198989868, |
|
"eval_rewards/accuracies": 0.7243150472640991, |
|
"eval_rewards/chosen": -2.302436351776123, |
|
"eval_rewards/margins": 0.7050578594207764, |
|
"eval_rewards/rejected": -3.0074942111968994, |
|
"eval_runtime": 543.7258, |
|
"eval_samples_per_second": 12.874, |
|
"eval_steps_per_second": 0.403, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 27.1913571170997, |
|
"learning_rate": 1.273295531788156e-07, |
|
"logits/chosen": -1.8818267583847046, |
|
"logits/rejected": -1.8337571620941162, |
|
"logps/chosen": -382.8564758300781, |
|
"logps/rejected": -479.34423828125, |
|
"loss": 0.4961, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9514567852020264, |
|
"rewards/margins": 0.9589536786079407, |
|
"rewards/rejected": -2.9104104042053223, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 30.87994960869174, |
|
"learning_rate": 1.2297343017146726e-07, |
|
"logits/chosen": -1.9285227060317993, |
|
"logits/rejected": -1.879024863243103, |
|
"logps/chosen": -413.84722900390625, |
|
"logps/rejected": -471.5714416503906, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -2.0496084690093994, |
|
"rewards/margins": 0.7111380696296692, |
|
"rewards/rejected": -2.760746479034424, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 30.103114841199876, |
|
"learning_rate": 1.1866868994642534e-07, |
|
"logits/chosen": -1.910308599472046, |
|
"logits/rejected": -1.8798065185546875, |
|
"logps/chosen": -428.7994689941406, |
|
"logps/rejected": -501.61572265625, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0664076805114746, |
|
"rewards/margins": 0.800611138343811, |
|
"rewards/rejected": -2.867018938064575, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 33.952742355560765, |
|
"learning_rate": 1.1441707378923474e-07, |
|
"logits/chosen": -1.954697608947754, |
|
"logits/rejected": -1.9360759258270264, |
|
"logps/chosen": -358.89459228515625, |
|
"logps/rejected": -451.65509033203125, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.7160298824310303, |
|
"rewards/margins": 0.8581940531730652, |
|
"rewards/rejected": -2.5742239952087402, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 26.669019235150035, |
|
"learning_rate": 1.1022030149653133e-07, |
|
"logits/chosen": -1.8900222778320312, |
|
"logits/rejected": -1.8807737827301025, |
|
"logps/chosen": -370.8710021972656, |
|
"logps/rejected": -462.078857421875, |
|
"loss": 0.5378, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -2.005030870437622, |
|
"rewards/margins": 0.7315531969070435, |
|
"rewards/rejected": -2.736584186553955, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 29.39370070872787, |
|
"learning_rate": 1.06080070680377e-07, |
|
"logits/chosen": -1.9039020538330078, |
|
"logits/rejected": -1.8967231512069702, |
|
"logps/chosen": -407.52886962890625, |
|
"logps/rejected": -471.5879821777344, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9449284076690674, |
|
"rewards/margins": 0.866219699382782, |
|
"rewards/rejected": -2.811148166656494, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 34.28058476728983, |
|
"learning_rate": 1.01998056081568e-07, |
|
"logits/chosen": -1.947778344154358, |
|
"logits/rejected": -1.933396339416504, |
|
"logps/chosen": -409.4700622558594, |
|
"logps/rejected": -500.43798828125, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.1493587493896484, |
|
"rewards/margins": 0.8992208242416382, |
|
"rewards/rejected": -3.048579692840576, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 38.74422247304706, |
|
"learning_rate": 9.797590889219587e-08, |
|
"logits/chosen": -1.9459298849105835, |
|
"logits/rejected": -1.902991533279419, |
|
"logps/chosen": -424.58380126953125, |
|
"logps/rejected": -504.6437072753906, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1141209602355957, |
|
"rewards/margins": 0.8704532384872437, |
|
"rewards/rejected": -2.984574317932129, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 28.244962433086215, |
|
"learning_rate": 9.401525608773292e-08, |
|
"logits/chosen": -1.8756380081176758, |
|
"logits/rejected": -1.8184016942977905, |
|
"logps/chosen": -392.9984130859375, |
|
"logps/rejected": -461.8180236816406, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.9684727191925049, |
|
"rewards/margins": 0.7552623748779297, |
|
"rewards/rejected": -2.7237350940704346, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 34.33816096896355, |
|
"learning_rate": 9.011769976891367e-08, |
|
"logits/chosen": -1.903464913368225, |
|
"logits/rejected": -1.8477399349212646, |
|
"logps/chosen": -419.75750732421875, |
|
"logps/rejected": -506.533935546875, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.119554042816162, |
|
"rewards/margins": 0.8639281392097473, |
|
"rewards/rejected": -2.9834823608398438, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -1.824761152267456, |
|
"eval_logits/rejected": -1.8238047361373901, |
|
"eval_logps/chosen": -383.5680236816406, |
|
"eval_logps/rejected": -460.3614196777344, |
|
"eval_loss": 0.545096218585968, |
|
"eval_rewards/accuracies": 0.7186073064804077, |
|
"eval_rewards/chosen": -2.208112955093384, |
|
"eval_rewards/margins": 0.6922349333763123, |
|
"eval_rewards/rejected": -2.90034818649292, |
|
"eval_runtime": 523.3073, |
|
"eval_samples_per_second": 13.376, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 31.642563855828758, |
|
"learning_rate": 8.628481651367875e-08, |
|
"logits/chosen": -1.98309326171875, |
|
"logits/rejected": -1.9879448413848877, |
|
"logps/chosen": -390.30712890625, |
|
"logps/rejected": -448.2904357910156, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.7398111820220947, |
|
"rewards/margins": 0.5182247757911682, |
|
"rewards/rejected": -2.258035898208618, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 30.145195997712573, |
|
"learning_rate": 8.251815673944218e-08, |
|
"logits/chosen": -1.9566547870635986, |
|
"logits/rejected": -1.9202098846435547, |
|
"logps/chosen": -395.9665832519531, |
|
"logps/rejected": -496.5779724121094, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9744913578033447, |
|
"rewards/margins": 0.9160418510437012, |
|
"rewards/rejected": -2.890532970428467, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 26.76251092001294, |
|
"learning_rate": 7.881924407594129e-08, |
|
"logits/chosen": -1.9259990453720093, |
|
"logits/rejected": -1.8814588785171509, |
|
"logps/chosen": -420.6258850097656, |
|
"logps/rejected": -471.1128845214844, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -2.2005252838134766, |
|
"rewards/margins": 0.6154937744140625, |
|
"rewards/rejected": -2.816019296646118, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 31.270267795635966, |
|
"learning_rate": 7.518957474892148e-08, |
|
"logits/chosen": -1.873970627784729, |
|
"logits/rejected": -1.8780710697174072, |
|
"logps/chosen": -387.642333984375, |
|
"logps/rejected": -460.996337890625, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0759739875793457, |
|
"rewards/margins": 0.5735403895378113, |
|
"rewards/rejected": -2.6495144367218018, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 25.79672067849548, |
|
"learning_rate": 7.16306169749074e-08, |
|
"logits/chosen": -1.9269657135009766, |
|
"logits/rejected": -1.8575401306152344, |
|
"logps/chosen": -386.0765380859375, |
|
"logps/rejected": -446.42095947265625, |
|
"loss": 0.5122, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -1.8944480419158936, |
|
"rewards/margins": 0.7809109687805176, |
|
"rewards/rejected": -2.675359010696411, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 36.12006272451077, |
|
"learning_rate": 6.814381036730274e-08, |
|
"logits/chosen": -1.9610059261322021, |
|
"logits/rejected": -1.9268226623535156, |
|
"logps/chosen": -397.60357666015625, |
|
"logps/rejected": -479.10125732421875, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.9262597560882568, |
|
"rewards/margins": 0.6647717952728271, |
|
"rewards/rejected": -2.591031551361084, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 32.103751962383164, |
|
"learning_rate": 6.473056535406035e-08, |
|
"logits/chosen": -1.970505714416504, |
|
"logits/rejected": -1.9748294353485107, |
|
"logps/chosen": -398.49639892578125, |
|
"logps/rejected": -483.1766052246094, |
|
"loss": 0.5542, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9573405981063843, |
|
"rewards/margins": 0.7238850593566895, |
|
"rewards/rejected": -2.681225538253784, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 29.27367179768827, |
|
"learning_rate": 6.139226260715872e-08, |
|
"logits/chosen": -1.9642279148101807, |
|
"logits/rejected": -1.9199190139770508, |
|
"logps/chosen": -412.734619140625, |
|
"logps/rejected": -482.08740234375, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -2.0462894439697266, |
|
"rewards/margins": 0.792068600654602, |
|
"rewards/rejected": -2.838358163833618, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 36.00438391939365, |
|
"learning_rate": 5.8130252484113964e-08, |
|
"logits/chosen": -1.9426565170288086, |
|
"logits/rejected": -1.9226014614105225, |
|
"logps/chosen": -385.748046875, |
|
"logps/rejected": -454.22412109375, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8511062860488892, |
|
"rewards/margins": 0.8074220418930054, |
|
"rewards/rejected": -2.6585285663604736, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 32.96643024329086, |
|
"learning_rate": 5.4945854481754734e-08, |
|
"logits/chosen": -1.9304873943328857, |
|
"logits/rejected": -1.900002121925354, |
|
"logps/chosen": -371.5887145996094, |
|
"logps/rejected": -445.46221923828125, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.909517526626587, |
|
"rewards/margins": 0.7846697568893433, |
|
"rewards/rejected": -2.6941871643066406, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_logits/chosen": -1.8071422576904297, |
|
"eval_logits/rejected": -1.7997641563415527, |
|
"eval_logps/chosen": -391.1993408203125, |
|
"eval_logps/rejected": -469.79913330078125, |
|
"eval_loss": 0.5436315536499023, |
|
"eval_rewards/accuracies": 0.7214611768722534, |
|
"eval_rewards/chosen": -2.28442645072937, |
|
"eval_rewards/margins": 0.7102989554405212, |
|
"eval_rewards/rejected": -2.994725227355957, |
|
"eval_runtime": 524.298, |
|
"eval_samples_per_second": 13.351, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 37.572173988295035, |
|
"learning_rate": 5.184035670247988e-08, |
|
"logits/chosen": -1.934077262878418, |
|
"logits/rejected": -1.920440912246704, |
|
"logps/chosen": -378.1131286621094, |
|
"logps/rejected": -462.83929443359375, |
|
"loss": 0.5312, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9387060403823853, |
|
"rewards/margins": 0.7842427492141724, |
|
"rewards/rejected": -2.7229487895965576, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 35.65341932271922, |
|
"learning_rate": 4.881501533321605e-08, |
|
"logits/chosen": -1.87544846534729, |
|
"logits/rejected": -1.8604532480239868, |
|
"logps/chosen": -388.5731506347656, |
|
"logps/rejected": -475.32330322265625, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1021876335144043, |
|
"rewards/margins": 0.7690648436546326, |
|
"rewards/rejected": -2.8712525367736816, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 32.19415920453824, |
|
"learning_rate": 4.5871054137284564e-08, |
|
"logits/chosen": -1.9715772867202759, |
|
"logits/rejected": -1.93185555934906, |
|
"logps/chosen": -403.1676025390625, |
|
"logps/rejected": -498.1748962402344, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9203879833221436, |
|
"rewards/margins": 0.8546259999275208, |
|
"rewards/rejected": -2.7750141620635986, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 57.11327150205332, |
|
"learning_rate": 4.300966395938377e-08, |
|
"logits/chosen": -1.9714921712875366, |
|
"logits/rejected": -1.9407069683074951, |
|
"logps/chosen": -409.3877258300781, |
|
"logps/rejected": -456.50244140625, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.9431222677230835, |
|
"rewards/margins": 0.5195000767707825, |
|
"rewards/rejected": -2.4626221656799316, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 21.830708345963956, |
|
"learning_rate": 4.023200224388787e-08, |
|
"logits/chosen": -1.9089914560317993, |
|
"logits/rejected": -1.855542778968811, |
|
"logps/chosen": -377.63653564453125, |
|
"logps/rejected": -457.47052001953125, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.7652651071548462, |
|
"rewards/margins": 0.8306269645690918, |
|
"rewards/rejected": -2.5958924293518066, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 38.15424519204087, |
|
"learning_rate": 3.7539192566655246e-08, |
|
"logits/chosen": -1.870527982711792, |
|
"logits/rejected": -1.821215271949768, |
|
"logps/chosen": -396.0817565917969, |
|
"logps/rejected": -436.74102783203125, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.0135273933410645, |
|
"rewards/margins": 0.600884735584259, |
|
"rewards/rejected": -2.6144118309020996, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 31.36535036359726, |
|
"learning_rate": 3.4932324180537736e-08, |
|
"logits/chosen": -1.926028847694397, |
|
"logits/rejected": -1.9150245189666748, |
|
"logps/chosen": -379.81573486328125, |
|
"logps/rejected": -472.4080505371094, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.8782703876495361, |
|
"rewards/margins": 0.8038197755813599, |
|
"rewards/rejected": -2.6820900440216064, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 29.336291125895066, |
|
"learning_rate": 3.24124515747731e-08, |
|
"logits/chosen": -1.9342174530029297, |
|
"logits/rejected": -1.8941189050674438, |
|
"logps/chosen": -409.3456726074219, |
|
"logps/rejected": -470.3741149902344, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9259653091430664, |
|
"rewards/margins": 0.7116618752479553, |
|
"rewards/rejected": -2.637627363204956, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 26.986665461110498, |
|
"learning_rate": 2.998059404843947e-08, |
|
"logits/chosen": -1.8567430973052979, |
|
"logits/rejected": -1.8131777048110962, |
|
"logps/chosen": -383.24371337890625, |
|
"logps/rejected": -440.67315673828125, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.9675956964492798, |
|
"rewards/margins": 0.6357102990150452, |
|
"rewards/rejected": -2.6033058166503906, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 26.37071629611169, |
|
"learning_rate": 2.763773529814506e-08, |
|
"logits/chosen": -1.8718721866607666, |
|
"logits/rejected": -1.8146419525146484, |
|
"logps/chosen": -384.16162109375, |
|
"logps/rejected": -487.1327209472656, |
|
"loss": 0.4844, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8588807582855225, |
|
"rewards/margins": 0.849262535572052, |
|
"rewards/rejected": -2.708143711090088, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_logits/chosen": -1.7992874383926392, |
|
"eval_logits/rejected": -1.7887682914733887, |
|
"eval_logps/chosen": -386.97613525390625, |
|
"eval_logps/rejected": -465.661376953125, |
|
"eval_loss": 0.5432813763618469, |
|
"eval_rewards/accuracies": 0.719748854637146, |
|
"eval_rewards/chosen": -2.242194175720215, |
|
"eval_rewards/margins": 0.7111533284187317, |
|
"eval_rewards/rejected": -2.9533474445343018, |
|
"eval_runtime": 546.6018, |
|
"eval_samples_per_second": 12.806, |
|
"eval_steps_per_second": 0.401, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 33.8754723176866, |
|
"learning_rate": 2.5384823020118212e-08, |
|
"logits/chosen": -1.8839702606201172, |
|
"logits/rejected": -1.8382689952850342, |
|
"logps/chosen": -380.5359191894531, |
|
"logps/rejected": -447.4384765625, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.76059889793396, |
|
"rewards/margins": 0.7574474215507507, |
|
"rewards/rejected": -2.5180463790893555, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 26.785794346840675, |
|
"learning_rate": 2.3222768526860698e-08, |
|
"logits/chosen": -1.8873153924942017, |
|
"logits/rejected": -1.8357185125350952, |
|
"logps/chosen": -381.34564208984375, |
|
"logps/rejected": -430.72259521484375, |
|
"loss": 0.5231, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.8608728647232056, |
|
"rewards/margins": 0.7487791180610657, |
|
"rewards/rejected": -2.609651803970337, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 30.22760318351379, |
|
"learning_rate": 2.1152446378517818e-08, |
|
"logits/chosen": -1.8901411294937134, |
|
"logits/rejected": -1.839329719543457, |
|
"logps/chosen": -393.60308837890625, |
|
"logps/rejected": -461.057861328125, |
|
"loss": 0.5391, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.9723374843597412, |
|
"rewards/margins": 0.6977485418319702, |
|
"rewards/rejected": -2.670086145401001, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 26.484898729776308, |
|
"learning_rate": 1.9174694029115146e-08, |
|
"logits/chosen": -1.9374538660049438, |
|
"logits/rejected": -1.8765513896942139, |
|
"logps/chosen": -434.1646423339844, |
|
"logps/rejected": -468.3814392089844, |
|
"loss": 0.5262, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.045320987701416, |
|
"rewards/margins": 0.6686374545097351, |
|
"rewards/rejected": -2.713958263397217, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 26.251643116785377, |
|
"learning_rate": 1.7290311487804687e-08, |
|
"logits/chosen": -1.9080512523651123, |
|
"logits/rejected": -1.84622323513031, |
|
"logps/chosen": -375.2956848144531, |
|
"logps/rejected": -463.65765380859375, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9260823726654053, |
|
"rewards/margins": 0.8633429408073425, |
|
"rewards/rejected": -2.7894253730773926, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 22.1873285162568, |
|
"learning_rate": 1.5500060995258134e-08, |
|
"logits/chosen": -1.9254217147827148, |
|
"logits/rejected": -1.8602027893066406, |
|
"logps/chosen": -406.3652648925781, |
|
"logps/rejected": -459.74273681640625, |
|
"loss": 0.5039, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8533226251602173, |
|
"rewards/margins": 0.7270603179931641, |
|
"rewards/rejected": -2.580382823944092, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 31.87656820271237, |
|
"learning_rate": 1.3804666715337116e-08, |
|
"logits/chosen": -1.911505103111267, |
|
"logits/rejected": -1.8812297582626343, |
|
"logps/chosen": -399.38519287109375, |
|
"logps/rejected": -496.04168701171875, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9619176387786865, |
|
"rewards/margins": 0.8611427545547485, |
|
"rewards/rejected": -2.8230605125427246, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 28.90758640199268, |
|
"learning_rate": 1.2204814442165812e-08, |
|
"logits/chosen": -1.8718591928482056, |
|
"logits/rejected": -1.8608993291854858, |
|
"logps/chosen": -397.22100830078125, |
|
"logps/rejected": -456.198486328125, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.995205283164978, |
|
"rewards/margins": 0.7341451644897461, |
|
"rewards/rejected": -2.7293505668640137, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 31.58825212692507, |
|
"learning_rate": 1.070115132272445e-08, |
|
"logits/chosen": -1.8871160745620728, |
|
"logits/rejected": -1.827559232711792, |
|
"logps/chosen": -390.8136901855469, |
|
"logps/rejected": -458.43798828125, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -1.8970882892608643, |
|
"rewards/margins": 0.9112474322319031, |
|
"rewards/rejected": -2.808335781097412, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 29.086437438100777, |
|
"learning_rate": 9.294285595075669e-09, |
|
"logits/chosen": -1.9274282455444336, |
|
"logits/rejected": -1.9093879461288452, |
|
"logps/chosen": -414.84576416015625, |
|
"logps/rejected": -500.18768310546875, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -2.002596616744995, |
|
"rewards/margins": 0.8606833219528198, |
|
"rewards/rejected": -2.8632798194885254, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_logits/chosen": -1.7875818014144897, |
|
"eval_logits/rejected": -1.7765936851501465, |
|
"eval_logps/chosen": -389.4626159667969, |
|
"eval_logps/rejected": -469.811279296875, |
|
"eval_loss": 0.5427327156066895, |
|
"eval_rewards/accuracies": 0.7208904027938843, |
|
"eval_rewards/chosen": -2.267058849334717, |
|
"eval_rewards/margins": 0.727787435054779, |
|
"eval_rewards/rejected": -2.9948465824127197, |
|
"eval_runtime": 523.5533, |
|
"eval_samples_per_second": 13.37, |
|
"eval_steps_per_second": 0.418, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 27.963113959175715, |
|
"learning_rate": 7.984786342329492e-09, |
|
"logits/chosen": -1.9024436473846436, |
|
"logits/rejected": -1.8931423425674438, |
|
"logps/chosen": -392.8687438964844, |
|
"logps/rejected": -472.52203369140625, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.090585470199585, |
|
"rewards/margins": 0.7528320550918579, |
|
"rewards/rejected": -2.843417167663574, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 38.701178208422625, |
|
"learning_rate": 6.773183262446914e-09, |
|
"logits/chosen": -1.9047428369522095, |
|
"logits/rejected": -1.8428666591644287, |
|
"logps/chosen": -408.89788818359375, |
|
"logps/rejected": -467.36309814453125, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9106022119522095, |
|
"rewards/margins": 0.8250144720077515, |
|
"rewards/rejected": -2.735616683959961, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 30.118277072421385, |
|
"learning_rate": 5.6599664539749295e-09, |
|
"logits/chosen": -1.9470701217651367, |
|
"logits/rejected": -1.926031470298767, |
|
"logps/chosen": -415.57330322265625, |
|
"logps/rejected": -491.3902282714844, |
|
"loss": 0.5135, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -1.970380425453186, |
|
"rewards/margins": 0.8686148524284363, |
|
"rewards/rejected": -2.8389952182769775, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 37.40579887540256, |
|
"learning_rate": 4.645586217799452e-09, |
|
"logits/chosen": -1.9280283451080322, |
|
"logits/rejected": -1.9276561737060547, |
|
"logps/chosen": -423.79217529296875, |
|
"logps/rejected": -496.7911071777344, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.9242738485336304, |
|
"rewards/margins": 0.7551409602165222, |
|
"rewards/rejected": -2.679414749145508, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 32.746312132544105, |
|
"learning_rate": 3.730452874996737e-09, |
|
"logits/chosen": -1.92330801486969, |
|
"logits/rejected": -1.8721330165863037, |
|
"logps/chosen": -395.4407958984375, |
|
"logps/rejected": -454.64178466796875, |
|
"loss": 0.5192, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9143590927124023, |
|
"rewards/margins": 0.6810831427574158, |
|
"rewards/rejected": -2.595442056655884, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 35.835333829114475, |
|
"learning_rate": 2.9149366008568987e-09, |
|
"logits/chosen": -1.9155769348144531, |
|
"logits/rejected": -1.8720881938934326, |
|
"logps/chosen": -402.2400207519531, |
|
"logps/rejected": -452.55755615234375, |
|
"loss": 0.5239, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.848528265953064, |
|
"rewards/margins": 0.7459059953689575, |
|
"rewards/rejected": -2.5944347381591797, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 27.738054909743056, |
|
"learning_rate": 2.1993672751463576e-09, |
|
"logits/chosen": -1.9466373920440674, |
|
"logits/rejected": -1.9023081064224243, |
|
"logps/chosen": -407.79254150390625, |
|
"logps/rejected": -487.1842346191406, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0100741386413574, |
|
"rewards/margins": 0.8334406614303589, |
|
"rewards/rejected": -2.843514919281006, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 28.810543533175498, |
|
"learning_rate": 1.5840343486700215e-09, |
|
"logits/chosen": -1.9565961360931396, |
|
"logits/rejected": -1.8820337057113647, |
|
"logps/chosen": -377.89697265625, |
|
"logps/rejected": -461.2203063964844, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7978055477142334, |
|
"rewards/margins": 0.8113381266593933, |
|
"rewards/rejected": -2.6091437339782715, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 26.747095486222516, |
|
"learning_rate": 1.0691867261874154e-09, |
|
"logits/chosen": -1.9276363849639893, |
|
"logits/rejected": -1.8795725107192993, |
|
"logps/chosen": -400.2092590332031, |
|
"logps/rejected": -457.8243103027344, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.8860292434692383, |
|
"rewards/margins": 0.778502881526947, |
|
"rewards/rejected": -2.66453218460083, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 21.69669900920832, |
|
"learning_rate": 6.550326657293881e-10, |
|
"logits/chosen": -1.915302038192749, |
|
"logits/rejected": -1.897491216659546, |
|
"logps/chosen": -411.01904296875, |
|
"logps/rejected": -480.05078125, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.052908420562744, |
|
"rewards/margins": 0.8378399610519409, |
|
"rewards/rejected": -2.8907482624053955, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -1.7904165983200073, |
|
"eval_logits/rejected": -1.7796399593353271, |
|
"eval_logps/chosen": -389.3405456542969, |
|
"eval_logps/rejected": -469.7990417480469, |
|
"eval_loss": 0.542646050453186, |
|
"eval_rewards/accuracies": 0.7214611768722534, |
|
"eval_rewards/chosen": -2.2658379077911377, |
|
"eval_rewards/margins": 0.7288866639137268, |
|
"eval_rewards/rejected": -2.9947245121002197, |
|
"eval_runtime": 549.1787, |
|
"eval_samples_per_second": 12.746, |
|
"eval_steps_per_second": 0.399, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 27.017405119205325, |
|
"learning_rate": 3.4173969435710715e-10, |
|
"logits/chosen": -1.907459020614624, |
|
"logits/rejected": -1.9028345346450806, |
|
"logps/chosen": -404.1794738769531, |
|
"logps/rejected": -475.02490234375, |
|
"loss": 0.5398, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -2.1053857803344727, |
|
"rewards/margins": 0.5925677418708801, |
|
"rewards/rejected": -2.697953701019287, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 28.429723265538055, |
|
"learning_rate": 1.2943454039654467e-10, |
|
"logits/chosen": -1.8941481113433838, |
|
"logits/rejected": -1.8836424350738525, |
|
"logps/chosen": -381.0569152832031, |
|
"logps/rejected": -472.2247009277344, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.9636991024017334, |
|
"rewards/margins": 0.7842205762863159, |
|
"rewards/rejected": -2.7479193210601807, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 23.62637877799544, |
|
"learning_rate": 1.8203082176287964e-11, |
|
"logits/chosen": -1.8356783390045166, |
|
"logits/rejected": -1.797844648361206, |
|
"logps/chosen": -407.73516845703125, |
|
"logps/rejected": -473.55120849609375, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.220022201538086, |
|
"rewards/margins": 0.7199020385742188, |
|
"rewards/rejected": -2.9399242401123047, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1736, |
|
"total_flos": 0.0, |
|
"train_loss": 0.55459001399405, |
|
"train_runtime": 38266.551, |
|
"train_samples_per_second": 2.904, |
|
"train_steps_per_second": 0.045 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1736, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|