|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1455, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.36986301369863e-08, |
|
"logits/chosen": -2.8295512199401855, |
|
"logits/rejected": -2.9639337062835693, |
|
"logps/chosen": -242.64569091796875, |
|
"logps/rejected": -75.87144470214844, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.36986301369863e-07, |
|
"logits/chosen": -2.778250217437744, |
|
"logits/rejected": -2.813397169113159, |
|
"logps/chosen": -292.17218017578125, |
|
"logps/rejected": -78.88499450683594, |
|
"loss": 0.6942, |
|
"rewards/accuracies": 0.4097222089767456, |
|
"rewards/chosen": -0.001105638686567545, |
|
"rewards/margins": -0.0026314095593988895, |
|
"rewards/rejected": 0.0015257701743394136, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.73972602739726e-07, |
|
"logits/chosen": -2.76747465133667, |
|
"logits/rejected": -2.7585418224334717, |
|
"logps/chosen": -300.1101989746094, |
|
"logps/rejected": -81.14244079589844, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -0.005251293070614338, |
|
"rewards/margins": -0.003377618733793497, |
|
"rewards/rejected": -0.001873674220405519, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.10958904109589e-07, |
|
"logits/chosen": -2.776379108428955, |
|
"logits/rejected": -2.7856156826019287, |
|
"logps/chosen": -290.00897216796875, |
|
"logps/rejected": -78.96559143066406, |
|
"loss": 0.6885, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.005190015770494938, |
|
"rewards/margins": 0.011348642408847809, |
|
"rewards/rejected": -0.006158626172691584, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 5.47945205479452e-07, |
|
"logits/chosen": -2.7960293292999268, |
|
"logits/rejected": -2.814054250717163, |
|
"logps/chosen": -245.3889617919922, |
|
"logps/rejected": -84.38166046142578, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.008710930123925209, |
|
"rewards/margins": 0.027255941182374954, |
|
"rewards/rejected": -0.018545005470514297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 6.84931506849315e-07, |
|
"logits/chosen": -2.7724318504333496, |
|
"logits/rejected": -2.8249223232269287, |
|
"logps/chosen": -252.29598999023438, |
|
"logps/rejected": -80.92951965332031, |
|
"loss": 0.6751, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.00529166916385293, |
|
"rewards/margins": 0.03346817195415497, |
|
"rewards/rejected": -0.028176506981253624, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 8.21917808219178e-07, |
|
"logits/chosen": -2.8436636924743652, |
|
"logits/rejected": -2.7858223915100098, |
|
"logps/chosen": -281.6982421875, |
|
"logps/rejected": -78.57672119140625, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 0.0231462512165308, |
|
"rewards/margins": 0.06612209975719452, |
|
"rewards/rejected": -0.04297585040330887, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.58904109589041e-07, |
|
"logits/chosen": -2.8172953128814697, |
|
"logits/rejected": -2.813781261444092, |
|
"logps/chosen": -264.85235595703125, |
|
"logps/rejected": -74.86351013183594, |
|
"loss": 0.6541, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.026092741638422012, |
|
"rewards/margins": 0.0869915708899498, |
|
"rewards/rejected": -0.060898829251527786, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.095890410958904e-06, |
|
"logits/chosen": -2.8075528144836426, |
|
"logits/rejected": -2.8162968158721924, |
|
"logps/chosen": -255.69869995117188, |
|
"logps/rejected": -73.08162689208984, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0321514829993248, |
|
"rewards/margins": 0.11635198444128036, |
|
"rewards/rejected": -0.08420050889253616, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.232876712328767e-06, |
|
"logits/chosen": -2.841773748397827, |
|
"logits/rejected": -2.825883388519287, |
|
"logps/chosen": -267.5022888183594, |
|
"logps/rejected": -85.21376037597656, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.030611341819167137, |
|
"rewards/margins": 0.16599974036216736, |
|
"rewards/rejected": -0.13538840413093567, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.36986301369863e-06, |
|
"logits/chosen": -2.852410316467285, |
|
"logits/rejected": -2.8519997596740723, |
|
"logps/chosen": -286.9059143066406, |
|
"logps/rejected": -82.15430450439453, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": 0.057482797652482986, |
|
"rewards/margins": 0.26051220297813416, |
|
"rewards/rejected": -0.20302939414978027, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5068493150684932e-06, |
|
"logits/chosen": -2.7674930095672607, |
|
"logits/rejected": -2.7689127922058105, |
|
"logps/chosen": -277.5893249511719, |
|
"logps/rejected": -84.25486755371094, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.05910978466272354, |
|
"rewards/margins": 0.3146067261695862, |
|
"rewards/rejected": -0.25549691915512085, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.643835616438356e-06, |
|
"logits/chosen": -2.7497572898864746, |
|
"logits/rejected": -2.808637857437134, |
|
"logps/chosen": -277.6904602050781, |
|
"logps/rejected": -82.65525817871094, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.08153042942285538, |
|
"rewards/margins": 0.4226166605949402, |
|
"rewards/rejected": -0.3410862386226654, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.780821917808219e-06, |
|
"logits/chosen": -2.765390634536743, |
|
"logits/rejected": -2.7765676975250244, |
|
"logps/chosen": -284.7118835449219, |
|
"logps/rejected": -90.37391662597656, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.1007312536239624, |
|
"rewards/margins": 0.49337419867515564, |
|
"rewards/rejected": -0.39264291524887085, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.917808219178082e-06, |
|
"logits/chosen": -2.8435187339782715, |
|
"logits/rejected": -2.848928213119507, |
|
"logps/chosen": -293.25323486328125, |
|
"logps/rejected": -88.73191833496094, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.09035232663154602, |
|
"rewards/margins": 0.6013891696929932, |
|
"rewards/rejected": -0.5110368132591248, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9938884644766997e-06, |
|
"logits/chosen": -2.788867235183716, |
|
"logits/rejected": -2.797130584716797, |
|
"logps/chosen": -290.54638671875, |
|
"logps/rejected": -82.95954895019531, |
|
"loss": 0.4398, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.09947594255208969, |
|
"rewards/margins": 0.6646188497543335, |
|
"rewards/rejected": -0.5651428699493408, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.978609625668449e-06, |
|
"logits/chosen": -2.809487819671631, |
|
"logits/rejected": -2.8850138187408447, |
|
"logps/chosen": -271.88629150390625, |
|
"logps/rejected": -83.17223358154297, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.07071445882320404, |
|
"rewards/margins": 0.7117933034896851, |
|
"rewards/rejected": -0.6410789489746094, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9633307868601984e-06, |
|
"logits/chosen": -2.7891199588775635, |
|
"logits/rejected": -2.789405584335327, |
|
"logps/chosen": -288.9054870605469, |
|
"logps/rejected": -86.50032043457031, |
|
"loss": 0.4053, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.13311851024627686, |
|
"rewards/margins": 0.861064076423645, |
|
"rewards/rejected": -0.7279455065727234, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.948051948051948e-06, |
|
"logits/chosen": -2.760305643081665, |
|
"logits/rejected": -2.7756900787353516, |
|
"logps/chosen": -271.7988586425781, |
|
"logps/rejected": -88.26414489746094, |
|
"loss": 0.3764, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.1391439437866211, |
|
"rewards/margins": 0.9942502975463867, |
|
"rewards/rejected": -0.8551063537597656, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9327731092436974e-06, |
|
"logits/chosen": -2.7618346214294434, |
|
"logits/rejected": -2.7937939167022705, |
|
"logps/chosen": -274.2486877441406, |
|
"logps/rejected": -89.63475799560547, |
|
"loss": 0.3633, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.06488532572984695, |
|
"rewards/margins": 0.9964747428894043, |
|
"rewards/rejected": -0.9315894246101379, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.917494270435447e-06, |
|
"logits/chosen": -2.834862232208252, |
|
"logits/rejected": -2.8097951412200928, |
|
"logps/chosen": -267.5804748535156, |
|
"logps/rejected": -88.04959869384766, |
|
"loss": 0.3405, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.11280632019042969, |
|
"rewards/margins": 1.167088508605957, |
|
"rewards/rejected": -1.054282307624817, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.9022154316271963e-06, |
|
"logits/chosen": -2.789674997329712, |
|
"logits/rejected": -2.7489635944366455, |
|
"logps/chosen": -271.354736328125, |
|
"logps/rejected": -101.02803039550781, |
|
"loss": 0.3406, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": 0.08271731436252594, |
|
"rewards/margins": 1.1206178665161133, |
|
"rewards/rejected": -1.037900447845459, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.8869365928189456e-06, |
|
"logits/chosen": -2.8190276622772217, |
|
"logits/rejected": -2.7801876068115234, |
|
"logps/chosen": -252.3684844970703, |
|
"logps/rejected": -83.58897399902344, |
|
"loss": 0.3366, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": 0.047989681363105774, |
|
"rewards/margins": 1.1506556272506714, |
|
"rewards/rejected": -1.102665901184082, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.8716577540106951e-06, |
|
"logits/chosen": -2.7455031871795654, |
|
"logits/rejected": -2.7612051963806152, |
|
"logps/chosen": -296.0608825683594, |
|
"logps/rejected": -85.1841049194336, |
|
"loss": 0.3066, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.12809792160987854, |
|
"rewards/margins": 1.3899600505828857, |
|
"rewards/rejected": -1.2618623971939087, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.8563789152024445e-06, |
|
"logits/chosen": -2.775570869445801, |
|
"logits/rejected": -2.819958209991455, |
|
"logps/chosen": -282.4068908691406, |
|
"logps/rejected": -89.96455383300781, |
|
"loss": 0.3055, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.09846373647451401, |
|
"rewards/margins": 1.4890722036361694, |
|
"rewards/rejected": -1.390608549118042, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.841100076394194e-06, |
|
"logits/chosen": -2.799436092376709, |
|
"logits/rejected": -2.8345470428466797, |
|
"logps/chosen": -265.59735107421875, |
|
"logps/rejected": -96.5230941772461, |
|
"loss": 0.2986, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.011983467265963554, |
|
"rewards/margins": 1.4505140781402588, |
|
"rewards/rejected": -1.462497591972351, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8258212375859433e-06, |
|
"logits/chosen": -2.767268419265747, |
|
"logits/rejected": -2.7291500568389893, |
|
"logps/chosen": -284.69512939453125, |
|
"logps/rejected": -84.67759704589844, |
|
"loss": 0.2975, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": 0.13364484906196594, |
|
"rewards/margins": 1.5072095394134521, |
|
"rewards/rejected": -1.373564600944519, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.8105423987776928e-06, |
|
"logits/chosen": -2.740736961364746, |
|
"logits/rejected": -2.7466208934783936, |
|
"logps/chosen": -276.2334899902344, |
|
"logps/rejected": -102.246826171875, |
|
"loss": 0.271, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.018421288579702377, |
|
"rewards/margins": 1.600907325744629, |
|
"rewards/rejected": -1.5824859142303467, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.7952635599694424e-06, |
|
"logits/chosen": -2.763476610183716, |
|
"logits/rejected": -2.826669216156006, |
|
"logps/chosen": -256.71234130859375, |
|
"logps/rejected": -84.85465240478516, |
|
"loss": 0.2796, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.021867703646421432, |
|
"rewards/margins": 1.4527885913848877, |
|
"rewards/rejected": -1.474656343460083, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.7799847211611915e-06, |
|
"logits/chosen": -2.778783082962036, |
|
"logits/rejected": -2.7868287563323975, |
|
"logps/chosen": -278.37847900390625, |
|
"logps/rejected": -101.5394287109375, |
|
"loss": 0.2617, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.06040378659963608, |
|
"rewards/margins": 1.8995882272720337, |
|
"rewards/rejected": -1.8391841650009155, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.764705882352941e-06, |
|
"logits/chosen": -2.727238893508911, |
|
"logits/rejected": -2.723588466644287, |
|
"logps/chosen": -275.4689025878906, |
|
"logps/rejected": -93.48865509033203, |
|
"loss": 0.2612, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.02573985978960991, |
|
"rewards/margins": 1.606034517288208, |
|
"rewards/rejected": -1.5802944898605347, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7494270435446906e-06, |
|
"logits/chosen": -2.8035991191864014, |
|
"logits/rejected": -2.8192009925842285, |
|
"logps/chosen": -284.1933898925781, |
|
"logps/rejected": -100.13687896728516, |
|
"loss": 0.2521, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.07830613851547241, |
|
"rewards/margins": 1.9092552661895752, |
|
"rewards/rejected": -1.9875609874725342, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7341482047364399e-06, |
|
"logits/chosen": -2.8154501914978027, |
|
"logits/rejected": -2.810351848602295, |
|
"logps/chosen": -274.22906494140625, |
|
"logps/rejected": -105.29779052734375, |
|
"loss": 0.2418, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.09999797493219376, |
|
"rewards/margins": 1.8004556894302368, |
|
"rewards/rejected": -1.9004535675048828, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7188693659281894e-06, |
|
"logits/chosen": -2.7782671451568604, |
|
"logits/rejected": -2.7969181537628174, |
|
"logps/chosen": -253.5823516845703, |
|
"logps/rejected": -91.79356384277344, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16337540745735168, |
|
"rewards/margins": 1.8200305700302124, |
|
"rewards/rejected": -1.9834058284759521, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.703590527119939e-06, |
|
"logits/chosen": -2.7838528156280518, |
|
"logits/rejected": -2.7845559120178223, |
|
"logps/chosen": -276.6927795410156, |
|
"logps/rejected": -105.1791763305664, |
|
"loss": 0.2408, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.11859698593616486, |
|
"rewards/margins": 1.980875015258789, |
|
"rewards/rejected": -2.0994718074798584, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.688311688311688e-06, |
|
"logits/chosen": -2.8013651371002197, |
|
"logits/rejected": -2.7973175048828125, |
|
"logps/chosen": -280.02337646484375, |
|
"logps/rejected": -108.97808837890625, |
|
"loss": 0.2366, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.17565980553627014, |
|
"rewards/margins": 2.0567195415496826, |
|
"rewards/rejected": -2.23237943649292, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.6730328495034376e-06, |
|
"logits/chosen": -2.7511441707611084, |
|
"logits/rejected": -2.79335355758667, |
|
"logps/chosen": -295.89630126953125, |
|
"logps/rejected": -102.7479476928711, |
|
"loss": 0.2329, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.09856131672859192, |
|
"rewards/margins": 2.122011184692383, |
|
"rewards/rejected": -2.2205727100372314, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6577540106951871e-06, |
|
"logits/chosen": -2.7538399696350098, |
|
"logits/rejected": -2.7933411598205566, |
|
"logps/chosen": -278.8785705566406, |
|
"logps/rejected": -95.23030090332031, |
|
"loss": 0.2309, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.1667167693376541, |
|
"rewards/margins": 2.049661159515381, |
|
"rewards/rejected": -2.2163777351379395, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.6424751718869364e-06, |
|
"logits/chosen": -2.7607922554016113, |
|
"logits/rejected": -2.763124942779541, |
|
"logps/chosen": -270.15484619140625, |
|
"logps/rejected": -103.03666687011719, |
|
"loss": 0.2252, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.10847791284322739, |
|
"rewards/margins": 2.2359797954559326, |
|
"rewards/rejected": -2.3444576263427734, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.627196333078686e-06, |
|
"logits/chosen": -2.7692110538482666, |
|
"logits/rejected": -2.7727513313293457, |
|
"logps/chosen": -284.1410217285156, |
|
"logps/rejected": -103.25160217285156, |
|
"loss": 0.2183, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.07571631669998169, |
|
"rewards/margins": 2.3671867847442627, |
|
"rewards/rejected": -2.4429030418395996, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6119174942704355e-06, |
|
"logits/chosen": -2.713252544403076, |
|
"logits/rejected": -2.695067882537842, |
|
"logps/chosen": -246.3933563232422, |
|
"logps/rejected": -98.11837768554688, |
|
"loss": 0.2068, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.18476447463035583, |
|
"rewards/margins": 2.0507025718688965, |
|
"rewards/rejected": -2.2354671955108643, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.5966386554621848e-06, |
|
"logits/chosen": -2.788705348968506, |
|
"logits/rejected": -2.7501461505889893, |
|
"logps/chosen": -268.0991516113281, |
|
"logps/rejected": -102.09769439697266, |
|
"loss": 0.1976, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.12583544850349426, |
|
"rewards/margins": 2.4059669971466064, |
|
"rewards/rejected": -2.5318026542663574, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.5813598166539341e-06, |
|
"logits/chosen": -2.728642702102661, |
|
"logits/rejected": -2.7183501720428467, |
|
"logps/chosen": -277.0406188964844, |
|
"logps/rejected": -108.7503890991211, |
|
"loss": 0.2093, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.2682625353336334, |
|
"rewards/margins": 2.486672878265381, |
|
"rewards/rejected": -2.7549355030059814, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.5660809778456837e-06, |
|
"logits/chosen": -2.755733013153076, |
|
"logits/rejected": -2.794435977935791, |
|
"logps/chosen": -267.26983642578125, |
|
"logps/rejected": -103.68330383300781, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.2670840322971344, |
|
"rewards/margins": 2.529799461364746, |
|
"rewards/rejected": -2.7968833446502686, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.550802139037433e-06, |
|
"logits/chosen": -2.722480058670044, |
|
"logits/rejected": -2.7511894702911377, |
|
"logps/chosen": -271.193359375, |
|
"logps/rejected": -101.18013000488281, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.22587260603904724, |
|
"rewards/margins": 2.3586864471435547, |
|
"rewards/rejected": -2.584559202194214, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5355233002291825e-06, |
|
"logits/chosen": -2.724370241165161, |
|
"logits/rejected": -2.754976511001587, |
|
"logps/chosen": -300.2513122558594, |
|
"logps/rejected": -107.4105224609375, |
|
"loss": 0.2001, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2436836212873459, |
|
"rewards/margins": 2.5888543128967285, |
|
"rewards/rejected": -2.83253812789917, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.520244461420932e-06, |
|
"logits/chosen": -2.7703804969787598, |
|
"logits/rejected": -2.743251323699951, |
|
"logps/chosen": -304.1031799316406, |
|
"logps/rejected": -113.75921630859375, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.15126582980155945, |
|
"rewards/margins": 2.703249931335449, |
|
"rewards/rejected": -2.854515552520752, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.5049656226126814e-06, |
|
"logits/chosen": -2.7441253662109375, |
|
"logits/rejected": -2.7229297161102295, |
|
"logps/chosen": -240.0807342529297, |
|
"logps/rejected": -100.44654846191406, |
|
"loss": 0.2019, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.32086318731307983, |
|
"rewards/margins": 2.4921185970306396, |
|
"rewards/rejected": -2.8129820823669434, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.4896867838044307e-06, |
|
"logits/chosen": -2.704709529876709, |
|
"logits/rejected": -2.7237319946289062, |
|
"logps/chosen": -255.7570343017578, |
|
"logps/rejected": -113.80975341796875, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.40018701553344727, |
|
"rewards/margins": 2.4386959075927734, |
|
"rewards/rejected": -2.8388831615448, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_logits/chosen": -2.7680206298828125, |
|
"eval_logits/rejected": -2.7271738052368164, |
|
"eval_logps/chosen": -277.8106689453125, |
|
"eval_logps/rejected": -191.96604919433594, |
|
"eval_loss": 0.4197174608707428, |
|
"eval_rewards/accuracies": 0.80859375, |
|
"eval_rewards/chosen": -0.3973674178123474, |
|
"eval_rewards/margins": 1.4956284761428833, |
|
"eval_rewards/rejected": -1.892995834350586, |
|
"eval_runtime": 259.9183, |
|
"eval_samples_per_second": 7.695, |
|
"eval_steps_per_second": 0.062, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.4744079449961802e-06, |
|
"logits/chosen": -2.7513904571533203, |
|
"logits/rejected": -2.8016788959503174, |
|
"logps/chosen": -279.7330017089844, |
|
"logps/rejected": -109.1887435913086, |
|
"loss": 0.1934, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.32768136262893677, |
|
"rewards/margins": 2.722477436065674, |
|
"rewards/rejected": -3.0501585006713867, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.4591291061879296e-06, |
|
"logits/chosen": -2.705814838409424, |
|
"logits/rejected": -2.6782376766204834, |
|
"logps/chosen": -274.86480712890625, |
|
"logps/rejected": -113.91358947753906, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.2557370066642761, |
|
"rewards/margins": 2.544098377227783, |
|
"rewards/rejected": -2.799835205078125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.443850267379679e-06, |
|
"logits/chosen": -2.7348856925964355, |
|
"logits/rejected": -2.7256247997283936, |
|
"logps/chosen": -260.6488342285156, |
|
"logps/rejected": -98.91087341308594, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.3437056541442871, |
|
"rewards/margins": 2.4448533058166504, |
|
"rewards/rejected": -2.7885591983795166, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"logits/chosen": -2.799448013305664, |
|
"logits/rejected": -2.778778076171875, |
|
"logps/chosen": -279.9732360839844, |
|
"logps/rejected": -124.17852783203125, |
|
"loss": 0.1678, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4281793236732483, |
|
"rewards/margins": 2.834944248199463, |
|
"rewards/rejected": -3.2631232738494873, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.413292589763178e-06, |
|
"logits/chosen": -2.7671663761138916, |
|
"logits/rejected": -2.7472527027130127, |
|
"logps/chosen": -306.42181396484375, |
|
"logps/rejected": -113.77569580078125, |
|
"loss": 0.1743, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.31418782472610474, |
|
"rewards/margins": 2.8712990283966064, |
|
"rewards/rejected": -3.1854867935180664, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.3980137509549275e-06, |
|
"logits/chosen": -2.7520015239715576, |
|
"logits/rejected": -2.772326946258545, |
|
"logps/chosen": -247.4855499267578, |
|
"logps/rejected": -101.20677185058594, |
|
"loss": 0.1779, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4306362271308899, |
|
"rewards/margins": 2.6422040462493896, |
|
"rewards/rejected": -3.072840452194214, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.3827349121466768e-06, |
|
"logits/chosen": -2.685715436935425, |
|
"logits/rejected": -2.705381155014038, |
|
"logps/chosen": -269.1335144042969, |
|
"logps/rejected": -111.51566314697266, |
|
"loss": 0.1742, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4332028329372406, |
|
"rewards/margins": 2.756269693374634, |
|
"rewards/rejected": -3.1894726753234863, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.3674560733384261e-06, |
|
"logits/chosen": -2.737140417098999, |
|
"logits/rejected": -2.7757163047790527, |
|
"logps/chosen": -294.99530029296875, |
|
"logps/rejected": -112.22420501708984, |
|
"loss": 0.163, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.3779616951942444, |
|
"rewards/margins": 3.002772331237793, |
|
"rewards/rejected": -3.3807339668273926, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.3521772345301757e-06, |
|
"logits/chosen": -2.765671730041504, |
|
"logits/rejected": -2.776446580886841, |
|
"logps/chosen": -291.44732666015625, |
|
"logps/rejected": -112.7077865600586, |
|
"loss": 0.1604, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.1892886459827423, |
|
"rewards/margins": 3.2649528980255127, |
|
"rewards/rejected": -3.4542412757873535, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3368983957219252e-06, |
|
"logits/chosen": -2.711365222930908, |
|
"logits/rejected": -2.761141300201416, |
|
"logps/chosen": -246.60989379882812, |
|
"logps/rejected": -106.89306640625, |
|
"loss": 0.1662, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -0.5674314498901367, |
|
"rewards/margins": 2.7119510173797607, |
|
"rewards/rejected": -3.2793827056884766, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.3216195569136745e-06, |
|
"logits/chosen": -2.706845283508301, |
|
"logits/rejected": -2.7259907722473145, |
|
"logps/chosen": -266.33453369140625, |
|
"logps/rejected": -109.5294189453125, |
|
"loss": 0.1638, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.49850577116012573, |
|
"rewards/margins": 2.929001569747925, |
|
"rewards/rejected": -3.4275078773498535, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.306340718105424e-06, |
|
"logits/chosen": -2.7694671154022217, |
|
"logits/rejected": -2.7433857917785645, |
|
"logps/chosen": -279.09429931640625, |
|
"logps/rejected": -113.45039367675781, |
|
"loss": 0.1663, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5474687218666077, |
|
"rewards/margins": 2.8283703327178955, |
|
"rewards/rejected": -3.3758392333984375, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.2910618792971732e-06, |
|
"logits/chosen": -2.724958896636963, |
|
"logits/rejected": -2.6607446670532227, |
|
"logps/chosen": -268.52862548828125, |
|
"logps/rejected": -99.50247955322266, |
|
"loss": 0.1663, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.39182907342910767, |
|
"rewards/margins": 2.8105075359344482, |
|
"rewards/rejected": -3.2023367881774902, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.2757830404889227e-06, |
|
"logits/chosen": -2.7382755279541016, |
|
"logits/rejected": -2.747498035430908, |
|
"logps/chosen": -295.7679748535156, |
|
"logps/rejected": -118.82535552978516, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.37939247488975525, |
|
"rewards/margins": 3.1222548484802246, |
|
"rewards/rejected": -3.5016472339630127, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.2605042016806722e-06, |
|
"logits/chosen": -2.7766032218933105, |
|
"logits/rejected": -2.7410993576049805, |
|
"logps/chosen": -282.5389099121094, |
|
"logps/rejected": -115.2748794555664, |
|
"loss": 0.153, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.4390658736228943, |
|
"rewards/margins": 3.1960551738739014, |
|
"rewards/rejected": -3.6351211071014404, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.2452253628724215e-06, |
|
"logits/chosen": -2.7750675678253174, |
|
"logits/rejected": -2.781956195831299, |
|
"logps/chosen": -299.9912109375, |
|
"logps/rejected": -124.12471008300781, |
|
"loss": 0.1546, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.4881526827812195, |
|
"rewards/margins": 3.3200020790100098, |
|
"rewards/rejected": -3.808154582977295, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.229946524064171e-06, |
|
"logits/chosen": -2.7297987937927246, |
|
"logits/rejected": -2.668787956237793, |
|
"logps/chosen": -285.3520202636719, |
|
"logps/rejected": -114.99342346191406, |
|
"loss": 0.1551, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.43964657187461853, |
|
"rewards/margins": 3.080749988555908, |
|
"rewards/rejected": -3.5203967094421387, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.2146676852559206e-06, |
|
"logits/chosen": -2.70336651802063, |
|
"logits/rejected": -2.7211241722106934, |
|
"logps/chosen": -288.2897644042969, |
|
"logps/rejected": -117.46827697753906, |
|
"loss": 0.1598, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.5533924102783203, |
|
"rewards/margins": 3.033695697784424, |
|
"rewards/rejected": -3.587088108062744, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.19938884644767e-06, |
|
"logits/chosen": -2.7294211387634277, |
|
"logits/rejected": -2.776686191558838, |
|
"logps/chosen": -279.3514709472656, |
|
"logps/rejected": -110.04444885253906, |
|
"loss": 0.1681, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5076431035995483, |
|
"rewards/margins": 3.0213823318481445, |
|
"rewards/rejected": -3.5290253162384033, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1841100076394192e-06, |
|
"logits/chosen": -2.749238967895508, |
|
"logits/rejected": -2.7573282718658447, |
|
"logps/chosen": -270.27874755859375, |
|
"logps/rejected": -111.33082580566406, |
|
"loss": 0.1629, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4736942648887634, |
|
"rewards/margins": 3.030879259109497, |
|
"rewards/rejected": -3.5045738220214844, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 1.1688311688311688e-06, |
|
"logits/chosen": -2.7193169593811035, |
|
"logits/rejected": -2.7363669872283936, |
|
"logps/chosen": -299.65753173828125, |
|
"logps/rejected": -120.58744812011719, |
|
"loss": 0.1436, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.4750828146934509, |
|
"rewards/margins": 3.319221019744873, |
|
"rewards/rejected": -3.794304370880127, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.153552330022918e-06, |
|
"logits/chosen": -2.7201449871063232, |
|
"logits/rejected": -2.7599716186523438, |
|
"logps/chosen": -285.97869873046875, |
|
"logps/rejected": -111.79976654052734, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.46921300888061523, |
|
"rewards/margins": 3.1174368858337402, |
|
"rewards/rejected": -3.5866501331329346, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.1382734912146676e-06, |
|
"logits/chosen": -2.683954954147339, |
|
"logits/rejected": -2.760031223297119, |
|
"logps/chosen": -285.8088073730469, |
|
"logps/rejected": -114.0859375, |
|
"loss": 0.1569, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.4851107597351074, |
|
"rewards/margins": 3.097794771194458, |
|
"rewards/rejected": -3.5829052925109863, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.1229946524064172e-06, |
|
"logits/chosen": -2.695244550704956, |
|
"logits/rejected": -2.7127084732055664, |
|
"logps/chosen": -258.01812744140625, |
|
"logps/rejected": -116.52490234375, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.48574456572532654, |
|
"rewards/margins": 3.0761725902557373, |
|
"rewards/rejected": -3.5619170665740967, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.1077158135981665e-06, |
|
"logits/chosen": -2.6948394775390625, |
|
"logits/rejected": -2.7225587368011475, |
|
"logps/chosen": -247.1420440673828, |
|
"logps/rejected": -108.1529312133789, |
|
"loss": 0.1546, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6709108352661133, |
|
"rewards/margins": 2.9054205417633057, |
|
"rewards/rejected": -3.5763309001922607, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.0924369747899158e-06, |
|
"logits/chosen": -2.776315450668335, |
|
"logits/rejected": -2.776728868484497, |
|
"logps/chosen": -270.6875915527344, |
|
"logps/rejected": -114.87939453125, |
|
"loss": 0.1503, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6296391487121582, |
|
"rewards/margins": 3.1651525497436523, |
|
"rewards/rejected": -3.7947916984558105, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 1.0771581359816653e-06, |
|
"logits/chosen": -2.733975887298584, |
|
"logits/rejected": -2.6755785942077637, |
|
"logps/chosen": -253.8512420654297, |
|
"logps/rejected": -123.96826171875, |
|
"loss": 0.1483, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5190945267677307, |
|
"rewards/margins": 3.2409470081329346, |
|
"rewards/rejected": -3.7600414752960205, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.0618792971734147e-06, |
|
"logits/chosen": -2.6686155796051025, |
|
"logits/rejected": -2.7576305866241455, |
|
"logps/chosen": -315.3510437011719, |
|
"logps/rejected": -116.48152160644531, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.4066368043422699, |
|
"rewards/margins": 3.404694080352783, |
|
"rewards/rejected": -3.811330795288086, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0466004583651642e-06, |
|
"logits/chosen": -2.7223763465881348, |
|
"logits/rejected": -2.7043228149414062, |
|
"logps/chosen": -285.30767822265625, |
|
"logps/rejected": -123.26036071777344, |
|
"loss": 0.1394, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.6819589734077454, |
|
"rewards/margins": 3.2477848529815674, |
|
"rewards/rejected": -3.929744005203247, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0313216195569137e-06, |
|
"logits/chosen": -2.710700511932373, |
|
"logits/rejected": -2.748891592025757, |
|
"logps/chosen": -267.1021423339844, |
|
"logps/rejected": -108.77484130859375, |
|
"loss": 0.15, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7118343710899353, |
|
"rewards/margins": 3.120725154876709, |
|
"rewards/rejected": -3.832559585571289, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.016042780748663e-06, |
|
"logits/chosen": -2.6949081420898438, |
|
"logits/rejected": -2.6823809146881104, |
|
"logps/chosen": -278.98101806640625, |
|
"logps/rejected": -121.89730072021484, |
|
"loss": 0.1449, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5939213037490845, |
|
"rewards/margins": 3.3531200885772705, |
|
"rewards/rejected": -3.9470412731170654, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.0007639419404126e-06, |
|
"logits/chosen": -2.710453748703003, |
|
"logits/rejected": -2.720715045928955, |
|
"logps/chosen": -277.281982421875, |
|
"logps/rejected": -114.69615173339844, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.6140307188034058, |
|
"rewards/margins": 3.254765748977661, |
|
"rewards/rejected": -3.8687965869903564, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.85485103132162e-07, |
|
"logits/chosen": -2.7251040935516357, |
|
"logits/rejected": -2.693974494934082, |
|
"logps/chosen": -286.97686767578125, |
|
"logps/rejected": -119.7470703125, |
|
"loss": 0.1369, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.5266898274421692, |
|
"rewards/margins": 3.49212384223938, |
|
"rewards/rejected": -4.018813133239746, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 9.702062643239114e-07, |
|
"logits/chosen": -2.7554683685302734, |
|
"logits/rejected": -2.762106418609619, |
|
"logps/chosen": -289.8918762207031, |
|
"logps/rejected": -125.37986755371094, |
|
"loss": 0.1401, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6475861668586731, |
|
"rewards/margins": 3.4364724159240723, |
|
"rewards/rejected": -4.08405876159668, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 9.549274255156608e-07, |
|
"logits/chosen": -2.7048392295837402, |
|
"logits/rejected": -2.6878910064697266, |
|
"logps/chosen": -284.31219482421875, |
|
"logps/rejected": -122.46697998046875, |
|
"loss": 0.1402, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.6133157014846802, |
|
"rewards/margins": 3.361485242843628, |
|
"rewards/rejected": -3.9748013019561768, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 9.396485867074102e-07, |
|
"logits/chosen": -2.6750662326812744, |
|
"logits/rejected": -2.696866750717163, |
|
"logps/chosen": -291.42938232421875, |
|
"logps/rejected": -113.9036865234375, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.6713167428970337, |
|
"rewards/margins": 3.037714719772339, |
|
"rewards/rejected": -3.709031581878662, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 9.243697478991597e-07, |
|
"logits/chosen": -2.68489933013916, |
|
"logits/rejected": -2.6937882900238037, |
|
"logps/chosen": -254.6746063232422, |
|
"logps/rejected": -115.2020263671875, |
|
"loss": 0.1543, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.8679746389389038, |
|
"rewards/margins": 3.083875894546509, |
|
"rewards/rejected": -3.951850414276123, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 9.09090909090909e-07, |
|
"logits/chosen": -2.6926121711730957, |
|
"logits/rejected": -2.698666572570801, |
|
"logps/chosen": -280.2935485839844, |
|
"logps/rejected": -116.54022216796875, |
|
"loss": 0.1349, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.43773943185806274, |
|
"rewards/margins": 3.511465072631836, |
|
"rewards/rejected": -3.949204683303833, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 8.938120702826585e-07, |
|
"logits/chosen": -2.740382671356201, |
|
"logits/rejected": -2.68933367729187, |
|
"logps/chosen": -276.387451171875, |
|
"logps/rejected": -115.27424621582031, |
|
"loss": 0.136, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5380962491035461, |
|
"rewards/margins": 3.400437593460083, |
|
"rewards/rejected": -3.9385337829589844, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 8.785332314744079e-07, |
|
"logits/chosen": -2.7333688735961914, |
|
"logits/rejected": -2.771758794784546, |
|
"logps/chosen": -293.49578857421875, |
|
"logps/rejected": -126.72651672363281, |
|
"loss": 0.1379, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.6212050914764404, |
|
"rewards/margins": 3.3602237701416016, |
|
"rewards/rejected": -3.9814281463623047, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.632543926661573e-07, |
|
"logits/chosen": -2.709484577178955, |
|
"logits/rejected": -2.7399744987487793, |
|
"logps/chosen": -309.5272521972656, |
|
"logps/rejected": -123.85481262207031, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5241509675979614, |
|
"rewards/margins": 3.5228843688964844, |
|
"rewards/rejected": -4.0470356941223145, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 8.479755538579067e-07, |
|
"logits/chosen": -2.7088303565979004, |
|
"logits/rejected": -2.7025811672210693, |
|
"logps/chosen": -271.97784423828125, |
|
"logps/rejected": -118.300537109375, |
|
"loss": 0.1319, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.5483931303024292, |
|
"rewards/margins": 3.45857310295105, |
|
"rewards/rejected": -4.006965637207031, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 8.326967150496562e-07, |
|
"logits/chosen": -2.747811794281006, |
|
"logits/rejected": -2.6834397315979004, |
|
"logps/chosen": -301.4414367675781, |
|
"logps/rejected": -123.6478271484375, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.5513515472412109, |
|
"rewards/margins": 3.684753894805908, |
|
"rewards/rejected": -4.236104965209961, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 8.174178762414056e-07, |
|
"logits/chosen": -2.7275288105010986, |
|
"logits/rejected": -2.7216854095458984, |
|
"logps/chosen": -300.93328857421875, |
|
"logps/rejected": -133.4979248046875, |
|
"loss": 0.1301, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.5985573530197144, |
|
"rewards/margins": 3.8093056678771973, |
|
"rewards/rejected": -4.407863140106201, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 8.02139037433155e-07, |
|
"logits/chosen": -2.7658352851867676, |
|
"logits/rejected": -2.738119602203369, |
|
"logps/chosen": -272.04449462890625, |
|
"logps/rejected": -120.6009521484375, |
|
"loss": 0.1252, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.6955040693283081, |
|
"rewards/margins": 3.5386176109313965, |
|
"rewards/rejected": -4.234121799468994, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.868601986249045e-07, |
|
"logits/chosen": -2.7340145111083984, |
|
"logits/rejected": -2.7205958366394043, |
|
"logps/chosen": -286.5509948730469, |
|
"logps/rejected": -126.727294921875, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.8658866882324219, |
|
"rewards/margins": 3.4253039360046387, |
|
"rewards/rejected": -4.2911906242370605, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 7.71581359816654e-07, |
|
"logits/chosen": -2.7004799842834473, |
|
"logits/rejected": -2.6438944339752197, |
|
"logps/chosen": -226.2438507080078, |
|
"logps/rejected": -113.9751205444336, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.913210391998291, |
|
"rewards/margins": 3.264578342437744, |
|
"rewards/rejected": -4.177788734436035, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.563025210084033e-07, |
|
"logits/chosen": -2.7258245944976807, |
|
"logits/rejected": -2.792160987854004, |
|
"logps/chosen": -270.41314697265625, |
|
"logps/rejected": -115.57466125488281, |
|
"loss": 0.1153, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.7112643122673035, |
|
"rewards/margins": 3.4535746574401855, |
|
"rewards/rejected": -4.164839267730713, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.410236822001527e-07, |
|
"logits/chosen": -2.682445526123047, |
|
"logits/rejected": -2.680621862411499, |
|
"logps/chosen": -300.4261474609375, |
|
"logps/rejected": -123.0173110961914, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.544654130935669, |
|
"rewards/margins": 3.4888763427734375, |
|
"rewards/rejected": -4.033530235290527, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_logits/chosen": -2.730585813522339, |
|
"eval_logits/rejected": -2.6866092681884766, |
|
"eval_logps/chosen": -281.4207763671875, |
|
"eval_logps/rejected": -201.63304138183594, |
|
"eval_loss": 0.3693665862083435, |
|
"eval_rewards/accuracies": 0.82421875, |
|
"eval_rewards/chosen": -0.7583777904510498, |
|
"eval_rewards/margins": 2.1013174057006836, |
|
"eval_rewards/rejected": -2.8596951961517334, |
|
"eval_runtime": 259.2152, |
|
"eval_samples_per_second": 7.716, |
|
"eval_steps_per_second": 0.062, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.257448433919023e-07, |
|
"logits/chosen": -2.7307159900665283, |
|
"logits/rejected": -2.760425090789795, |
|
"logps/chosen": -263.69964599609375, |
|
"logps/rejected": -121.00809478759766, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7474689483642578, |
|
"rewards/margins": 3.526792526245117, |
|
"rewards/rejected": -4.274261474609375, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 7.104660045836516e-07, |
|
"logits/chosen": -2.688023090362549, |
|
"logits/rejected": -2.6981234550476074, |
|
"logps/chosen": -283.63311767578125, |
|
"logps/rejected": -119.1172866821289, |
|
"loss": 0.1167, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.5560388565063477, |
|
"rewards/margins": 3.7879276275634766, |
|
"rewards/rejected": -4.343966484069824, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 6.95187165775401e-07, |
|
"logits/chosen": -2.6954102516174316, |
|
"logits/rejected": -2.696408748626709, |
|
"logps/chosen": -277.02764892578125, |
|
"logps/rejected": -115.6692886352539, |
|
"loss": 0.1243, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6953684091567993, |
|
"rewards/margins": 3.4351089000701904, |
|
"rewards/rejected": -4.130476951599121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 6.799083269671506e-07, |
|
"logits/chosen": -2.6869771480560303, |
|
"logits/rejected": -2.7037670612335205, |
|
"logps/chosen": -251.7620086669922, |
|
"logps/rejected": -117.48094177246094, |
|
"loss": 0.1317, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.8689507246017456, |
|
"rewards/margins": 3.475069046020508, |
|
"rewards/rejected": -4.344019889831543, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 6.646294881588999e-07, |
|
"logits/chosen": -2.6988840103149414, |
|
"logits/rejected": -2.6869451999664307, |
|
"logps/chosen": -287.7177734375, |
|
"logps/rejected": -123.6144790649414, |
|
"loss": 0.119, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7602699995040894, |
|
"rewards/margins": 3.628121852874756, |
|
"rewards/rejected": -4.388391971588135, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 6.493506493506493e-07, |
|
"logits/chosen": -2.7356371879577637, |
|
"logits/rejected": -2.8164889812469482, |
|
"logps/chosen": -305.6545715332031, |
|
"logps/rejected": -119.76715087890625, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7907172441482544, |
|
"rewards/margins": 3.6126227378845215, |
|
"rewards/rejected": -4.403339862823486, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 6.340718105423987e-07, |
|
"logits/chosen": -2.647362232208252, |
|
"logits/rejected": -2.6018130779266357, |
|
"logps/chosen": -281.76092529296875, |
|
"logps/rejected": -118.23252868652344, |
|
"loss": 0.121, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.64565110206604, |
|
"rewards/margins": 3.727663516998291, |
|
"rewards/rejected": -4.37331485748291, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 6.187929717341482e-07, |
|
"logits/chosen": -2.7285995483398438, |
|
"logits/rejected": -2.67795467376709, |
|
"logps/chosen": -273.88555908203125, |
|
"logps/rejected": -112.9361801147461, |
|
"loss": 0.1223, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.5947860479354858, |
|
"rewards/margins": 3.604292392730713, |
|
"rewards/rejected": -4.199078559875488, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 6.035141329258976e-07, |
|
"logits/chosen": -2.718034505844116, |
|
"logits/rejected": -2.6936326026916504, |
|
"logps/chosen": -287.3531799316406, |
|
"logps/rejected": -123.18647766113281, |
|
"loss": 0.1196, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7472411394119263, |
|
"rewards/margins": 3.734687328338623, |
|
"rewards/rejected": -4.481928825378418, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.88235294117647e-07, |
|
"logits/chosen": -2.699462413787842, |
|
"logits/rejected": -2.667182683944702, |
|
"logps/chosen": -255.7397003173828, |
|
"logps/rejected": -114.04801177978516, |
|
"loss": 0.1287, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.706480860710144, |
|
"rewards/margins": 3.5075461864471436, |
|
"rewards/rejected": -4.214027404785156, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 5.729564553093965e-07, |
|
"logits/chosen": -2.719064712524414, |
|
"logits/rejected": -2.724663734436035, |
|
"logps/chosen": -283.41607666015625, |
|
"logps/rejected": -120.31791687011719, |
|
"loss": 0.1205, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.8808507919311523, |
|
"rewards/margins": 3.6230416297912598, |
|
"rewards/rejected": -4.503891944885254, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5.576776165011459e-07, |
|
"logits/chosen": -2.6613950729370117, |
|
"logits/rejected": -2.669309377670288, |
|
"logps/chosen": -274.6718444824219, |
|
"logps/rejected": -123.7230453491211, |
|
"loss": 0.1295, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8109513521194458, |
|
"rewards/margins": 3.535714626312256, |
|
"rewards/rejected": -4.346665859222412, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 5.423987776928953e-07, |
|
"logits/chosen": -2.752354860305786, |
|
"logits/rejected": -2.689492702484131, |
|
"logps/chosen": -297.46575927734375, |
|
"logps/rejected": -123.75872802734375, |
|
"loss": 0.1112, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8303642272949219, |
|
"rewards/margins": 3.775070905685425, |
|
"rewards/rejected": -4.605435371398926, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 5.271199388846448e-07, |
|
"logits/chosen": -2.7163376808166504, |
|
"logits/rejected": -2.7011306285858154, |
|
"logps/chosen": -268.9468078613281, |
|
"logps/rejected": -129.1416473388672, |
|
"loss": 0.1147, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.7546231150627136, |
|
"rewards/margins": 3.6865787506103516, |
|
"rewards/rejected": -4.441201686859131, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 5.118411000763941e-07, |
|
"logits/chosen": -2.7277450561523438, |
|
"logits/rejected": -2.6973800659179688, |
|
"logps/chosen": -282.5606689453125, |
|
"logps/rejected": -124.7701416015625, |
|
"loss": 0.1192, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5854411125183105, |
|
"rewards/margins": 3.8541629314422607, |
|
"rewards/rejected": -4.439603805541992, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 4.965622612681436e-07, |
|
"logits/chosen": -2.6750576496124268, |
|
"logits/rejected": -2.7182953357696533, |
|
"logps/chosen": -274.384765625, |
|
"logps/rejected": -122.15213775634766, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7071852684020996, |
|
"rewards/margins": 3.846698045730591, |
|
"rewards/rejected": -4.5538835525512695, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 4.81283422459893e-07, |
|
"logits/chosen": -2.676004648208618, |
|
"logits/rejected": -2.6993675231933594, |
|
"logps/chosen": -263.89752197265625, |
|
"logps/rejected": -116.71295166015625, |
|
"loss": 0.1107, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.7977498769760132, |
|
"rewards/margins": 3.523756742477417, |
|
"rewards/rejected": -4.321506500244141, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 4.660045836516425e-07, |
|
"logits/chosen": -2.7055160999298096, |
|
"logits/rejected": -2.7121243476867676, |
|
"logps/chosen": -289.3218994140625, |
|
"logps/rejected": -121.4267807006836, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6630896329879761, |
|
"rewards/margins": 3.811305522918701, |
|
"rewards/rejected": -4.474394798278809, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.5072574484339185e-07, |
|
"logits/chosen": -2.6670591831207275, |
|
"logits/rejected": -2.690974712371826, |
|
"logps/chosen": -278.7057189941406, |
|
"logps/rejected": -117.63764953613281, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.798798143863678, |
|
"rewards/margins": 3.643995761871338, |
|
"rewards/rejected": -4.442793846130371, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.3544690603514133e-07, |
|
"logits/chosen": -2.686856269836426, |
|
"logits/rejected": -2.6309545040130615, |
|
"logps/chosen": -271.1042785644531, |
|
"logps/rejected": -131.63259887695312, |
|
"loss": 0.1233, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.7242040038108826, |
|
"rewards/margins": 3.6964995861053467, |
|
"rewards/rejected": -4.420703411102295, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.2016806722689076e-07, |
|
"logits/chosen": -2.677506923675537, |
|
"logits/rejected": -2.7353405952453613, |
|
"logps/chosen": -304.0357360839844, |
|
"logps/rejected": -128.03981018066406, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6244128942489624, |
|
"rewards/margins": 3.888216733932495, |
|
"rewards/rejected": -4.512629985809326, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.0488922841864013e-07, |
|
"logits/chosen": -2.738661527633667, |
|
"logits/rejected": -2.696394443511963, |
|
"logps/chosen": -263.1926574707031, |
|
"logps/rejected": -117.48931884765625, |
|
"loss": 0.1266, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.903983473777771, |
|
"rewards/margins": 3.5497829914093018, |
|
"rewards/rejected": -4.453766345977783, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.896103896103896e-07, |
|
"logits/chosen": -2.721292018890381, |
|
"logits/rejected": -2.6752657890319824, |
|
"logps/chosen": -270.2406005859375, |
|
"logps/rejected": -121.4746322631836, |
|
"loss": 0.1118, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.7563194036483765, |
|
"rewards/margins": 3.8605034351348877, |
|
"rewards/rejected": -4.616823673248291, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7433155080213904e-07, |
|
"logits/chosen": -2.6903328895568848, |
|
"logits/rejected": -2.7279880046844482, |
|
"logps/chosen": -263.3211975097656, |
|
"logps/rejected": -120.23514556884766, |
|
"loss": 0.1132, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7410348057746887, |
|
"rewards/margins": 3.5494637489318848, |
|
"rewards/rejected": -4.290497779846191, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.590527119938884e-07, |
|
"logits/chosen": -2.697645425796509, |
|
"logits/rejected": -2.676959753036499, |
|
"logps/chosen": -275.2843322753906, |
|
"logps/rejected": -126.24308776855469, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.9119027256965637, |
|
"rewards/margins": 3.6408069133758545, |
|
"rewards/rejected": -4.552709102630615, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.437738731856379e-07, |
|
"logits/chosen": -2.704871654510498, |
|
"logits/rejected": -2.6901638507843018, |
|
"logps/chosen": -292.1809387207031, |
|
"logps/rejected": -129.85531616210938, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8088245391845703, |
|
"rewards/margins": 3.841012477874756, |
|
"rewards/rejected": -4.649837493896484, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.2849503437738727e-07, |
|
"logits/chosen": -2.6897072792053223, |
|
"logits/rejected": -2.6975724697113037, |
|
"logps/chosen": -289.3966369628906, |
|
"logps/rejected": -130.11814880371094, |
|
"loss": 0.1215, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8563445210456848, |
|
"rewards/margins": 3.8016419410705566, |
|
"rewards/rejected": -4.657986640930176, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.1321619556913675e-07, |
|
"logits/chosen": -2.6511547565460205, |
|
"logits/rejected": -2.6918509006500244, |
|
"logps/chosen": -304.6783447265625, |
|
"logps/rejected": -128.91067504882812, |
|
"loss": 0.1256, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.6992138028144836, |
|
"rewards/margins": 3.894549608230591, |
|
"rewards/rejected": -4.593764305114746, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.9793735676088617e-07, |
|
"logits/chosen": -2.7180018424987793, |
|
"logits/rejected": -2.73219633102417, |
|
"logps/chosen": -287.21697998046875, |
|
"logps/rejected": -128.64976501464844, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.9189088940620422, |
|
"rewards/margins": 3.821709394454956, |
|
"rewards/rejected": -4.7406182289123535, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.8265851795263555e-07, |
|
"logits/chosen": -2.6880316734313965, |
|
"logits/rejected": -2.6963746547698975, |
|
"logps/chosen": -270.5711364746094, |
|
"logps/rejected": -109.63471984863281, |
|
"loss": 0.1227, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8319600820541382, |
|
"rewards/margins": 3.4531593322753906, |
|
"rewards/rejected": -4.285120010375977, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.6737967914438503e-07, |
|
"logits/chosen": -2.718834400177002, |
|
"logits/rejected": -2.730088472366333, |
|
"logps/chosen": -285.2646179199219, |
|
"logps/rejected": -129.10108947753906, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.7013461589813232, |
|
"rewards/margins": 4.131698131561279, |
|
"rewards/rejected": -4.833044052124023, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5210084033613445e-07, |
|
"logits/chosen": -2.6872308254241943, |
|
"logits/rejected": -2.71856689453125, |
|
"logps/chosen": -264.56591796875, |
|
"logps/rejected": -117.89599609375, |
|
"loss": 0.115, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.6815407872200012, |
|
"rewards/margins": 3.6640231609344482, |
|
"rewards/rejected": -4.3455634117126465, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.3682200152788388e-07, |
|
"logits/chosen": -2.723666191101074, |
|
"logits/rejected": -2.7219574451446533, |
|
"logps/chosen": -317.8732604980469, |
|
"logps/rejected": -120.55049896240234, |
|
"loss": 0.1072, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.7161759734153748, |
|
"rewards/margins": 3.9293036460876465, |
|
"rewards/rejected": -4.645480155944824, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.215431627196333e-07, |
|
"logits/chosen": -2.6789979934692383, |
|
"logits/rejected": -2.6807262897491455, |
|
"logps/chosen": -260.33062744140625, |
|
"logps/rejected": -120.8504409790039, |
|
"loss": 0.1201, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.854164719581604, |
|
"rewards/margins": 3.6763319969177246, |
|
"rewards/rejected": -4.530496597290039, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.0626432391138274e-07, |
|
"logits/chosen": -2.703617572784424, |
|
"logits/rejected": -2.696929931640625, |
|
"logps/chosen": -250.5372772216797, |
|
"logps/rejected": -126.94450378417969, |
|
"loss": 0.1262, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0846295356750488, |
|
"rewards/margins": 3.540682554244995, |
|
"rewards/rejected": -4.625311374664307, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 1.9098548510313214e-07, |
|
"logits/chosen": -2.7046430110931396, |
|
"logits/rejected": -2.691488027572632, |
|
"logps/chosen": -267.9579162597656, |
|
"logps/rejected": -123.9790267944336, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -0.6386908292770386, |
|
"rewards/margins": 4.031415939331055, |
|
"rewards/rejected": -4.670106887817383, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 1.757066462948816e-07, |
|
"logits/chosen": -2.689009189605713, |
|
"logits/rejected": -2.7033090591430664, |
|
"logps/chosen": -266.8208312988281, |
|
"logps/rejected": -126.87858581542969, |
|
"loss": 0.1157, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.6870437860488892, |
|
"rewards/margins": 3.979443073272705, |
|
"rewards/rejected": -4.666487216949463, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.6042780748663102e-07, |
|
"logits/chosen": -2.70473051071167, |
|
"logits/rejected": -2.6794614791870117, |
|
"logps/chosen": -278.357421875, |
|
"logps/rejected": -125.93829345703125, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.9934309124946594, |
|
"rewards/margins": 3.67195200920105, |
|
"rewards/rejected": -4.665382385253906, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.4514896867838044e-07, |
|
"logits/chosen": -2.6879210472106934, |
|
"logits/rejected": -2.7055411338806152, |
|
"logps/chosen": -301.1689147949219, |
|
"logps/rejected": -137.4368896484375, |
|
"loss": 0.1134, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.8130217790603638, |
|
"rewards/margins": 3.979782819747925, |
|
"rewards/rejected": -4.792804718017578, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.2987012987012984e-07, |
|
"logits/chosen": -2.6889195442199707, |
|
"logits/rejected": -2.677527666091919, |
|
"logps/chosen": -300.85272216796875, |
|
"logps/rejected": -131.1885986328125, |
|
"loss": 0.1181, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.8304030299186707, |
|
"rewards/margins": 4.027594089508057, |
|
"rewards/rejected": -4.857996940612793, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.1459129106187928e-07, |
|
"logits/chosen": -2.675020694732666, |
|
"logits/rejected": -2.6782567501068115, |
|
"logps/chosen": -329.69696044921875, |
|
"logps/rejected": -135.40345764160156, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.631290078163147, |
|
"rewards/margins": 3.959303379058838, |
|
"rewards/rejected": -4.5905938148498535, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 9.931245225362872e-08, |
|
"logits/chosen": -2.6736457347869873, |
|
"logits/rejected": -2.6810030937194824, |
|
"logps/chosen": -274.8864440917969, |
|
"logps/rejected": -124.03629302978516, |
|
"loss": 0.1258, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -0.7999323606491089, |
|
"rewards/margins": 3.9392266273498535, |
|
"rewards/rejected": -4.73915958404541, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 8.403361344537815e-08, |
|
"logits/chosen": -2.699571132659912, |
|
"logits/rejected": -2.701141834259033, |
|
"logps/chosen": -283.51751708984375, |
|
"logps/rejected": -130.84814453125, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.7443857192993164, |
|
"rewards/margins": 3.96293568611145, |
|
"rewards/rejected": -4.707321643829346, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 6.875477463712758e-08, |
|
"logits/chosen": -2.690229892730713, |
|
"logits/rejected": -2.7361502647399902, |
|
"logps/chosen": -310.0445556640625, |
|
"logps/rejected": -141.15748596191406, |
|
"loss": 0.1115, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.7573784589767456, |
|
"rewards/margins": 4.050500869750977, |
|
"rewards/rejected": -4.807879447937012, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.3475935828877e-08, |
|
"logits/chosen": -2.7038116455078125, |
|
"logits/rejected": -2.715642213821411, |
|
"logps/chosen": -294.73486328125, |
|
"logps/rejected": -127.1126480102539, |
|
"loss": 0.1176, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.720859944820404, |
|
"rewards/margins": 3.844661235809326, |
|
"rewards/rejected": -4.565520763397217, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.8197097020626426e-08, |
|
"logits/chosen": -2.695003032684326, |
|
"logits/rejected": -2.7012951374053955, |
|
"logps/chosen": -257.13287353515625, |
|
"logps/rejected": -120.28269958496094, |
|
"loss": 0.1259, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.8394849896430969, |
|
"rewards/margins": 3.808011531829834, |
|
"rewards/rejected": -4.647497177124023, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.291825821237586e-08, |
|
"logits/chosen": -2.7045774459838867, |
|
"logits/rejected": -2.679112672805786, |
|
"logps/chosen": -280.9797058105469, |
|
"logps/rejected": -127.39398193359375, |
|
"loss": 0.1096, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.8950273394584656, |
|
"rewards/margins": 3.8749961853027344, |
|
"rewards/rejected": -4.770023345947266, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.639419404125286e-09, |
|
"logits/chosen": -2.7298693656921387, |
|
"logits/rejected": -2.673051357269287, |
|
"logps/chosen": -329.364990234375, |
|
"logps/rejected": -131.12486267089844, |
|
"loss": 0.109, |
|
"rewards/accuracies": 0.981249988079071, |
|
"rewards/chosen": -0.5651420950889587, |
|
"rewards/margins": 4.195387840270996, |
|
"rewards/rejected": -4.760529518127441, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_logits/chosen": -2.715555429458618, |
|
"eval_logits/rejected": -2.6699323654174805, |
|
"eval_logps/chosen": -282.458740234375, |
|
"eval_logps/rejected": -204.27066040039062, |
|
"eval_loss": 0.3553008437156677, |
|
"eval_rewards/accuracies": 0.828125, |
|
"eval_rewards/chosen": -0.8621728420257568, |
|
"eval_rewards/margins": 2.261284112930298, |
|
"eval_rewards/rejected": -3.123457193374634, |
|
"eval_runtime": 259.2547, |
|
"eval_samples_per_second": 7.714, |
|
"eval_steps_per_second": 0.062, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1455, |
|
"total_flos": 0.0, |
|
"train_loss": 0.21351368668972423, |
|
"train_runtime": 46913.4477, |
|
"train_samples_per_second": 3.969, |
|
"train_steps_per_second": 0.031 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1455, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|