{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5912711054475948, "eval_steps": 500, "global_step": 58, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010194329404268876, "grad_norm": 0.7573416233062744, "learning_rate": 2.9999999999999997e-05, "log_odds_chosen": 0.5699290037155151, "log_odds_ratio": -0.47619175910949707, "logits/chosen": -5.881921291351318, "logits/rejected": -5.716203689575195, "logps/chosen": -0.7556048631668091, "logps/rejected": -1.0891886949539185, "loss": 0.971, "nll_loss": 0.9233458042144775, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07556048780679703, "rewards/margins": 0.0333583801984787, "rewards/rejected": -0.10891886055469513, "step": 1 }, { "epoch": 0.02038865880853775, "grad_norm": 0.8255464434623718, "learning_rate": 5.9999999999999995e-05, "log_odds_chosen": 0.8270730972290039, "log_odds_ratio": -0.3858833909034729, "logits/chosen": -6.026644229888916, "logits/rejected": -5.734494209289551, "logps/chosen": -0.6564911603927612, "logps/rejected": -1.1349902153015137, "loss": 0.8322, "nll_loss": 0.7935733795166016, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0656491070985794, "rewards/margins": 0.04784991592168808, "rewards/rejected": -0.11349902302026749, "step": 2 }, { "epoch": 0.030582988212806625, "grad_norm": 0.6873063445091248, "learning_rate": 8.999999999999999e-05, "log_odds_chosen": 0.9449296593666077, "log_odds_ratio": -0.3508719801902771, "logits/chosen": -6.142429828643799, "logits/rejected": -5.885470867156982, "logps/chosen": -0.5670567750930786, "logps/rejected": -1.0744154453277588, "loss": 0.7612, "nll_loss": 0.7261290550231934, "rewards/accuracies": 0.96875, "rewards/chosen": -0.056705668568611145, "rewards/margins": 0.050735872238874435, "rewards/rejected": -0.10744155198335648, "step": 3 }, { "epoch": 0.0407773176170755, "grad_norm": 0.6718329191207886, "learning_rate": 0.00011999999999999999, "log_odds_chosen": 1.5755469799041748, "log_odds_ratio": -0.20553907752037048, "logits/chosen": -6.1619157791137695, "logits/rejected": -5.763967514038086, "logps/chosen": -0.3855120837688446, "logps/rejected": -1.1750808954238892, "loss": 0.5648, "nll_loss": 0.5442639589309692, "rewards/accuracies": 1.0, "rewards/chosen": -0.0385512076318264, "rewards/margins": 0.07895689457654953, "rewards/rejected": -0.11750810593366623, "step": 4 }, { "epoch": 0.05097164702134438, "grad_norm": 0.6536553502082825, "learning_rate": 0.00015, "log_odds_chosen": 1.5343743562698364, "log_odds_ratio": -0.21383415162563324, "logits/chosen": -6.650542259216309, "logits/rejected": -6.332062721252441, "logps/chosen": -0.37913548946380615, "logps/rejected": -1.0929796695709229, "loss": 0.561, "nll_loss": 0.5396607518196106, "rewards/accuracies": 1.0, "rewards/chosen": -0.03791355341672897, "rewards/margins": 0.07138442248106003, "rewards/rejected": -0.1092979684472084, "step": 5 }, { "epoch": 0.06116597642561325, "grad_norm": 0.5619150400161743, "learning_rate": 0.00017999999999999998, "log_odds_chosen": 1.6949516534805298, "log_odds_ratio": -0.18262828886508942, "logits/chosen": -7.268585205078125, "logits/rejected": -6.876243591308594, "logps/chosen": -0.3971864879131317, "logps/rejected": -1.2302331924438477, "loss": 0.5403, "nll_loss": 0.5220563411712646, "rewards/accuracies": 1.0, "rewards/chosen": -0.03971865028142929, "rewards/margins": 0.08330468088388443, "rewards/rejected": -0.12302333116531372, "step": 6 }, { "epoch": 0.07136030582988213, "grad_norm": 0.6898521780967712, "learning_rate": 0.00020999999999999998, "log_odds_chosen": 1.3639956712722778, "log_odds_ratio": -0.24654226005077362, "logits/chosen": -8.031928062438965, "logits/rejected": -7.74884557723999, "logps/chosen": -0.48026007413864136, "logps/rejected": -1.176466464996338, "loss": 0.6935, "nll_loss": 0.6688414812088013, "rewards/accuracies": 1.0, "rewards/chosen": -0.048026010394096375, "rewards/margins": 0.06962063908576965, "rewards/rejected": -0.11764664947986603, "step": 7 }, { "epoch": 0.081554635234151, "grad_norm": 1.1064831018447876, "learning_rate": 0.00023999999999999998, "log_odds_chosen": 0.6979130506515503, "log_odds_ratio": -0.4440312087535858, "logits/chosen": -9.434717178344727, "logits/rejected": -9.158509254455566, "logps/chosen": -0.8219408392906189, "logps/rejected": -1.2313414812088013, "loss": 1.0247, "nll_loss": 0.9803025722503662, "rewards/accuracies": 0.875, "rewards/chosen": -0.08219408988952637, "rewards/margins": 0.040940068662166595, "rewards/rejected": -0.12313415855169296, "step": 8 }, { "epoch": 0.09174896463841987, "grad_norm": 1.3165839910507202, "learning_rate": 0.00027, "log_odds_chosen": 0.5627075433731079, "log_odds_ratio": -0.48641741275787354, "logits/chosen": -9.972105979919434, "logits/rejected": -9.6690092086792, "logps/chosen": -0.873996376991272, "logps/rejected": -1.2415555715560913, "loss": 1.0735, "nll_loss": 1.0249079465866089, "rewards/accuracies": 0.84375, "rewards/chosen": -0.08739963173866272, "rewards/margins": 0.03675593435764313, "rewards/rejected": -0.12415556609630585, "step": 9 }, { "epoch": 0.10194329404268876, "grad_norm": 1.0939090251922607, "learning_rate": 0.0003, "log_odds_chosen": 0.5926020741462708, "log_odds_ratio": -0.4752326011657715, "logits/chosen": -9.060307502746582, "logits/rejected": -8.911764144897461, "logps/chosen": -0.864991307258606, "logps/rejected": -1.2381335496902466, "loss": 1.0671, "nll_loss": 1.0196049213409424, "rewards/accuracies": 0.875, "rewards/chosen": -0.08649912476539612, "rewards/margins": 0.03731423243880272, "rewards/rejected": -0.12381334602832794, "step": 10 }, { "epoch": 0.11213762344695763, "grad_norm": 0.8981418609619141, "learning_rate": 0.00029659090909090906, "log_odds_chosen": 0.3323814272880554, "log_odds_ratio": -0.6069820523262024, "logits/chosen": -7.860880374908447, "logits/rejected": -7.7285614013671875, "logps/chosen": -0.8284704685211182, "logps/rejected": -1.0068429708480835, "loss": 1.0189, "nll_loss": 0.9581549167633057, "rewards/accuracies": 0.65625, "rewards/chosen": -0.08284705132246017, "rewards/margins": 0.017837243154644966, "rewards/rejected": -0.10068429261445999, "step": 11 }, { "epoch": 0.1223319528512265, "grad_norm": 0.6975793242454529, "learning_rate": 0.00029318181818181814, "log_odds_chosen": 0.3913690447807312, "log_odds_ratio": -0.5592631101608276, "logits/chosen": -6.680542945861816, "logits/rejected": -6.604436874389648, "logps/chosen": -0.8059054017066956, "logps/rejected": -1.013627529144287, "loss": 1.0139, "nll_loss": 0.9579247236251831, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08059053868055344, "rewards/margins": 0.020772218704223633, "rewards/rejected": -0.10136276483535767, "step": 12 }, { "epoch": 0.13252628225549537, "grad_norm": 0.7373881340026855, "learning_rate": 0.0002897727272727273, "log_odds_chosen": 0.4727020263671875, "log_odds_ratio": -0.513884961605072, "logits/chosen": -5.979405403137207, "logits/rejected": -5.886721611022949, "logps/chosen": -0.8267030119895935, "logps/rejected": -1.1043840646743774, "loss": 1.0133, "nll_loss": 0.9619178771972656, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08267030864953995, "rewards/margins": 0.027768105268478394, "rewards/rejected": -0.11043839901685715, "step": 13 }, { "epoch": 0.14272061165976427, "grad_norm": 0.6061244606971741, "learning_rate": 0.00028636363636363636, "log_odds_chosen": 0.3888252079486847, "log_odds_ratio": -0.5602706670761108, "logits/chosen": -5.693607807159424, "logits/rejected": -5.5878586769104, "logps/chosen": -0.8379849195480347, "logps/rejected": -1.0506603717803955, "loss": 0.9967, "nll_loss": 0.9406490325927734, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08379849046468735, "rewards/margins": 0.02126755192875862, "rewards/rejected": -0.10506604611873627, "step": 14 }, { "epoch": 0.15291494106403314, "grad_norm": 0.6430971026420593, "learning_rate": 0.00028295454545454544, "log_odds_chosen": 0.5663943290710449, "log_odds_ratio": -0.4711042642593384, "logits/chosen": -5.467785358428955, "logits/rejected": -5.275339126586914, "logps/chosen": -0.7516398429870605, "logps/rejected": -1.0913419723510742, "loss": 0.9845, "nll_loss": 0.93742436170578, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07516399025917053, "rewards/margins": 0.033970218151807785, "rewards/rejected": -0.10913420468568802, "step": 15 }, { "epoch": 0.163109270468302, "grad_norm": 0.6216456294059753, "learning_rate": 0.0002795454545454545, "log_odds_chosen": 0.48780113458633423, "log_odds_ratio": -0.5268898010253906, "logits/chosen": -5.544975757598877, "logits/rejected": -5.340371608734131, "logps/chosen": -0.7598400115966797, "logps/rejected": -1.0393458604812622, "loss": 0.9841, "nll_loss": 0.9314062595367432, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07598400115966797, "rewards/margins": 0.02795059233903885, "rewards/rejected": -0.10393458604812622, "step": 16 }, { "epoch": 0.17330359987257088, "grad_norm": 0.6070399880409241, "learning_rate": 0.0002761363636363636, "log_odds_chosen": 0.512615978717804, "log_odds_ratio": -0.4918256998062134, "logits/chosen": -5.72025203704834, "logits/rejected": -5.575632572174072, "logps/chosen": -0.8014565110206604, "logps/rejected": -1.1142818927764893, "loss": 1.0131, "nll_loss": 0.9638931751251221, "rewards/accuracies": 0.84375, "rewards/chosen": -0.08014565706253052, "rewards/margins": 0.03128252923488617, "rewards/rejected": -0.11142819374799728, "step": 17 }, { "epoch": 0.18349792927683975, "grad_norm": 0.6751457452774048, "learning_rate": 0.0002727272727272727, "log_odds_chosen": 0.7217385768890381, "log_odds_ratio": -0.42509520053863525, "logits/chosen": -6.0239152908325195, "logits/rejected": -5.791830062866211, "logps/chosen": -0.6774111986160278, "logps/rejected": -1.0791475772857666, "loss": 0.9399, "nll_loss": 0.8973686099052429, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06774111092090607, "rewards/margins": 0.040173646062612534, "rewards/rejected": -0.1079147607088089, "step": 18 }, { "epoch": 0.19369225868110862, "grad_norm": 0.6394343376159668, "learning_rate": 0.00026931818181818177, "log_odds_chosen": 0.4962349534034729, "log_odds_ratio": -0.5181080102920532, "logits/chosen": -6.177443504333496, "logits/rejected": -6.040605545043945, "logps/chosen": -0.7530431747436523, "logps/rejected": -1.0457844734191895, "loss": 0.9766, "nll_loss": 0.9247891902923584, "rewards/accuracies": 0.75, "rewards/chosen": -0.07530432194471359, "rewards/margins": 0.02927413024008274, "rewards/rejected": -0.10457845032215118, "step": 19 }, { "epoch": 0.20388658808537752, "grad_norm": 0.6201019883155823, "learning_rate": 0.0002659090909090909, "log_odds_chosen": 0.6209574341773987, "log_odds_ratio": -0.4821361303329468, "logits/chosen": -6.149329662322998, "logits/rejected": -5.94755744934082, "logps/chosen": -0.7279878854751587, "logps/rejected": -1.0953131914138794, "loss": 0.9745, "nll_loss": 0.9263360500335693, "rewards/accuracies": 0.75, "rewards/chosen": -0.07279878854751587, "rewards/margins": 0.03673253580927849, "rewards/rejected": -0.10953132808208466, "step": 20 }, { "epoch": 0.2140809174896464, "grad_norm": 0.7272652387619019, "learning_rate": 0.0002625, "log_odds_chosen": 0.6105677485466003, "log_odds_ratio": -0.45292237401008606, "logits/chosen": -6.463402271270752, "logits/rejected": -6.246770858764648, "logps/chosen": -0.6842864751815796, "logps/rejected": -1.0331110954284668, "loss": 0.9289, "nll_loss": 0.8835590481758118, "rewards/accuracies": 0.90625, "rewards/chosen": -0.0684286504983902, "rewards/margins": 0.034882452338933945, "rewards/rejected": -0.10331110656261444, "step": 21 }, { "epoch": 0.22427524689391526, "grad_norm": 0.703335165977478, "learning_rate": 0.00025909090909090907, "log_odds_chosen": 0.628079354763031, "log_odds_ratio": -0.4692823588848114, "logits/chosen": -6.345945835113525, "logits/rejected": -6.104197978973389, "logps/chosen": -0.7916602492332458, "logps/rejected": -1.175963044166565, "loss": 1.0146, "nll_loss": 0.9676375389099121, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07916602492332458, "rewards/margins": 0.03843028470873833, "rewards/rejected": -0.1175963282585144, "step": 22 }, { "epoch": 0.23446957629818413, "grad_norm": 0.6977505683898926, "learning_rate": 0.00025568181818181815, "log_odds_chosen": 0.6730536818504333, "log_odds_ratio": -0.4550727605819702, "logits/chosen": -6.329110145568848, "logits/rejected": -6.0888824462890625, "logps/chosen": -0.722844123840332, "logps/rejected": -1.1306208372116089, "loss": 0.962, "nll_loss": 0.9164655208587646, "rewards/accuracies": 0.875, "rewards/chosen": -0.07228441536426544, "rewards/margins": 0.040777672082185745, "rewards/rejected": -0.11306207627058029, "step": 23 }, { "epoch": 0.244663905702453, "grad_norm": 0.642013669013977, "learning_rate": 0.0002522727272727273, "log_odds_chosen": 0.5060898065567017, "log_odds_ratio": -0.5109108686447144, "logits/chosen": -6.223945617675781, "logits/rejected": -6.106039047241211, "logps/chosen": -0.696851372718811, "logps/rejected": -0.9982959628105164, "loss": 0.9322, "nll_loss": 0.8811229467391968, "rewards/accuracies": 0.75, "rewards/chosen": -0.06968513131141663, "rewards/margins": 0.03014446049928665, "rewards/rejected": -0.09982959181070328, "step": 24 }, { "epoch": 0.2548582351067219, "grad_norm": 0.6632686853408813, "learning_rate": 0.00024886363636363637, "log_odds_chosen": 0.5426498055458069, "log_odds_ratio": -0.5096245408058167, "logits/chosen": -6.269081115722656, "logits/rejected": -6.013895034790039, "logps/chosen": -0.7527675628662109, "logps/rejected": -1.060716152191162, "loss": 0.9568, "nll_loss": 0.9058137536048889, "rewards/accuracies": 0.78125, "rewards/chosen": -0.07527676224708557, "rewards/margins": 0.030794844031333923, "rewards/rejected": -0.1060715988278389, "step": 25 }, { "epoch": 0.26505256451099074, "grad_norm": 0.6192494630813599, "learning_rate": 0.00024545454545454545, "log_odds_chosen": 0.7147890329360962, "log_odds_ratio": -0.4311472475528717, "logits/chosen": -6.27325439453125, "logits/rejected": -6.029926776885986, "logps/chosen": -0.7463593482971191, "logps/rejected": -1.165663242340088, "loss": 0.9848, "nll_loss": 0.9416517019271851, "rewards/accuracies": 0.875, "rewards/chosen": -0.07463593780994415, "rewards/margins": 0.04193039610981941, "rewards/rejected": -0.11656633764505386, "step": 26 }, { "epoch": 0.27524689391525964, "grad_norm": 0.6525054574012756, "learning_rate": 0.0002420454545454545, "log_odds_chosen": 0.6342182159423828, "log_odds_ratio": -0.4612855315208435, "logits/chosen": -6.303836822509766, "logits/rejected": -5.9838032722473145, "logps/chosen": -0.7191063165664673, "logps/rejected": -1.0888221263885498, "loss": 0.9278, "nll_loss": 0.8816990256309509, "rewards/accuracies": 0.84375, "rewards/chosen": -0.07191063463687897, "rewards/margins": 0.0369715690612793, "rewards/rejected": -0.10888221114873886, "step": 27 }, { "epoch": 0.28544122331952854, "grad_norm": 0.6344391703605652, "learning_rate": 0.0002386363636363636, "log_odds_chosen": 0.7218108177185059, "log_odds_ratio": -0.42739707231521606, "logits/chosen": -6.518744945526123, "logits/rejected": -6.219590187072754, "logps/chosen": -0.6929553747177124, "logps/rejected": -1.116500735282898, "loss": 0.926, "nll_loss": 0.8832739591598511, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06929554045200348, "rewards/margins": 0.0423545241355896, "rewards/rejected": -0.11165006458759308, "step": 28 }, { "epoch": 0.2956355527237974, "grad_norm": 0.6501351594924927, "learning_rate": 0.0002352272727272727, "log_odds_chosen": 0.5734024047851562, "log_odds_ratio": -0.480106920003891, "logits/chosen": -6.212159633636475, "logits/rejected": -6.121041297912598, "logps/chosen": -0.7016568183898926, "logps/rejected": -1.0185236930847168, "loss": 0.9377, "nll_loss": 0.8897032141685486, "rewards/accuracies": 0.90625, "rewards/chosen": -0.07016568630933762, "rewards/margins": 0.0316866934299469, "rewards/rejected": -0.10185237973928452, "step": 29 }, { "epoch": 0.3058298821280663, "grad_norm": 0.6058350205421448, "learning_rate": 0.0002318181818181818, "log_odds_chosen": 0.4939781725406647, "log_odds_ratio": -0.5149088501930237, "logits/chosen": -6.3843536376953125, "logits/rejected": -6.20231294631958, "logps/chosen": -0.7776230573654175, "logps/rejected": -1.0611307621002197, "loss": 0.9604, "nll_loss": 0.9089440703392029, "rewards/accuracies": 0.75, "rewards/chosen": -0.07776231318712234, "rewards/margins": 0.02835078164935112, "rewards/rejected": -0.10611309111118317, "step": 30 }, { "epoch": 0.3160242115323351, "grad_norm": 0.5737637877464294, "learning_rate": 0.00022840909090909088, "log_odds_chosen": 0.7423917055130005, "log_odds_ratio": -0.42824113368988037, "logits/chosen": -6.262265205383301, "logits/rejected": -6.01313591003418, "logps/chosen": -0.6851417422294617, "logps/rejected": -1.1119240522384644, "loss": 0.9356, "nll_loss": 0.892802357673645, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06851417571306229, "rewards/margins": 0.04267823323607445, "rewards/rejected": -0.11119241267442703, "step": 31 }, { "epoch": 0.326218540936604, "grad_norm": 0.6048231720924377, "learning_rate": 0.000225, "log_odds_chosen": 0.5270333886146545, "log_odds_ratio": -0.5013642311096191, "logits/chosen": -6.226537227630615, "logits/rejected": -6.064816951751709, "logps/chosen": -0.7441041469573975, "logps/rejected": -1.0214492082595825, "loss": 1.0008, "nll_loss": 0.9506412148475647, "rewards/accuracies": 0.875, "rewards/chosen": -0.07441041618585587, "rewards/margins": 0.02773449756205082, "rewards/rejected": -0.10214491933584213, "step": 32 }, { "epoch": 0.3364128703408729, "grad_norm": 0.6168726682662964, "learning_rate": 0.00022159090909090908, "log_odds_chosen": 0.7599021792411804, "log_odds_ratio": -0.40911978483200073, "logits/chosen": -6.250336170196533, "logits/rejected": -5.903271198272705, "logps/chosen": -0.6250157356262207, "logps/rejected": -1.0473593473434448, "loss": 0.8808, "nll_loss": 0.8398489356040955, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06250157952308655, "rewards/margins": 0.042234357446432114, "rewards/rejected": -0.10473594069480896, "step": 33 }, { "epoch": 0.34660719974514176, "grad_norm": 0.5676644444465637, "learning_rate": 0.00021818181818181816, "log_odds_chosen": 0.6422507762908936, "log_odds_ratio": -0.44997766613960266, "logits/chosen": -6.285076141357422, "logits/rejected": -6.035497665405273, "logps/chosen": -0.7211968898773193, "logps/rejected": -1.0853862762451172, "loss": 0.9522, "nll_loss": 0.9072052240371704, "rewards/accuracies": 0.90625, "rewards/chosen": -0.07211969047784805, "rewards/margins": 0.036418940871953964, "rewards/rejected": -0.10853863507509232, "step": 34 }, { "epoch": 0.35680152914941066, "grad_norm": 0.5508589744567871, "learning_rate": 0.00021477272727272727, "log_odds_chosen": 0.6378032565116882, "log_odds_ratio": -0.4539848566055298, "logits/chosen": -6.31094217300415, "logits/rejected": -6.19523286819458, "logps/chosen": -0.705143928527832, "logps/rejected": -1.0693531036376953, "loss": 0.8995, "nll_loss": 0.8540608882904053, "rewards/accuracies": 0.875, "rewards/chosen": -0.07051438838243484, "rewards/margins": 0.03642091527581215, "rewards/rejected": -0.10693531483411789, "step": 35 }, { "epoch": 0.3669958585536795, "grad_norm": 0.5418716073036194, "learning_rate": 0.00021136363636363635, "log_odds_chosen": 0.7797142267227173, "log_odds_ratio": -0.4039728343486786, "logits/chosen": -6.470402240753174, "logits/rejected": -6.2065253257751465, "logps/chosen": -0.6367918848991394, "logps/rejected": -1.063814640045166, "loss": 0.8759, "nll_loss": 0.835533618927002, "rewards/accuracies": 0.9375, "rewards/chosen": -0.06367918848991394, "rewards/margins": 0.04270227625966072, "rewards/rejected": -0.10638146847486496, "step": 36 }, { "epoch": 0.3771901879579484, "grad_norm": 0.57692950963974, "learning_rate": 0.00020795454545454546, "log_odds_chosen": 0.568789541721344, "log_odds_ratio": -0.47468000650405884, "logits/chosen": -6.621763229370117, "logits/rejected": -6.455575466156006, "logps/chosen": -0.7674496173858643, "logps/rejected": -1.0972856283187866, "loss": 0.9796, "nll_loss": 0.9321722388267517, "rewards/accuracies": 0.875, "rewards/chosen": -0.07674495875835419, "rewards/margins": 0.032983604818582535, "rewards/rejected": -0.10972855985164642, "step": 37 }, { "epoch": 0.38738451736221724, "grad_norm": 0.5527637600898743, "learning_rate": 0.0002045454545454545, "log_odds_chosen": 0.9506668448448181, "log_odds_ratio": -0.3585732579231262, "logits/chosen": -6.864900588989258, "logits/rejected": -6.427186012268066, "logps/chosen": -0.6164684891700745, "logps/rejected": -1.167106032371521, "loss": 0.8366, "nll_loss": 0.8007556796073914, "rewards/accuracies": 0.96875, "rewards/chosen": -0.061646852642297745, "rewards/margins": 0.05506375432014465, "rewards/rejected": -0.1167106032371521, "step": 38 }, { "epoch": 0.39757884676648614, "grad_norm": 0.5780960321426392, "learning_rate": 0.0002011363636363636, "log_odds_chosen": 0.7690156698226929, "log_odds_ratio": -0.43319082260131836, "logits/chosen": -6.718786716461182, "logits/rejected": -6.493645191192627, "logps/chosen": -0.6450703144073486, "logps/rejected": -1.0530625581741333, "loss": 0.8837, "nll_loss": 0.8403509855270386, "rewards/accuracies": 0.875, "rewards/chosen": -0.06450702995061874, "rewards/margins": 0.040799226611852646, "rewards/rejected": -0.10530626773834229, "step": 39 }, { "epoch": 0.40777317617075504, "grad_norm": 0.6378159523010254, "learning_rate": 0.0001977272727272727, "log_odds_chosen": 0.754075288772583, "log_odds_ratio": -0.42545363306999207, "logits/chosen": -6.970082759857178, "logits/rejected": -6.712213516235352, "logps/chosen": -0.6380664706230164, "logps/rejected": -1.0491538047790527, "loss": 0.8563, "nll_loss": 0.8137477040290833, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06380664557218552, "rewards/margins": 0.04110872745513916, "rewards/rejected": -0.10491538047790527, "step": 40 }, { "epoch": 0.4179675055750239, "grad_norm": 0.5837756395339966, "learning_rate": 0.00019431818181818179, "log_odds_chosen": 0.6944710612297058, "log_odds_ratio": -0.46601662039756775, "logits/chosen": -7.037945747375488, "logits/rejected": -6.881357669830322, "logps/chosen": -0.6695957183837891, "logps/rejected": -1.0475564002990723, "loss": 0.9246, "nll_loss": 0.8780001401901245, "rewards/accuracies": 0.78125, "rewards/chosen": -0.06695956736803055, "rewards/margins": 0.037796080112457275, "rewards/rejected": -0.10475565493106842, "step": 41 }, { "epoch": 0.4281618349792928, "grad_norm": 0.5805338621139526, "learning_rate": 0.0001909090909090909, "log_odds_chosen": 0.8210141658782959, "log_odds_ratio": -0.3949948251247406, "logits/chosen": -7.184229850769043, "logits/rejected": -6.872468948364258, "logps/chosen": -0.6862068176269531, "logps/rejected": -1.172074556350708, "loss": 0.8969, "nll_loss": 0.8574280738830566, "rewards/accuracies": 1.0, "rewards/chosen": -0.06862068176269531, "rewards/margins": 0.04858678579330444, "rewards/rejected": -0.11720746010541916, "step": 42 }, { "epoch": 0.4383561643835616, "grad_norm": 0.5663599967956543, "learning_rate": 0.00018749999999999998, "log_odds_chosen": 0.8663272261619568, "log_odds_ratio": -0.4099063575267792, "logits/chosen": -6.940430641174316, "logits/rejected": -6.632985591888428, "logps/chosen": -0.6582378149032593, "logps/rejected": -1.1708117723464966, "loss": 0.8702, "nll_loss": 0.8291637897491455, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06582378596067429, "rewards/margins": 0.051257383078336716, "rewards/rejected": -0.1170811727643013, "step": 43 }, { "epoch": 0.4485504937878305, "grad_norm": 0.6202066540718079, "learning_rate": 0.00018409090909090909, "log_odds_chosen": 0.743742048740387, "log_odds_ratio": -0.4359492063522339, "logits/chosen": -6.940640926361084, "logits/rejected": -6.672406196594238, "logps/chosen": -0.7529934644699097, "logps/rejected": -1.2044422626495361, "loss": 0.9466, "nll_loss": 0.9030457735061646, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07529934495687485, "rewards/margins": 0.04514488950371742, "rewards/rejected": -0.12044423073530197, "step": 44 }, { "epoch": 0.4587448231920994, "grad_norm": 0.5987519025802612, "learning_rate": 0.00018068181818181817, "log_odds_chosen": 0.7920289635658264, "log_odds_ratio": -0.4145981967449188, "logits/chosen": -6.779760837554932, "logits/rejected": -6.494051933288574, "logps/chosen": -0.7311891317367554, "logps/rejected": -1.1889228820800781, "loss": 0.9624, "nll_loss": 0.9209387302398682, "rewards/accuracies": 0.84375, "rewards/chosen": -0.0731189101934433, "rewards/margins": 0.04577338322997093, "rewards/rejected": -0.11889229714870453, "step": 45 }, { "epoch": 0.46893915259636826, "grad_norm": 0.574632465839386, "learning_rate": 0.00017727272727272728, "log_odds_chosen": 0.7554950714111328, "log_odds_ratio": -0.4141073226928711, "logits/chosen": -6.77302360534668, "logits/rejected": -6.466286659240723, "logps/chosen": -0.6649895310401917, "logps/rejected": -1.0962055921554565, "loss": 0.8721, "nll_loss": 0.8307065963745117, "rewards/accuracies": 0.96875, "rewards/chosen": -0.06649895757436752, "rewards/margins": 0.04312160983681679, "rewards/rejected": -0.10962056368589401, "step": 46 }, { "epoch": 0.47913348200063716, "grad_norm": 0.5619173049926758, "learning_rate": 0.00017386363636363636, "log_odds_chosen": 0.8238332867622375, "log_odds_ratio": -0.4221497178077698, "logits/chosen": -6.700472354888916, "logits/rejected": -6.371788501739502, "logps/chosen": -0.6685025095939636, "logps/rejected": -1.1434869766235352, "loss": 0.8734, "nll_loss": 0.8312162756919861, "rewards/accuracies": 0.84375, "rewards/chosen": -0.06685025244951248, "rewards/margins": 0.047498442232608795, "rewards/rejected": -0.11434869468212128, "step": 47 }, { "epoch": 0.489327811404906, "grad_norm": 0.5814753174781799, "learning_rate": 0.00017045454545454547, "log_odds_chosen": 0.6857788562774658, "log_odds_ratio": -0.44806724786758423, "logits/chosen": -6.448180675506592, "logits/rejected": -6.374576568603516, "logps/chosen": -0.6842226982116699, "logps/rejected": -1.0758768320083618, "loss": 0.8998, "nll_loss": 0.8549746870994568, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06842227280139923, "rewards/margins": 0.03916541486978531, "rewards/rejected": -0.10758768022060394, "step": 48 }, { "epoch": 0.4995221408091749, "grad_norm": 0.567726194858551, "learning_rate": 0.00016704545454545452, "log_odds_chosen": 0.7473108768463135, "log_odds_ratio": -0.42737889289855957, "logits/chosen": -6.513983249664307, "logits/rejected": -6.179078578948975, "logps/chosen": -0.7466605305671692, "logps/rejected": -1.1984943151474, "loss": 0.9469, "nll_loss": 0.9041528701782227, "rewards/accuracies": 0.9375, "rewards/chosen": -0.07466604560613632, "rewards/margins": 0.04518338292837143, "rewards/rejected": -0.11984942853450775, "step": 49 }, { "epoch": 0.5097164702134438, "grad_norm": 0.5788094997406006, "learning_rate": 0.0001636363636363636, "log_odds_chosen": 0.6143008470535278, "log_odds_ratio": -0.4699273407459259, "logits/chosen": -6.7270331382751465, "logits/rejected": -6.504873752593994, "logps/chosen": -0.6836742758750916, "logps/rejected": -1.0247901678085327, "loss": 0.8756, "nll_loss": 0.8285655975341797, "rewards/accuracies": 0.875, "rewards/chosen": -0.06836743652820587, "rewards/margins": 0.03411158546805382, "rewards/rejected": -0.1024790108203888, "step": 50 }, { "epoch": 0.5199107996177127, "grad_norm": 0.5850347876548767, "learning_rate": 0.00016022727272727271, "log_odds_chosen": 0.6994005441665649, "log_odds_ratio": -0.444344699382782, "logits/chosen": -6.444051742553711, "logits/rejected": -6.238152027130127, "logps/chosen": -0.6921380758285522, "logps/rejected": -1.0841060876846313, "loss": 0.9192, "nll_loss": 0.8747261762619019, "rewards/accuracies": 0.875, "rewards/chosen": -0.06921380758285522, "rewards/margins": 0.039196811616420746, "rewards/rejected": -0.10841061919927597, "step": 51 }, { "epoch": 0.5301051290219815, "grad_norm": 0.5676955580711365, "learning_rate": 0.0001568181818181818, "log_odds_chosen": 0.8152589797973633, "log_odds_ratio": -0.4141468405723572, "logits/chosen": -6.520391464233398, "logits/rejected": -6.294327259063721, "logps/chosen": -0.7015136480331421, "logps/rejected": -1.177841305732727, "loss": 0.9074, "nll_loss": 0.8660153150558472, "rewards/accuracies": 0.90625, "rewards/chosen": -0.07015138119459152, "rewards/margins": 0.047632765024900436, "rewards/rejected": -0.11778414249420166, "step": 52 }, { "epoch": 0.5402994584262504, "grad_norm": 0.5812381505966187, "learning_rate": 0.0001534090909090909, "log_odds_chosen": 0.7849770784378052, "log_odds_ratio": -0.42094579339027405, "logits/chosen": -6.526767253875732, "logits/rejected": -6.295316219329834, "logps/chosen": -0.6963866353034973, "logps/rejected": -1.160320520401001, "loss": 0.9251, "nll_loss": 0.8830394744873047, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06963866204023361, "rewards/margins": 0.04639340192079544, "rewards/rejected": -0.11603207141160965, "step": 53 }, { "epoch": 0.5504937878305193, "grad_norm": 0.5327312350273132, "learning_rate": 0.00015, "log_odds_chosen": 0.9011355638504028, "log_odds_ratio": -0.40153947472572327, "logits/chosen": -6.676137924194336, "logits/rejected": -6.344363212585449, "logps/chosen": -0.6891724467277527, "logps/rejected": -1.2165887355804443, "loss": 0.8742, "nll_loss": 0.8340070247650146, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06891724467277527, "rewards/margins": 0.05274162441492081, "rewards/rejected": -0.12165886908769608, "step": 54 }, { "epoch": 0.5606881172347882, "grad_norm": 0.5606175065040588, "learning_rate": 0.00014659090909090907, "log_odds_chosen": 0.8986623883247375, "log_odds_ratio": -0.3838304579257965, "logits/chosen": -6.634624004364014, "logits/rejected": -6.3021039962768555, "logps/chosen": -0.6508579254150391, "logps/rejected": -1.1660997867584229, "loss": 0.8963, "nll_loss": 0.857949435710907, "rewards/accuracies": 0.96875, "rewards/chosen": -0.06508579105138779, "rewards/margins": 0.05152418464422226, "rewards/rejected": -0.11660997569561005, "step": 55 }, { "epoch": 0.5708824466390571, "grad_norm": 0.5399141907691956, "learning_rate": 0.00014318181818181818, "log_odds_chosen": 0.6909326314926147, "log_odds_ratio": -0.4454424977302551, "logits/chosen": -6.556330680847168, "logits/rejected": -6.490412712097168, "logps/chosen": -0.6346659660339355, "logps/rejected": -1.0124220848083496, "loss": 0.8936, "nll_loss": 0.8490685820579529, "rewards/accuracies": 0.90625, "rewards/chosen": -0.06346660107374191, "rewards/margins": 0.037775613367557526, "rewards/rejected": -0.10124220699071884, "step": 56 }, { "epoch": 0.5810767760433259, "grad_norm": 0.5948923826217651, "learning_rate": 0.00013977272727272726, "log_odds_chosen": 0.9090598225593567, "log_odds_ratio": -0.3938583433628082, "logits/chosen": -6.822213172912598, "logits/rejected": -6.596620082855225, "logps/chosen": -0.6263031363487244, "logps/rejected": -1.1398905515670776, "loss": 0.8429, "nll_loss": 0.8035197257995605, "rewards/accuracies": 0.90625, "rewards/chosen": -0.0626303181052208, "rewards/margins": 0.05135873705148697, "rewards/rejected": -0.11398905515670776, "step": 57 }, { "epoch": 0.5912711054475948, "grad_norm": 0.5601215958595276, "learning_rate": 0.00013636363636363634, "log_odds_chosen": 0.9498623609542847, "log_odds_ratio": -0.35345450043678284, "logits/chosen": -6.869694709777832, "logits/rejected": -6.53272819519043, "logps/chosen": -0.7229388356208801, "logps/rejected": -1.3177077770233154, "loss": 0.8727, "nll_loss": 0.8374002575874329, "rewards/accuracies": 0.96875, "rewards/chosen": -0.07229389250278473, "rewards/margins": 0.05947688966989517, "rewards/rejected": -0.1317707896232605, "step": 58 } ], "logging_steps": 1, "max_steps": 98, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }