{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.733991717107359, "eval_steps": 500, "global_step": 36, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02038865880853775, "grad_norm": 10.073711395263672, "learning_rate": 1e-05, "log_odds_chosen": -0.03190982714295387, "log_odds_ratio": -0.7741295099258423, "logits/chosen": -2.249119758605957, "logits/rejected": -2.2597196102142334, "logps/chosen": -5.181360721588135, "logps/rejected": -5.149355888366699, "loss": 5.4553, "nll_loss": 5.37792444229126, "rewards/accuracies": 0.4375, "rewards/chosen": -0.5181360840797424, "rewards/margins": -0.0032004904933273792, "rewards/rejected": -0.5149356126785278, "step": 1 }, { "epoch": 0.0407773176170755, "grad_norm": 9.501357078552246, "learning_rate": 2e-05, "log_odds_chosen": -0.044936653226614, "log_odds_ratio": -0.7695960402488708, "logits/chosen": -2.2498621940612793, "logits/rejected": -2.260127067565918, "logps/chosen": -5.01981258392334, "logps/rejected": -4.974053382873535, "loss": 5.3213, "nll_loss": 5.2443060874938965, "rewards/accuracies": 0.53125, "rewards/chosen": -0.501981258392334, "rewards/margins": -0.0045759049244225025, "rewards/rejected": -0.49740538001060486, "step": 2 }, { "epoch": 0.06116597642561325, "grad_norm": 9.111624717712402, "learning_rate": 3e-05, "log_odds_chosen": -0.008361914195120335, "log_odds_ratio": -0.7590417265892029, "logits/chosen": -2.287618398666382, "logits/rejected": -2.3050174713134766, "logps/chosen": -4.931052207946777, "logps/rejected": -4.922361850738525, "loss": 5.195, "nll_loss": 5.119076251983643, "rewards/accuracies": 0.453125, "rewards/chosen": -0.4931052029132843, "rewards/margins": -0.0008690543472766876, "rewards/rejected": -0.4922361373901367, "step": 3 }, { "epoch": 0.081554635234151, "grad_norm": 9.194869995117188, "learning_rate": 4e-05, "log_odds_chosen": -0.06829527020454407, "log_odds_ratio": -0.7628177404403687, "logits/chosen": -2.2833240032196045, "logits/rejected": -2.2870724201202393, "logps/chosen": -4.916200160980225, "logps/rejected": -4.847894668579102, "loss": 5.1872, "nll_loss": 5.110929489135742, "rewards/accuracies": 0.421875, "rewards/chosen": -0.4916200041770935, "rewards/margins": -0.006830527447164059, "rewards/rejected": -0.48478949069976807, "step": 4 }, { "epoch": 0.10194329404268876, "grad_norm": 9.331730842590332, "learning_rate": 5e-05, "log_odds_chosen": -0.07571306079626083, "log_odds_ratio": -0.7841904163360596, "logits/chosen": -2.3229925632476807, "logits/rejected": -2.337273120880127, "logps/chosen": -4.687668323516846, "logps/rejected": -4.612335205078125, "loss": 4.9652, "nll_loss": 4.886806011199951, "rewards/accuracies": 0.5, "rewards/chosen": -0.46876680850982666, "rewards/margins": -0.0075332350097596645, "rewards/rejected": -0.46123358607292175, "step": 5 }, { "epoch": 0.1223319528512265, "grad_norm": 9.857681274414062, "learning_rate": 6e-05, "log_odds_chosen": -0.19471649825572968, "log_odds_ratio": -0.8327277302742004, "logits/chosen": -2.3558108806610107, "logits/rejected": -2.3881752490997314, "logps/chosen": -4.344555377960205, "logps/rejected": -4.152055263519287, "loss": 4.6379, "nll_loss": 4.554634094238281, "rewards/accuracies": 0.40625, "rewards/chosen": -0.4344555735588074, "rewards/margins": -0.019250018522143364, "rewards/rejected": -0.41520553827285767, "step": 6 }, { "epoch": 0.14272061165976427, "grad_norm": 7.811158657073975, "learning_rate": 7e-05, "log_odds_chosen": -0.0166391022503376, "log_odds_ratio": -0.7380757331848145, "logits/chosen": -2.388141632080078, "logits/rejected": -2.4195711612701416, "logps/chosen": -3.969034194946289, "logps/rejected": -3.951852798461914, "loss": 4.2265, "nll_loss": 4.152710914611816, "rewards/accuracies": 0.53125, "rewards/chosen": -0.39690345525741577, "rewards/margins": -0.0017181318253278732, "rewards/rejected": -0.39518529176712036, "step": 7 }, { "epoch": 0.163109270468302, "grad_norm": 7.66775369644165, "learning_rate": 8e-05, "log_odds_chosen": 0.02697194740176201, "log_odds_ratio": -0.7153270840644836, "logits/chosen": -2.477844476699829, "logits/rejected": -2.474121570587158, "logps/chosen": -3.4541428089141846, "logps/rejected": -3.4769277572631836, "loss": 3.6939, "nll_loss": 3.6224091053009033, "rewards/accuracies": 0.53125, "rewards/chosen": -0.34541425108909607, "rewards/margins": 0.0022785186301916838, "rewards/rejected": -0.3476927876472473, "step": 8 }, { "epoch": 0.18349792927683975, "grad_norm": 6.6126298904418945, "learning_rate": 9e-05, "log_odds_chosen": 0.16790322959423065, "log_odds_ratio": -0.6347489356994629, "logits/chosen": -2.5679516792297363, "logits/rejected": -2.5715692043304443, "logps/chosen": -2.9835710525512695, "logps/rejected": -3.140634059906006, "loss": 3.2274, "nll_loss": 3.163942813873291, "rewards/accuracies": 0.625, "rewards/chosen": -0.2983570992946625, "rewards/margins": 0.015706289559602737, "rewards/rejected": -0.3140634000301361, "step": 9 }, { "epoch": 0.20388658808537752, "grad_norm": 6.9558281898498535, "learning_rate": 0.0001, "log_odds_chosen": 0.17972886562347412, "log_odds_ratio": -0.6277650594711304, "logits/chosen": -2.6125497817993164, "logits/rejected": -2.6118133068084717, "logps/chosen": -2.6501834392547607, "logps/rejected": -2.8179259300231934, "loss": 2.8684, "nll_loss": 2.8056435585021973, "rewards/accuracies": 0.703125, "rewards/chosen": -0.26501837372779846, "rewards/margins": 0.01677425391972065, "rewards/rejected": -0.2817925810813904, "step": 10 }, { "epoch": 0.22427524689391526, "grad_norm": 7.142885684967041, "learning_rate": 9.743589743589744e-05, "log_odds_chosen": 0.11056404560804367, "log_odds_ratio": -0.662803053855896, "logits/chosen": -2.665982723236084, "logits/rejected": -2.6722326278686523, "logps/chosen": -2.1527411937713623, "logps/rejected": -2.2486300468444824, "loss": 2.383, "nll_loss": 2.316676378250122, "rewards/accuracies": 0.625, "rewards/chosen": -0.2152741551399231, "rewards/margins": 0.009588859975337982, "rewards/rejected": -0.2248629927635193, "step": 11 }, { "epoch": 0.244663905702453, "grad_norm": 4.808487415313721, "learning_rate": 9.487179487179487e-05, "log_odds_chosen": 0.09381386637687683, "log_odds_ratio": -0.6690701842308044, "logits/chosen": -2.6697304248809814, "logits/rejected": -2.684809446334839, "logps/chosen": -1.6216576099395752, "logps/rejected": -1.695598840713501, "loss": 1.9113, "nll_loss": 1.8443692922592163, "rewards/accuracies": 0.53125, "rewards/chosen": -0.16216576099395752, "rewards/margins": 0.007394128944724798, "rewards/rejected": -0.16955989599227905, "step": 12 }, { "epoch": 0.26505256451099074, "grad_norm": 2.420715093612671, "learning_rate": 9.230769230769232e-05, "log_odds_chosen": 0.10657332092523575, "log_odds_ratio": -0.6556077599525452, "logits/chosen": -2.683443784713745, "logits/rejected": -2.6935503482818604, "logps/chosen": -1.4284594058990479, "logps/rejected": -1.508366584777832, "loss": 1.7205, "nll_loss": 1.654909372329712, "rewards/accuracies": 0.65625, "rewards/chosen": -0.14284594357013702, "rewards/margins": 0.007990704849362373, "rewards/rejected": -0.15083666145801544, "step": 13 }, { "epoch": 0.28544122331952854, "grad_norm": 2.851985216140747, "learning_rate": 8.974358974358975e-05, "log_odds_chosen": 0.156551793217659, "log_odds_ratio": -0.6308416128158569, "logits/chosen": -2.6921679973602295, "logits/rejected": -2.6880078315734863, "logps/chosen": -1.332141637802124, "logps/rejected": -1.4488908052444458, "loss": 1.5952, "nll_loss": 1.5321555137634277, "rewards/accuracies": 0.671875, "rewards/chosen": -0.13321417570114136, "rewards/margins": 0.01167491264641285, "rewards/rejected": -0.14488908648490906, "step": 14 }, { "epoch": 0.3058298821280663, "grad_norm": 2.4120404720306396, "learning_rate": 8.717948717948718e-05, "log_odds_chosen": 0.0856461226940155, "log_odds_ratio": -0.6610275506973267, "logits/chosen": -2.6284494400024414, "logits/rejected": -2.6575706005096436, "logps/chosen": -1.342667579650879, "logps/rejected": -1.4053808450698853, "loss": 1.5791, "nll_loss": 1.5130078792572021, "rewards/accuracies": 0.671875, "rewards/chosen": -0.13426676392555237, "rewards/margins": 0.006271325517445803, "rewards/rejected": -0.1405380815267563, "step": 15 }, { "epoch": 0.326218540936604, "grad_norm": 2.2445013523101807, "learning_rate": 8.461538461538461e-05, "log_odds_chosen": 0.18759144842624664, "log_odds_ratio": -0.617063045501709, "logits/chosen": -2.574859857559204, "logits/rejected": -2.5844004154205322, "logps/chosen": -1.24087393283844, "logps/rejected": -1.3730320930480957, "loss": 1.5016, "nll_loss": 1.439911127090454, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1240873858332634, "rewards/margins": 0.013215810991823673, "rewards/rejected": -0.1373032033443451, "step": 16 }, { "epoch": 0.34660719974514176, "grad_norm": 2.1423990726470947, "learning_rate": 8.205128205128205e-05, "log_odds_chosen": 0.2600902020931244, "log_odds_ratio": -0.5869597792625427, "logits/chosen": -2.4825172424316406, "logits/rejected": -2.4918212890625, "logps/chosen": -1.1209421157836914, "logps/rejected": -1.2962286472320557, "loss": 1.4016, "nll_loss": 1.3428654670715332, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1120942085981369, "rewards/margins": 0.01752866432070732, "rewards/rejected": -0.12962287664413452, "step": 17 }, { "epoch": 0.3669958585536795, "grad_norm": 2.0624334812164307, "learning_rate": 7.948717948717948e-05, "log_odds_chosen": 0.19398407638072968, "log_odds_ratio": -0.6082965731620789, "logits/chosen": -2.403656482696533, "logits/rejected": -2.4159321784973145, "logps/chosen": -1.0768545866012573, "logps/rejected": -1.204436182975769, "loss": 1.3427, "nll_loss": 1.2818515300750732, "rewards/accuracies": 0.796875, "rewards/chosen": -0.10768546164035797, "rewards/margins": 0.012758150696754456, "rewards/rejected": -0.12044361233711243, "step": 18 }, { "epoch": 0.38738451736221724, "grad_norm": 2.0077810287475586, "learning_rate": 7.692307692307693e-05, "log_odds_chosen": 0.2774355113506317, "log_odds_ratio": -0.5771675109863281, "logits/chosen": -2.32011079788208, "logits/rejected": -2.309196949005127, "logps/chosen": -0.982606828212738, "logps/rejected": -1.1639189720153809, "loss": 1.272, "nll_loss": 1.2142971754074097, "rewards/accuracies": 0.828125, "rewards/chosen": -0.09826068580150604, "rewards/margins": 0.01813122071325779, "rewards/rejected": -0.11639191210269928, "step": 19 }, { "epoch": 0.40777317617075504, "grad_norm": 1.5790979862213135, "learning_rate": 7.435897435897436e-05, "log_odds_chosen": 0.3015543520450592, "log_odds_ratio": -0.571921706199646, "logits/chosen": -2.204582691192627, "logits/rejected": -2.2353363037109375, "logps/chosen": -0.8750602006912231, "logps/rejected": -1.055103063583374, "loss": 1.1722, "nll_loss": 1.1150364875793457, "rewards/accuracies": 0.828125, "rewards/chosen": -0.08750602602958679, "rewards/margins": 0.018004287034273148, "rewards/rejected": -0.10551030933856964, "step": 20 }, { "epoch": 0.4281618349792928, "grad_norm": 1.4550942182540894, "learning_rate": 7.17948717948718e-05, "log_odds_chosen": 0.2803484797477722, "log_odds_ratio": -0.5891110897064209, "logits/chosen": -2.177445888519287, "logits/rejected": -2.1862730979919434, "logps/chosen": -0.8740922808647156, "logps/rejected": -1.0376415252685547, "loss": 1.184, "nll_loss": 1.125113606452942, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08740923553705215, "rewards/margins": 0.01635492593050003, "rewards/rejected": -0.10376415401697159, "step": 21 }, { "epoch": 0.4485504937878305, "grad_norm": 1.5131646394729614, "learning_rate": 6.923076923076924e-05, "log_odds_chosen": 0.3196752965450287, "log_odds_ratio": -0.5673432350158691, "logits/chosen": -2.139277458190918, "logits/rejected": -2.1643970012664795, "logps/chosen": -0.8622347116470337, "logps/rejected": -1.060903549194336, "loss": 1.1375, "nll_loss": 1.0807565450668335, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08622346818447113, "rewards/margins": 0.019866881892085075, "rewards/rejected": -0.10609035938978195, "step": 22 }, { "epoch": 0.46893915259636826, "grad_norm": 1.7129428386688232, "learning_rate": 6.666666666666667e-05, "log_odds_chosen": 0.3558296263217926, "log_odds_ratio": -0.551045298576355, "logits/chosen": -2.1384575366973877, "logits/rejected": -2.1461870670318604, "logps/chosen": -0.8587465286254883, "logps/rejected": -1.0661779642105103, "loss": 1.1327, "nll_loss": 1.0775768756866455, "rewards/accuracies": 0.8125, "rewards/chosen": -0.08587465435266495, "rewards/margins": 0.02074313722550869, "rewards/rejected": -0.10661779344081879, "step": 23 }, { "epoch": 0.489327811404906, "grad_norm": 1.7440029382705688, "learning_rate": 6.410256410256412e-05, "log_odds_chosen": 0.32858026027679443, "log_odds_ratio": -0.5619024038314819, "logits/chosen": -2.1546478271484375, "logits/rejected": -2.1749908924102783, "logps/chosen": -0.835049033164978, "logps/rejected": -1.0198912620544434, "loss": 1.1057, "nll_loss": 1.0495383739471436, "rewards/accuracies": 0.78125, "rewards/chosen": -0.08350490033626556, "rewards/margins": 0.01848422922194004, "rewards/rejected": -0.10198913514614105, "step": 24 }, { "epoch": 0.5097164702134438, "grad_norm": 1.9139939546585083, "learning_rate": 6.153846153846155e-05, "log_odds_chosen": 0.32062438130378723, "log_odds_ratio": -0.5715374946594238, "logits/chosen": -2.172311305999756, "logits/rejected": -2.1929304599761963, "logps/chosen": -0.8502154350280762, "logps/rejected": -1.0373834371566772, "loss": 1.1209, "nll_loss": 1.0637274980545044, "rewards/accuracies": 0.75, "rewards/chosen": -0.08502154797315598, "rewards/margins": 0.018716804683208466, "rewards/rejected": -0.10373835265636444, "step": 25 }, { "epoch": 0.5301051290219815, "grad_norm": 2.14544415473938, "learning_rate": 5.897435897435898e-05, "log_odds_chosen": 0.38874953985214233, "log_odds_ratio": -0.5423570275306702, "logits/chosen": -2.2168030738830566, "logits/rejected": -2.2049760818481445, "logps/chosen": -0.8020574450492859, "logps/rejected": -1.0160338878631592, "loss": 1.068, "nll_loss": 1.013757348060608, "rewards/accuracies": 0.84375, "rewards/chosen": -0.08020574599504471, "rewards/margins": 0.02139764279127121, "rewards/rejected": -0.10160338878631592, "step": 26 }, { "epoch": 0.5504937878305193, "grad_norm": 2.258091449737549, "learning_rate": 5.6410256410256414e-05, "log_odds_chosen": 0.44487276673316956, "log_odds_ratio": -0.5278628468513489, "logits/chosen": -2.2525055408477783, "logits/rejected": -2.233915328979492, "logps/chosen": -0.7687065005302429, "logps/rejected": -1.0176327228546143, "loss": 1.0399, "nll_loss": 0.9870870113372803, "rewards/accuracies": 0.796875, "rewards/chosen": -0.07687065005302429, "rewards/margins": 0.02489262819290161, "rewards/rejected": -0.1017632856965065, "step": 27 }, { "epoch": 0.5708824466390571, "grad_norm": 2.6239125728607178, "learning_rate": 5.384615384615385e-05, "log_odds_chosen": 0.38128751516342163, "log_odds_ratio": -0.55021071434021, "logits/chosen": -2.2505478858947754, "logits/rejected": -2.2465157508850098, "logps/chosen": -0.7138630151748657, "logps/rejected": -0.9125551581382751, "loss": 1.0329, "nll_loss": 0.977867603302002, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07138630747795105, "rewards/margins": 0.019869212061166763, "rewards/rejected": -0.09125552326440811, "step": 28 }, { "epoch": 0.5912711054475948, "grad_norm": 1.6056082248687744, "learning_rate": 5.128205128205128e-05, "log_odds_chosen": 0.5487304925918579, "log_odds_ratio": -0.478320449590683, "logits/chosen": -2.2418763637542725, "logits/rejected": -2.2638566493988037, "logps/chosen": -0.7001346349716187, "logps/rejected": -1.0050172805786133, "loss": 0.9538, "nll_loss": 0.9059388041496277, "rewards/accuracies": 0.921875, "rewards/chosen": -0.07001346349716187, "rewards/margins": 0.030488261952996254, "rewards/rejected": -0.10050173103809357, "step": 29 }, { "epoch": 0.6116597642561326, "grad_norm": 2.707125663757324, "learning_rate": 4.871794871794872e-05, "log_odds_chosen": 0.433153361082077, "log_odds_ratio": -0.5276967883110046, "logits/chosen": -2.2189157009124756, "logits/rejected": -2.228149890899658, "logps/chosen": -0.6423375606536865, "logps/rejected": -0.8684220910072327, "loss": 0.943, "nll_loss": 0.89023357629776, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06423375755548477, "rewards/margins": 0.022608455270528793, "rewards/rejected": -0.08684220910072327, "step": 30 }, { "epoch": 0.6320484230646702, "grad_norm": 3.1514434814453125, "learning_rate": 4.615384615384616e-05, "log_odds_chosen": 0.3669428527355194, "log_odds_ratio": -0.5465207695960999, "logits/chosen": -2.243276357650757, "logits/rejected": -2.231745719909668, "logps/chosen": -0.7233006954193115, "logps/rejected": -0.9113064408302307, "loss": 1.0016, "nll_loss": 0.9469515681266785, "rewards/accuracies": 0.8125, "rewards/chosen": -0.07233007252216339, "rewards/margins": 0.018800577148795128, "rewards/rejected": -0.09113065153360367, "step": 31 }, { "epoch": 0.652437081873208, "grad_norm": 2.5206167697906494, "learning_rate": 4.358974358974359e-05, "log_odds_chosen": 0.5872430205345154, "log_odds_ratio": -0.46654394268989563, "logits/chosen": -2.2469942569732666, "logits/rejected": -2.269362211227417, "logps/chosen": -0.6239035725593567, "logps/rejected": -0.9376953840255737, "loss": 0.9061, "nll_loss": 0.8594872951507568, "rewards/accuracies": 0.875, "rewards/chosen": -0.06239035725593567, "rewards/margins": 0.03137918934226036, "rewards/rejected": -0.09376954287290573, "step": 32 }, { "epoch": 0.6728257406817458, "grad_norm": 1.3284610509872437, "learning_rate": 4.1025641025641023e-05, "log_odds_chosen": 0.5279171466827393, "log_odds_ratio": -0.49398207664489746, "logits/chosen": -2.263728618621826, "logits/rejected": -2.288696050643921, "logps/chosen": -0.6505022644996643, "logps/rejected": -0.9282124042510986, "loss": 0.9288, "nll_loss": 0.8793907761573792, "rewards/accuracies": 0.890625, "rewards/chosen": -0.06505022943019867, "rewards/margins": 0.027771014720201492, "rewards/rejected": -0.09282123297452927, "step": 33 }, { "epoch": 0.6932143994902835, "grad_norm": 1.2634799480438232, "learning_rate": 3.846153846153846e-05, "log_odds_chosen": 0.5196709036827087, "log_odds_ratio": -0.5028817653656006, "logits/chosen": -2.3093760013580322, "logits/rejected": -2.3152754306793213, "logps/chosen": -0.6717790365219116, "logps/rejected": -0.9456109404563904, "loss": 0.9413, "nll_loss": 0.8909698724746704, "rewards/accuracies": 0.796875, "rewards/chosen": -0.0671778991818428, "rewards/margins": 0.027383197098970413, "rewards/rejected": -0.09456109255552292, "step": 34 }, { "epoch": 0.7136030582988213, "grad_norm": 1.2822017669677734, "learning_rate": 3.58974358974359e-05, "log_odds_chosen": 0.44004446268081665, "log_odds_ratio": -0.53327876329422, "logits/chosen": -2.3234097957611084, "logits/rejected": -2.319546937942505, "logps/chosen": -0.7039386630058289, "logps/rejected": -0.9365054965019226, "loss": 0.9674, "nll_loss": 0.9140487909317017, "rewards/accuracies": 0.796875, "rewards/chosen": -0.070393867790699, "rewards/margins": 0.023256685584783554, "rewards/rejected": -0.09365054965019226, "step": 35 }, { "epoch": 0.733991717107359, "grad_norm": 1.3868314027786255, "learning_rate": 3.3333333333333335e-05, "log_odds_chosen": 0.40167126059532166, "log_odds_ratio": -0.5467191338539124, "logits/chosen": -2.3325843811035156, "logits/rejected": -2.3239898681640625, "logps/chosen": -0.7655161619186401, "logps/rejected": -0.9702532291412354, "loss": 0.9815, "nll_loss": 0.9267975091934204, "rewards/accuracies": 0.765625, "rewards/chosen": -0.07655161619186401, "rewards/margins": 0.02047371119260788, "rewards/rejected": -0.0970253199338913, "step": 36 } ], "logging_steps": 1, "max_steps": 49, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }