{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 27, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.037037037037037035, "grad_norm": 27.55777176285727, "learning_rate": 1.6666666666666665e-07, "logits/chosen": -1.3260200023651123, "logits/rejected": -1.3555822372436523, "logps/chosen": -620.6215209960938, "logps/rejected": -538.2889404296875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.07407407407407407, "grad_norm": 25.11934361945706, "learning_rate": 3.333333333333333e-07, "logits/chosen": -1.3862979412078857, "logits/rejected": -1.418839931488037, "logps/chosen": -548.8743286132812, "logps/rejected": -502.6883544921875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.1111111111111111, "grad_norm": 26.735838110076433, "learning_rate": 5e-07, "logits/chosen": -1.3722721338272095, "logits/rejected": -1.4019339084625244, "logps/chosen": -593.66552734375, "logps/rejected": -518.8505249023438, "loss": 0.6926, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0003613852895796299, "rewards/margins": 0.002076806966215372, "rewards/rejected": -0.001715421793051064, "step": 3 }, { "epoch": 0.14814814814814814, "grad_norm": 27.831815566219696, "learning_rate": 4.978612153434526e-07, "logits/chosen": -1.3279492855072021, "logits/rejected": -1.4584531784057617, "logps/chosen": -616.8908081054688, "logps/rejected": -478.5179443359375, "loss": 0.6906, "rewards/accuracies": 0.625, "rewards/chosen": 0.002412938978523016, "rewards/margins": 0.00437101349234581, "rewards/rejected": -0.001958074513822794, "step": 4 }, { "epoch": 0.18518518518518517, "grad_norm": 26.45387550515481, "learning_rate": 4.91481456572267e-07, "logits/chosen": -1.2913222312927246, "logits/rejected": -1.4004031419754028, "logps/chosen": -596.9254150390625, "logps/rejected": -491.03643798828125, "loss": 0.6846, "rewards/accuracies": 1.0, "rewards/chosen": 0.012376622296869755, "rewards/margins": 0.019729193300008774, "rewards/rejected": -0.007352571468800306, "step": 5 }, { "epoch": 0.2222222222222222, "grad_norm": 26.113162913083638, "learning_rate": 4.809698831278217e-07, "logits/chosen": -1.2476236820220947, "logits/rejected": -1.3365733623504639, "logps/chosen": -576.7242431640625, "logps/rejected": -499.41351318359375, "loss": 0.6689, "rewards/accuracies": 0.9375, "rewards/chosen": 0.014585882425308228, "rewards/margins": 0.039108239114284515, "rewards/rejected": -0.024522356688976288, "step": 6 }, { "epoch": 0.25925925925925924, "grad_norm": 24.212448317206544, "learning_rate": 4.6650635094610966e-07, "logits/chosen": -1.3422677516937256, "logits/rejected": -1.4066834449768066, "logps/chosen": -550.8783569335938, "logps/rejected": -444.896484375, "loss": 0.6611, "rewards/accuracies": 0.875, "rewards/chosen": 0.01903172954916954, "rewards/margins": 0.05026581883430481, "rewards/rejected": -0.031234093010425568, "step": 7 }, { "epoch": 0.2962962962962963, "grad_norm": 25.92207003651225, "learning_rate": 4.483383350728088e-07, "logits/chosen": -1.155773401260376, "logits/rejected": -1.2405778169631958, "logps/chosen": -660.4547119140625, "logps/rejected": -549.8304443359375, "loss": 0.6308, "rewards/accuracies": 1.0, "rewards/chosen": 0.04862784221768379, "rewards/margins": 0.11470220237970352, "rewards/rejected": -0.06607436388731003, "step": 8 }, { "epoch": 0.3333333333333333, "grad_norm": 22.804718326627064, "learning_rate": 4.2677669529663686e-07, "logits/chosen": -1.1948490142822266, "logits/rejected": -1.2298305034637451, "logps/chosen": -661.9083251953125, "logps/rejected": -571.0025634765625, "loss": 0.6201, "rewards/accuracies": 1.0, "rewards/chosen": 0.05401439964771271, "rewards/margins": 0.20858871936798096, "rewards/rejected": -0.15457431972026825, "step": 9 }, { "epoch": 0.37037037037037035, "grad_norm": 21.49943323053994, "learning_rate": 4.0219035725218013e-07, "logits/chosen": -1.2277066707611084, "logits/rejected": -1.2695496082305908, "logps/chosen": -664.5496826171875, "logps/rejected": -551.2160034179688, "loss": 0.5948, "rewards/accuracies": 1.0, "rewards/chosen": 0.05593748763203621, "rewards/margins": 0.2475208044052124, "rewards/rejected": -0.1915833204984665, "step": 10 }, { "epoch": 0.4074074074074074, "grad_norm": 21.083207323643638, "learning_rate": 3.75e-07, "logits/chosen": -1.2142881155014038, "logits/rejected": -1.2608280181884766, "logps/chosen": -493.3670654296875, "logps/rejected": -460.8230285644531, "loss": 0.5937, "rewards/accuracies": 1.0, "rewards/chosen": -0.02144962176680565, "rewards/margins": 0.19565340876579285, "rewards/rejected": -0.2171030193567276, "step": 11 }, { "epoch": 0.4444444444444444, "grad_norm": 22.30950608958288, "learning_rate": 3.4567085809127245e-07, "logits/chosen": -1.1867148876190186, "logits/rejected": -1.204742431640625, "logps/chosen": -518.848876953125, "logps/rejected": -477.93121337890625, "loss": 0.5571, "rewards/accuracies": 0.9375, "rewards/chosen": -0.030759546905755997, "rewards/margins": 0.27591627836227417, "rewards/rejected": -0.3066757917404175, "step": 12 }, { "epoch": 0.48148148148148145, "grad_norm": 21.647385072039942, "learning_rate": 3.147047612756302e-07, "logits/chosen": -1.1367645263671875, "logits/rejected": -1.1599252223968506, "logps/chosen": -575.5039672851562, "logps/rejected": -499.92059326171875, "loss": 0.5172, "rewards/accuracies": 1.0, "rewards/chosen": -0.07214581221342087, "rewards/margins": 0.3617976903915405, "rewards/rejected": -0.4339434802532196, "step": 13 }, { "epoch": 0.5185185185185185, "grad_norm": 21.79316246052631, "learning_rate": 2.826315480550129e-07, "logits/chosen": -1.077150821685791, "logits/rejected": -1.1288504600524902, "logps/chosen": -556.2161865234375, "logps/rejected": -495.59539794921875, "loss": 0.4981, "rewards/accuracies": 0.9375, "rewards/chosen": -0.12262667715549469, "rewards/margins": 0.3966625928878784, "rewards/rejected": -0.5192892551422119, "step": 14 }, { "epoch": 0.5555555555555556, "grad_norm": 21.27092910668923, "learning_rate": 2.5e-07, "logits/chosen": -1.135519027709961, "logits/rejected": -1.1375882625579834, "logps/chosen": -600.9283447265625, "logps/rejected": -557.6646118164062, "loss": 0.4817, "rewards/accuracies": 1.0, "rewards/chosen": -0.17058339715003967, "rewards/margins": 0.44923293590545654, "rewards/rejected": -0.6198163628578186, "step": 15 }, { "epoch": 0.5925925925925926, "grad_norm": 20.418108861546624, "learning_rate": 2.1736845194498716e-07, "logits/chosen": -1.065222144126892, "logits/rejected": -1.1098757982254028, "logps/chosen": -516.3076782226562, "logps/rejected": -414.37371826171875, "loss": 0.4632, "rewards/accuracies": 0.875, "rewards/chosen": -0.3122747242450714, "rewards/margins": 0.4216526746749878, "rewards/rejected": -0.7339274287223816, "step": 16 }, { "epoch": 0.6296296296296297, "grad_norm": 19.79277850701313, "learning_rate": 1.8529523872436977e-07, "logits/chosen": -1.017812967300415, "logits/rejected": -1.0660429000854492, "logps/chosen": -720.6757202148438, "logps/rejected": -684.0345458984375, "loss": 0.4552, "rewards/accuracies": 1.0, "rewards/chosen": -0.32912153005599976, "rewards/margins": 0.5706872940063477, "rewards/rejected": -0.8998088240623474, "step": 17 }, { "epoch": 0.6666666666666666, "grad_norm": 20.42225137733936, "learning_rate": 1.5432914190872756e-07, "logits/chosen": -1.0442301034927368, "logits/rejected": -1.0349968671798706, "logps/chosen": -747.7850952148438, "logps/rejected": -685.4484252929688, "loss": 0.4673, "rewards/accuracies": 0.875, "rewards/chosen": -0.3863644599914551, "rewards/margins": 0.5358256101608276, "rewards/rejected": -0.9221900701522827, "step": 18 }, { "epoch": 0.7037037037037037, "grad_norm": 22.47255209023482, "learning_rate": 1.2500000000000005e-07, "logits/chosen": -1.1190571784973145, "logits/rejected": -1.1354550123214722, "logps/chosen": -685.4273681640625, "logps/rejected": -646.656005859375, "loss": 0.4169, "rewards/accuracies": 1.0, "rewards/chosen": -0.4319349527359009, "rewards/margins": 0.7433109879493713, "rewards/rejected": -1.175246000289917, "step": 19 }, { "epoch": 0.7407407407407407, "grad_norm": 20.30453136903234, "learning_rate": 9.780964274781983e-08, "logits/chosen": -0.9758723974227905, "logits/rejected": -0.9760102033615112, "logps/chosen": -701.876220703125, "logps/rejected": -603.935546875, "loss": 0.4295, "rewards/accuracies": 0.9375, "rewards/chosen": -0.4580194056034088, "rewards/margins": 0.6639784574508667, "rewards/rejected": -1.1219978332519531, "step": 20 }, { "epoch": 0.7777777777777778, "grad_norm": 19.920032346717264, "learning_rate": 7.322330470336313e-08, "logits/chosen": -1.0556690692901611, "logits/rejected": -1.038096308708191, "logps/chosen": -790.4100952148438, "logps/rejected": -738.6947021484375, "loss": 0.3989, "rewards/accuracies": 1.0, "rewards/chosen": -0.5596134662628174, "rewards/margins": 0.6681773662567139, "rewards/rejected": -1.2277909517288208, "step": 21 }, { "epoch": 0.8148148148148148, "grad_norm": 19.811761984641322, "learning_rate": 5.166166492719124e-08, "logits/chosen": -1.0022271871566772, "logits/rejected": -0.9993726015090942, "logps/chosen": -648.9810180664062, "logps/rejected": -578.921142578125, "loss": 0.3949, "rewards/accuracies": 1.0, "rewards/chosen": -0.5851563215255737, "rewards/margins": 0.786234974861145, "rewards/rejected": -1.3713912963867188, "step": 22 }, { "epoch": 0.8518518518518519, "grad_norm": 19.77551914709964, "learning_rate": 3.349364905389032e-08, "logits/chosen": -0.924078106880188, "logits/rejected": -0.9447305202484131, "logps/chosen": -727.5433959960938, "logps/rejected": -727.569580078125, "loss": 0.3777, "rewards/accuracies": 1.0, "rewards/chosen": -0.3620569705963135, "rewards/margins": 1.0265687704086304, "rewards/rejected": -1.3886257410049438, "step": 23 }, { "epoch": 0.8888888888888888, "grad_norm": 20.99090983878074, "learning_rate": 1.9030116872178314e-08, "logits/chosen": -0.9339395761489868, "logits/rejected": -1.002177357673645, "logps/chosen": -772.19921875, "logps/rejected": -641.7393798828125, "loss": 0.3809, "rewards/accuracies": 1.0, "rewards/chosen": -0.4156669080257416, "rewards/margins": 0.8427646160125732, "rewards/rejected": -1.2584315538406372, "step": 24 }, { "epoch": 0.9259259259259259, "grad_norm": 19.60455577981017, "learning_rate": 8.518543427732949e-09, "logits/chosen": -1.0230871438980103, "logits/rejected": -0.9638053178787231, "logps/chosen": -676.6509399414062, "logps/rejected": -698.9414672851562, "loss": 0.3781, "rewards/accuracies": 1.0, "rewards/chosen": -0.5820339918136597, "rewards/margins": 0.7440013885498047, "rewards/rejected": -1.326035499572754, "step": 25 }, { "epoch": 0.9629629629629629, "grad_norm": 20.184672646004614, "learning_rate": 2.1387846565474044e-09, "logits/chosen": -0.9462152123451233, "logits/rejected": -0.9009606242179871, "logps/chosen": -672.621337890625, "logps/rejected": -633.5792236328125, "loss": 0.3642, "rewards/accuracies": 0.875, "rewards/chosen": -0.6688261032104492, "rewards/margins": 0.8014542460441589, "rewards/rejected": -1.470280408859253, "step": 26 }, { "epoch": 1.0, "grad_norm": 19.209616333813695, "learning_rate": 0.0, "logits/chosen": -1.027219533920288, "logits/rejected": -1.0129032135009766, "logps/chosen": -630.52880859375, "logps/rejected": -564.9932250976562, "loss": 0.3937, "rewards/accuracies": 0.9375, "rewards/chosen": -0.5863191485404968, "rewards/margins": 0.763579249382019, "rewards/rejected": -1.349898338317871, "step": 27 }, { "epoch": 1.0, "step": 27, "total_flos": 0.0, "train_loss": 0.525856320504789, "train_runtime": 769.8946, "train_samples_per_second": 2.244, "train_steps_per_second": 0.035 } ], "logging_steps": 1.0, "max_steps": 27, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }