|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 27, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.037037037037037035, |
|
"grad_norm": 27.55777176285727, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/chosen": -1.3260200023651123, |
|
"logits/rejected": -1.3555822372436523, |
|
"logps/chosen": -620.6215209960938, |
|
"logps/rejected": -538.2889404296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07407407407407407, |
|
"grad_norm": 25.11934361945706, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/chosen": -1.3862979412078857, |
|
"logits/rejected": -1.418839931488037, |
|
"logps/chosen": -548.8743286132812, |
|
"logps/rejected": -502.6883544921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.1111111111111111, |
|
"grad_norm": 26.735838110076433, |
|
"learning_rate": 5e-07, |
|
"logits/chosen": -1.3722721338272095, |
|
"logits/rejected": -1.4019339084625244, |
|
"logps/chosen": -593.66552734375, |
|
"logps/rejected": -518.8505249023438, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0003613852895796299, |
|
"rewards/margins": 0.002076806966215372, |
|
"rewards/rejected": -0.001715421793051064, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.14814814814814814, |
|
"grad_norm": 27.831815566219696, |
|
"learning_rate": 4.978612153434526e-07, |
|
"logits/chosen": -1.3279492855072021, |
|
"logits/rejected": -1.4584531784057617, |
|
"logps/chosen": -616.8908081054688, |
|
"logps/rejected": -478.5179443359375, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.002412938978523016, |
|
"rewards/margins": 0.00437101349234581, |
|
"rewards/rejected": -0.001958074513822794, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.18518518518518517, |
|
"grad_norm": 26.45387550515481, |
|
"learning_rate": 4.91481456572267e-07, |
|
"logits/chosen": -1.2913222312927246, |
|
"logits/rejected": -1.4004031419754028, |
|
"logps/chosen": -596.9254150390625, |
|
"logps/rejected": -491.03643798828125, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.012376622296869755, |
|
"rewards/margins": 0.019729193300008774, |
|
"rewards/rejected": -0.007352571468800306, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 26.113162913083638, |
|
"learning_rate": 4.809698831278217e-07, |
|
"logits/chosen": -1.2476236820220947, |
|
"logits/rejected": -1.3365733623504639, |
|
"logps/chosen": -576.7242431640625, |
|
"logps/rejected": -499.41351318359375, |
|
"loss": 0.6689, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.014585882425308228, |
|
"rewards/margins": 0.039108239114284515, |
|
"rewards/rejected": -0.024522356688976288, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.25925925925925924, |
|
"grad_norm": 24.212448317206544, |
|
"learning_rate": 4.6650635094610966e-07, |
|
"logits/chosen": -1.3422677516937256, |
|
"logits/rejected": -1.4066834449768066, |
|
"logps/chosen": -550.8783569335938, |
|
"logps/rejected": -444.896484375, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.01903172954916954, |
|
"rewards/margins": 0.05026581883430481, |
|
"rewards/rejected": -0.031234093010425568, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.2962962962962963, |
|
"grad_norm": 25.92207003651225, |
|
"learning_rate": 4.483383350728088e-07, |
|
"logits/chosen": -1.155773401260376, |
|
"logits/rejected": -1.2405778169631958, |
|
"logps/chosen": -660.4547119140625, |
|
"logps/rejected": -549.8304443359375, |
|
"loss": 0.6308, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.04862784221768379, |
|
"rewards/margins": 0.11470220237970352, |
|
"rewards/rejected": -0.06607436388731003, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 22.804718326627064, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -1.1948490142822266, |
|
"logits/rejected": -1.2298305034637451, |
|
"logps/chosen": -661.9083251953125, |
|
"logps/rejected": -571.0025634765625, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05401439964771271, |
|
"rewards/margins": 0.20858871936798096, |
|
"rewards/rejected": -0.15457431972026825, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.37037037037037035, |
|
"grad_norm": 21.49943323053994, |
|
"learning_rate": 4.0219035725218013e-07, |
|
"logits/chosen": -1.2277066707611084, |
|
"logits/rejected": -1.2695496082305908, |
|
"logps/chosen": -664.5496826171875, |
|
"logps/rejected": -551.2160034179688, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05593748763203621, |
|
"rewards/margins": 0.2475208044052124, |
|
"rewards/rejected": -0.1915833204984665, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4074074074074074, |
|
"grad_norm": 21.083207323643638, |
|
"learning_rate": 3.75e-07, |
|
"logits/chosen": -1.2142881155014038, |
|
"logits/rejected": -1.2608280181884766, |
|
"logps/chosen": -493.3670654296875, |
|
"logps/rejected": -460.8230285644531, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.02144962176680565, |
|
"rewards/margins": 0.19565340876579285, |
|
"rewards/rejected": -0.2171030193567276, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 22.30950608958288, |
|
"learning_rate": 3.4567085809127245e-07, |
|
"logits/chosen": -1.1867148876190186, |
|
"logits/rejected": -1.204742431640625, |
|
"logps/chosen": -518.848876953125, |
|
"logps/rejected": -477.93121337890625, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.030759546905755997, |
|
"rewards/margins": 0.27591627836227417, |
|
"rewards/rejected": -0.3066757917404175, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.48148148148148145, |
|
"grad_norm": 21.647385072039942, |
|
"learning_rate": 3.147047612756302e-07, |
|
"logits/chosen": -1.1367645263671875, |
|
"logits/rejected": -1.1599252223968506, |
|
"logps/chosen": -575.5039672851562, |
|
"logps/rejected": -499.92059326171875, |
|
"loss": 0.5172, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07214581221342087, |
|
"rewards/margins": 0.3617976903915405, |
|
"rewards/rejected": -0.4339434802532196, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.5185185185185185, |
|
"grad_norm": 21.79316246052631, |
|
"learning_rate": 2.826315480550129e-07, |
|
"logits/chosen": -1.077150821685791, |
|
"logits/rejected": -1.1288504600524902, |
|
"logps/chosen": -556.2161865234375, |
|
"logps/rejected": -495.59539794921875, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.12262667715549469, |
|
"rewards/margins": 0.3966625928878784, |
|
"rewards/rejected": -0.5192892551422119, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 21.27092910668923, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -1.135519027709961, |
|
"logits/rejected": -1.1375882625579834, |
|
"logps/chosen": -600.9283447265625, |
|
"logps/rejected": -557.6646118164062, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17058339715003967, |
|
"rewards/margins": 0.44923293590545654, |
|
"rewards/rejected": -0.6198163628578186, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.5925925925925926, |
|
"grad_norm": 20.418108861546624, |
|
"learning_rate": 2.1736845194498716e-07, |
|
"logits/chosen": -1.065222144126892, |
|
"logits/rejected": -1.1098757982254028, |
|
"logps/chosen": -516.3076782226562, |
|
"logps/rejected": -414.37371826171875, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3122747242450714, |
|
"rewards/margins": 0.4216526746749878, |
|
"rewards/rejected": -0.7339274287223816, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.6296296296296297, |
|
"grad_norm": 19.79277850701313, |
|
"learning_rate": 1.8529523872436977e-07, |
|
"logits/chosen": -1.017812967300415, |
|
"logits/rejected": -1.0660429000854492, |
|
"logps/chosen": -720.6757202148438, |
|
"logps/rejected": -684.0345458984375, |
|
"loss": 0.4552, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.32912153005599976, |
|
"rewards/margins": 0.5706872940063477, |
|
"rewards/rejected": -0.8998088240623474, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 20.42225137733936, |
|
"learning_rate": 1.5432914190872756e-07, |
|
"logits/chosen": -1.0442301034927368, |
|
"logits/rejected": -1.0349968671798706, |
|
"logps/chosen": -747.7850952148438, |
|
"logps/rejected": -685.4484252929688, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3863644599914551, |
|
"rewards/margins": 0.5358256101608276, |
|
"rewards/rejected": -0.9221900701522827, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.7037037037037037, |
|
"grad_norm": 22.47255209023482, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -1.1190571784973145, |
|
"logits/rejected": -1.1354550123214722, |
|
"logps/chosen": -685.4273681640625, |
|
"logps/rejected": -646.656005859375, |
|
"loss": 0.4169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4319349527359009, |
|
"rewards/margins": 0.7433109879493713, |
|
"rewards/rejected": -1.175246000289917, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.7407407407407407, |
|
"grad_norm": 20.30453136903234, |
|
"learning_rate": 9.780964274781983e-08, |
|
"logits/chosen": -0.9758723974227905, |
|
"logits/rejected": -0.9760102033615112, |
|
"logps/chosen": -701.876220703125, |
|
"logps/rejected": -603.935546875, |
|
"loss": 0.4295, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.4580194056034088, |
|
"rewards/margins": 0.6639784574508667, |
|
"rewards/rejected": -1.1219978332519531, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7777777777777778, |
|
"grad_norm": 19.920032346717264, |
|
"learning_rate": 7.322330470336313e-08, |
|
"logits/chosen": -1.0556690692901611, |
|
"logits/rejected": -1.038096308708191, |
|
"logps/chosen": -790.4100952148438, |
|
"logps/rejected": -738.6947021484375, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5596134662628174, |
|
"rewards/margins": 0.6681773662567139, |
|
"rewards/rejected": -1.2277909517288208, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.8148148148148148, |
|
"grad_norm": 19.811761984641322, |
|
"learning_rate": 5.166166492719124e-08, |
|
"logits/chosen": -1.0022271871566772, |
|
"logits/rejected": -0.9993726015090942, |
|
"logps/chosen": -648.9810180664062, |
|
"logps/rejected": -578.921142578125, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5851563215255737, |
|
"rewards/margins": 0.786234974861145, |
|
"rewards/rejected": -1.3713912963867188, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.8518518518518519, |
|
"grad_norm": 19.77551914709964, |
|
"learning_rate": 3.349364905389032e-08, |
|
"logits/chosen": -0.924078106880188, |
|
"logits/rejected": -0.9447305202484131, |
|
"logps/chosen": -727.5433959960938, |
|
"logps/rejected": -727.569580078125, |
|
"loss": 0.3777, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3620569705963135, |
|
"rewards/margins": 1.0265687704086304, |
|
"rewards/rejected": -1.3886257410049438, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 20.99090983878074, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -0.9339395761489868, |
|
"logits/rejected": -1.002177357673645, |
|
"logps/chosen": -772.19921875, |
|
"logps/rejected": -641.7393798828125, |
|
"loss": 0.3809, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4156669080257416, |
|
"rewards/margins": 0.8427646160125732, |
|
"rewards/rejected": -1.2584315538406372, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.9259259259259259, |
|
"grad_norm": 19.60455577981017, |
|
"learning_rate": 8.518543427732949e-09, |
|
"logits/chosen": -1.0230871438980103, |
|
"logits/rejected": -0.9638053178787231, |
|
"logps/chosen": -676.6509399414062, |
|
"logps/rejected": -698.9414672851562, |
|
"loss": 0.3781, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5820339918136597, |
|
"rewards/margins": 0.7440013885498047, |
|
"rewards/rejected": -1.326035499572754, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.9629629629629629, |
|
"grad_norm": 20.184672646004614, |
|
"learning_rate": 2.1387846565474044e-09, |
|
"logits/chosen": -0.9462152123451233, |
|
"logits/rejected": -0.9009606242179871, |
|
"logps/chosen": -672.621337890625, |
|
"logps/rejected": -633.5792236328125, |
|
"loss": 0.3642, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.6688261032104492, |
|
"rewards/margins": 0.8014542460441589, |
|
"rewards/rejected": -1.470280408859253, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 19.209616333813695, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.027219533920288, |
|
"logits/rejected": -1.0129032135009766, |
|
"logps/chosen": -630.52880859375, |
|
"logps/rejected": -564.9932250976562, |
|
"loss": 0.3937, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.5863191485404968, |
|
"rewards/margins": 0.763579249382019, |
|
"rewards/rejected": -1.349898338317871, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 27, |
|
"total_flos": 0.0, |
|
"train_loss": 0.525856320504789, |
|
"train_runtime": 769.8946, |
|
"train_samples_per_second": 2.244, |
|
"train_steps_per_second": 0.035 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 27, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|