dpo-full-0715 / trainer_state.json
nike00811's picture
Model save
6240a73 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 27,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037037037037037035,
"grad_norm": 27.55777176285727,
"learning_rate": 1.6666666666666665e-07,
"logits/chosen": -1.3260200023651123,
"logits/rejected": -1.3555822372436523,
"logps/chosen": -620.6215209960938,
"logps/rejected": -538.2889404296875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.07407407407407407,
"grad_norm": 25.11934361945706,
"learning_rate": 3.333333333333333e-07,
"logits/chosen": -1.3862979412078857,
"logits/rejected": -1.418839931488037,
"logps/chosen": -548.8743286132812,
"logps/rejected": -502.6883544921875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 2
},
{
"epoch": 0.1111111111111111,
"grad_norm": 26.735838110076433,
"learning_rate": 5e-07,
"logits/chosen": -1.3722721338272095,
"logits/rejected": -1.4019339084625244,
"logps/chosen": -593.66552734375,
"logps/rejected": -518.8505249023438,
"loss": 0.6926,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.0003613852895796299,
"rewards/margins": 0.002076806966215372,
"rewards/rejected": -0.001715421793051064,
"step": 3
},
{
"epoch": 0.14814814814814814,
"grad_norm": 27.831815566219696,
"learning_rate": 4.978612153434526e-07,
"logits/chosen": -1.3279492855072021,
"logits/rejected": -1.4584531784057617,
"logps/chosen": -616.8908081054688,
"logps/rejected": -478.5179443359375,
"loss": 0.6906,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.002412938978523016,
"rewards/margins": 0.00437101349234581,
"rewards/rejected": -0.001958074513822794,
"step": 4
},
{
"epoch": 0.18518518518518517,
"grad_norm": 26.45387550515481,
"learning_rate": 4.91481456572267e-07,
"logits/chosen": -1.2913222312927246,
"logits/rejected": -1.4004031419754028,
"logps/chosen": -596.9254150390625,
"logps/rejected": -491.03643798828125,
"loss": 0.6846,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.012376622296869755,
"rewards/margins": 0.019729193300008774,
"rewards/rejected": -0.007352571468800306,
"step": 5
},
{
"epoch": 0.2222222222222222,
"grad_norm": 26.113162913083638,
"learning_rate": 4.809698831278217e-07,
"logits/chosen": -1.2476236820220947,
"logits/rejected": -1.3365733623504639,
"logps/chosen": -576.7242431640625,
"logps/rejected": -499.41351318359375,
"loss": 0.6689,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.014585882425308228,
"rewards/margins": 0.039108239114284515,
"rewards/rejected": -0.024522356688976288,
"step": 6
},
{
"epoch": 0.25925925925925924,
"grad_norm": 24.212448317206544,
"learning_rate": 4.6650635094610966e-07,
"logits/chosen": -1.3422677516937256,
"logits/rejected": -1.4066834449768066,
"logps/chosen": -550.8783569335938,
"logps/rejected": -444.896484375,
"loss": 0.6611,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.01903172954916954,
"rewards/margins": 0.05026581883430481,
"rewards/rejected": -0.031234093010425568,
"step": 7
},
{
"epoch": 0.2962962962962963,
"grad_norm": 25.92207003651225,
"learning_rate": 4.483383350728088e-07,
"logits/chosen": -1.155773401260376,
"logits/rejected": -1.2405778169631958,
"logps/chosen": -660.4547119140625,
"logps/rejected": -549.8304443359375,
"loss": 0.6308,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.04862784221768379,
"rewards/margins": 0.11470220237970352,
"rewards/rejected": -0.06607436388731003,
"step": 8
},
{
"epoch": 0.3333333333333333,
"grad_norm": 22.804718326627064,
"learning_rate": 4.2677669529663686e-07,
"logits/chosen": -1.1948490142822266,
"logits/rejected": -1.2298305034637451,
"logps/chosen": -661.9083251953125,
"logps/rejected": -571.0025634765625,
"loss": 0.6201,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.05401439964771271,
"rewards/margins": 0.20858871936798096,
"rewards/rejected": -0.15457431972026825,
"step": 9
},
{
"epoch": 0.37037037037037035,
"grad_norm": 21.49943323053994,
"learning_rate": 4.0219035725218013e-07,
"logits/chosen": -1.2277066707611084,
"logits/rejected": -1.2695496082305908,
"logps/chosen": -664.5496826171875,
"logps/rejected": -551.2160034179688,
"loss": 0.5948,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.05593748763203621,
"rewards/margins": 0.2475208044052124,
"rewards/rejected": -0.1915833204984665,
"step": 10
},
{
"epoch": 0.4074074074074074,
"grad_norm": 21.083207323643638,
"learning_rate": 3.75e-07,
"logits/chosen": -1.2142881155014038,
"logits/rejected": -1.2608280181884766,
"logps/chosen": -493.3670654296875,
"logps/rejected": -460.8230285644531,
"loss": 0.5937,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.02144962176680565,
"rewards/margins": 0.19565340876579285,
"rewards/rejected": -0.2171030193567276,
"step": 11
},
{
"epoch": 0.4444444444444444,
"grad_norm": 22.30950608958288,
"learning_rate": 3.4567085809127245e-07,
"logits/chosen": -1.1867148876190186,
"logits/rejected": -1.204742431640625,
"logps/chosen": -518.848876953125,
"logps/rejected": -477.93121337890625,
"loss": 0.5571,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.030759546905755997,
"rewards/margins": 0.27591627836227417,
"rewards/rejected": -0.3066757917404175,
"step": 12
},
{
"epoch": 0.48148148148148145,
"grad_norm": 21.647385072039942,
"learning_rate": 3.147047612756302e-07,
"logits/chosen": -1.1367645263671875,
"logits/rejected": -1.1599252223968506,
"logps/chosen": -575.5039672851562,
"logps/rejected": -499.92059326171875,
"loss": 0.5172,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.07214581221342087,
"rewards/margins": 0.3617976903915405,
"rewards/rejected": -0.4339434802532196,
"step": 13
},
{
"epoch": 0.5185185185185185,
"grad_norm": 21.79316246052631,
"learning_rate": 2.826315480550129e-07,
"logits/chosen": -1.077150821685791,
"logits/rejected": -1.1288504600524902,
"logps/chosen": -556.2161865234375,
"logps/rejected": -495.59539794921875,
"loss": 0.4981,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.12262667715549469,
"rewards/margins": 0.3966625928878784,
"rewards/rejected": -0.5192892551422119,
"step": 14
},
{
"epoch": 0.5555555555555556,
"grad_norm": 21.27092910668923,
"learning_rate": 2.5e-07,
"logits/chosen": -1.135519027709961,
"logits/rejected": -1.1375882625579834,
"logps/chosen": -600.9283447265625,
"logps/rejected": -557.6646118164062,
"loss": 0.4817,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17058339715003967,
"rewards/margins": 0.44923293590545654,
"rewards/rejected": -0.6198163628578186,
"step": 15
},
{
"epoch": 0.5925925925925926,
"grad_norm": 20.418108861546624,
"learning_rate": 2.1736845194498716e-07,
"logits/chosen": -1.065222144126892,
"logits/rejected": -1.1098757982254028,
"logps/chosen": -516.3076782226562,
"logps/rejected": -414.37371826171875,
"loss": 0.4632,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.3122747242450714,
"rewards/margins": 0.4216526746749878,
"rewards/rejected": -0.7339274287223816,
"step": 16
},
{
"epoch": 0.6296296296296297,
"grad_norm": 19.79277850701313,
"learning_rate": 1.8529523872436977e-07,
"logits/chosen": -1.017812967300415,
"logits/rejected": -1.0660429000854492,
"logps/chosen": -720.6757202148438,
"logps/rejected": -684.0345458984375,
"loss": 0.4552,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.32912153005599976,
"rewards/margins": 0.5706872940063477,
"rewards/rejected": -0.8998088240623474,
"step": 17
},
{
"epoch": 0.6666666666666666,
"grad_norm": 20.42225137733936,
"learning_rate": 1.5432914190872756e-07,
"logits/chosen": -1.0442301034927368,
"logits/rejected": -1.0349968671798706,
"logps/chosen": -747.7850952148438,
"logps/rejected": -685.4484252929688,
"loss": 0.4673,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.3863644599914551,
"rewards/margins": 0.5358256101608276,
"rewards/rejected": -0.9221900701522827,
"step": 18
},
{
"epoch": 0.7037037037037037,
"grad_norm": 22.47255209023482,
"learning_rate": 1.2500000000000005e-07,
"logits/chosen": -1.1190571784973145,
"logits/rejected": -1.1354550123214722,
"logps/chosen": -685.4273681640625,
"logps/rejected": -646.656005859375,
"loss": 0.4169,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4319349527359009,
"rewards/margins": 0.7433109879493713,
"rewards/rejected": -1.175246000289917,
"step": 19
},
{
"epoch": 0.7407407407407407,
"grad_norm": 20.30453136903234,
"learning_rate": 9.780964274781983e-08,
"logits/chosen": -0.9758723974227905,
"logits/rejected": -0.9760102033615112,
"logps/chosen": -701.876220703125,
"logps/rejected": -603.935546875,
"loss": 0.4295,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.4580194056034088,
"rewards/margins": 0.6639784574508667,
"rewards/rejected": -1.1219978332519531,
"step": 20
},
{
"epoch": 0.7777777777777778,
"grad_norm": 19.920032346717264,
"learning_rate": 7.322330470336313e-08,
"logits/chosen": -1.0556690692901611,
"logits/rejected": -1.038096308708191,
"logps/chosen": -790.4100952148438,
"logps/rejected": -738.6947021484375,
"loss": 0.3989,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5596134662628174,
"rewards/margins": 0.6681773662567139,
"rewards/rejected": -1.2277909517288208,
"step": 21
},
{
"epoch": 0.8148148148148148,
"grad_norm": 19.811761984641322,
"learning_rate": 5.166166492719124e-08,
"logits/chosen": -1.0022271871566772,
"logits/rejected": -0.9993726015090942,
"logps/chosen": -648.9810180664062,
"logps/rejected": -578.921142578125,
"loss": 0.3949,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5851563215255737,
"rewards/margins": 0.786234974861145,
"rewards/rejected": -1.3713912963867188,
"step": 22
},
{
"epoch": 0.8518518518518519,
"grad_norm": 19.77551914709964,
"learning_rate": 3.349364905389032e-08,
"logits/chosen": -0.924078106880188,
"logits/rejected": -0.9447305202484131,
"logps/chosen": -727.5433959960938,
"logps/rejected": -727.569580078125,
"loss": 0.3777,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.3620569705963135,
"rewards/margins": 1.0265687704086304,
"rewards/rejected": -1.3886257410049438,
"step": 23
},
{
"epoch": 0.8888888888888888,
"grad_norm": 20.99090983878074,
"learning_rate": 1.9030116872178314e-08,
"logits/chosen": -0.9339395761489868,
"logits/rejected": -1.002177357673645,
"logps/chosen": -772.19921875,
"logps/rejected": -641.7393798828125,
"loss": 0.3809,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.4156669080257416,
"rewards/margins": 0.8427646160125732,
"rewards/rejected": -1.2584315538406372,
"step": 24
},
{
"epoch": 0.9259259259259259,
"grad_norm": 19.60455577981017,
"learning_rate": 8.518543427732949e-09,
"logits/chosen": -1.0230871438980103,
"logits/rejected": -0.9638053178787231,
"logps/chosen": -676.6509399414062,
"logps/rejected": -698.9414672851562,
"loss": 0.3781,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5820339918136597,
"rewards/margins": 0.7440013885498047,
"rewards/rejected": -1.326035499572754,
"step": 25
},
{
"epoch": 0.9629629629629629,
"grad_norm": 20.184672646004614,
"learning_rate": 2.1387846565474044e-09,
"logits/chosen": -0.9462152123451233,
"logits/rejected": -0.9009606242179871,
"logps/chosen": -672.621337890625,
"logps/rejected": -633.5792236328125,
"loss": 0.3642,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.6688261032104492,
"rewards/margins": 0.8014542460441589,
"rewards/rejected": -1.470280408859253,
"step": 26
},
{
"epoch": 1.0,
"grad_norm": 19.209616333813695,
"learning_rate": 0.0,
"logits/chosen": -1.027219533920288,
"logits/rejected": -1.0129032135009766,
"logps/chosen": -630.52880859375,
"logps/rejected": -564.9932250976562,
"loss": 0.3937,
"rewards/accuracies": 0.9375,
"rewards/chosen": -0.5863191485404968,
"rewards/margins": 0.763579249382019,
"rewards/rejected": -1.349898338317871,
"step": 27
},
{
"epoch": 1.0,
"step": 27,
"total_flos": 0.0,
"train_loss": 0.525856320504789,
"train_runtime": 769.8946,
"train_samples_per_second": 2.244,
"train_steps_per_second": 0.035
}
],
"logging_steps": 1.0,
"max_steps": 27,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}