zephyr-7b-dpo-full / trainer_state.json
weqweasdas's picture
Model save
15d5a92 verified
raw
history blame
22.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9996728819103696,
"eval_steps": 100,
"global_step": 382,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 8.037226671121555,
"learning_rate": 1.282051282051282e-08,
"logits/chosen": -2.9984583854675293,
"logits/rejected": -3.047058343887329,
"logps/chosen": -440.9766845703125,
"logps/rejected": -253.47076416015625,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 9.369508939204481,
"learning_rate": 1.2820512820512818e-07,
"logits/chosen": -2.851626396179199,
"logits/rejected": -2.840857982635498,
"logps/chosen": -292.8236999511719,
"logps/rejected": -243.9152069091797,
"loss": 0.6931,
"rewards/accuracies": 0.4583333432674408,
"rewards/chosen": 0.0002360683138249442,
"rewards/margins": -7.22050535841845e-05,
"rewards/rejected": 0.00030827338923700154,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 7.770546674426242,
"learning_rate": 2.5641025641025636e-07,
"logits/chosen": -2.836233139038086,
"logits/rejected": -2.8243448734283447,
"logps/chosen": -269.3387145996094,
"logps/rejected": -226.77444458007812,
"loss": 0.6919,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0006391379283741117,
"rewards/margins": 0.0022494590375572443,
"rewards/rejected": -0.0016103212255984545,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 7.555168855947224,
"learning_rate": 3.8461538461538463e-07,
"logits/chosen": -2.8013336658477783,
"logits/rejected": -2.8021349906921387,
"logps/chosen": -273.3194274902344,
"logps/rejected": -246.2598114013672,
"loss": 0.6851,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.01441938616335392,
"rewards/margins": 0.014810544438660145,
"rewards/rejected": -0.0003911592939402908,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 8.28569270937212,
"learning_rate": 4.999895137916554e-07,
"logits/chosen": -2.814171552658081,
"logits/rejected": -2.832319974899292,
"logps/chosen": -276.9251403808594,
"logps/rejected": -268.66461181640625,
"loss": 0.6723,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.04223880544304848,
"rewards/margins": 0.04603661596775055,
"rewards/rejected": -0.0037978135515004396,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 10.06207588110713,
"learning_rate": 4.987322328603843e-07,
"logits/chosen": -2.771031141281128,
"logits/rejected": -2.7740721702575684,
"logps/chosen": -280.4728088378906,
"logps/rejected": -270.3973083496094,
"loss": 0.6424,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.010499795898795128,
"rewards/margins": 0.13346262276172638,
"rewards/rejected": -0.1229628473520279,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 11.452381967640083,
"learning_rate": 4.953897892477663e-07,
"logits/chosen": -2.7735002040863037,
"logits/rejected": -2.7375593185424805,
"logps/chosen": -275.349853515625,
"logps/rejected": -285.12603759765625,
"loss": 0.6166,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.11094622313976288,
"rewards/margins": 0.23263141512870789,
"rewards/rejected": -0.34357762336730957,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 12.969857186189097,
"learning_rate": 4.899902032011388e-07,
"logits/chosen": -2.6529109477996826,
"logits/rejected": -2.628537893295288,
"logps/chosen": -302.51910400390625,
"logps/rejected": -317.7935791015625,
"loss": 0.5948,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.10311013460159302,
"rewards/margins": 0.31482020020484924,
"rewards/rejected": -0.4179303050041199,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 19.524374589693302,
"learning_rate": 4.825787403267712e-07,
"logits/chosen": -2.5461225509643555,
"logits/rejected": -2.522418737411499,
"logps/chosen": -298.61798095703125,
"logps/rejected": -317.22503662109375,
"loss": 0.5781,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.3027142286300659,
"rewards/margins": 0.3840213418006897,
"rewards/rejected": -0.6867355108261108,
"step": 80
},
{
"epoch": 0.24,
"grad_norm": 14.061479301034291,
"learning_rate": 4.732175321209399e-07,
"logits/chosen": -2.397352933883667,
"logits/rejected": -2.424654722213745,
"logps/chosen": -268.97369384765625,
"logps/rejected": -313.6884460449219,
"loss": 0.5836,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.3002493977546692,
"rewards/margins": 0.41001391410827637,
"rewards/rejected": -0.7102633118629456,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 17.956912133781355,
"learning_rate": 4.619850551115105e-07,
"logits/chosen": -2.1157875061035156,
"logits/rejected": -2.0495009422302246,
"logps/chosen": -304.60968017578125,
"logps/rejected": -369.248779296875,
"loss": 0.5474,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.5611704587936401,
"rewards/margins": 0.6696933507919312,
"rewards/rejected": -1.2308636903762817,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.308899164199829,
"eval_logits/rejected": -2.2413949966430664,
"eval_logps/chosen": -365.31329345703125,
"eval_logps/rejected": -384.8892822265625,
"eval_loss": 0.5624693632125854,
"eval_rewards/accuracies": 0.6850000023841858,
"eval_rewards/chosen": -0.7543885707855225,
"eval_rewards/margins": 0.41293570399284363,
"eval_rewards/rejected": -1.167324423789978,
"eval_runtime": 693.8722,
"eval_samples_per_second": 2.882,
"eval_steps_per_second": 0.144,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 26.962688754701436,
"learning_rate": 4.4897547297646633e-07,
"logits/chosen": -1.9299335479736328,
"logits/rejected": -1.8247636556625366,
"logps/chosen": -396.6432189941406,
"logps/rejected": -426.43658447265625,
"loss": 0.5435,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.9001098871231079,
"rewards/margins": 0.6095530390739441,
"rewards/rejected": -1.5096628665924072,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 24.58898873320785,
"learning_rate": 4.3429784715451177e-07,
"logits/chosen": -1.8942654132843018,
"logits/rejected": -1.5966886281967163,
"logps/chosen": -327.930419921875,
"logps/rejected": -381.9439697265625,
"loss": 0.5262,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.6237712502479553,
"rewards/margins": 0.8189311027526855,
"rewards/rejected": -1.442702293395996,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 22.502297297962723,
"learning_rate": 4.180752225653292e-07,
"logits/chosen": -1.746715784072876,
"logits/rejected": -1.6937605142593384,
"logps/chosen": -372.397705078125,
"logps/rejected": -407.4283142089844,
"loss": 0.5297,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.8563397526741028,
"rewards/margins": 0.5461622476577759,
"rewards/rejected": -1.4025019407272339,
"step": 130
},
{
"epoch": 0.37,
"grad_norm": 30.209865465712408,
"learning_rate": 4.00443596104045e-07,
"logits/chosen": -1.5750809907913208,
"logits/rejected": -1.1808230876922607,
"logps/chosen": -404.4322509765625,
"logps/rejected": -471.23883056640625,
"loss": 0.5301,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -1.0831854343414307,
"rewards/margins": 0.7117680907249451,
"rewards/rejected": -1.79495370388031,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 24.47710068833958,
"learning_rate": 3.815507765571832e-07,
"logits/chosen": -1.8257108926773071,
"logits/rejected": -1.4285178184509277,
"logps/chosen": -398.15509033203125,
"logps/rejected": -433.53375244140625,
"loss": 0.5141,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.8729391098022461,
"rewards/margins": 0.7921355366706848,
"rewards/rejected": -1.6650745868682861,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 21.92961027984523,
"learning_rate": 3.615551454976164e-07,
"logits/chosen": -1.5830988883972168,
"logits/rejected": -0.9580858945846558,
"logps/chosen": -407.8912048339844,
"logps/rejected": -474.907958984375,
"loss": 0.5206,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.9090505838394165,
"rewards/margins": 0.8707038164138794,
"rewards/rejected": -1.779754400253296,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 20.612672549801676,
"learning_rate": 3.406243295461325e-07,
"logits/chosen": -1.4334670305252075,
"logits/rejected": -1.1878687143325806,
"logps/chosen": -390.4436340332031,
"logps/rejected": -457.1796875,
"loss": 0.5124,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.7781120538711548,
"rewards/margins": 0.8819589614868164,
"rewards/rejected": -1.6600710153579712,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 29.50134057519578,
"learning_rate": 3.189337951302639e-07,
"logits/chosen": -1.2944433689117432,
"logits/rejected": -0.9970762133598328,
"logps/chosen": -345.94415283203125,
"logps/rejected": -390.71673583984375,
"loss": 0.514,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -0.8080419301986694,
"rewards/margins": 0.8096262216567993,
"rewards/rejected": -1.6176681518554688,
"step": 180
},
{
"epoch": 0.5,
"grad_norm": 22.184389019959962,
"learning_rate": 2.966653775207444e-07,
"logits/chosen": -1.5276657342910767,
"logits/rejected": -1.1025655269622803,
"logps/chosen": -404.1734924316406,
"logps/rejected": -435.48089599609375,
"loss": 0.4989,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.9301355481147766,
"rewards/margins": 0.8291870355606079,
"rewards/rejected": -1.7593225240707397,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 26.733564014369115,
"learning_rate": 2.7400575647692043e-07,
"logits/chosen": -1.406765341758728,
"logits/rejected": -1.11467444896698,
"logps/chosen": -400.4964904785156,
"logps/rejected": -441.97998046875,
"loss": 0.5291,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.223042607307434,
"rewards/margins": 0.6574328541755676,
"rewards/rejected": -1.880475401878357,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -1.5008797645568848,
"eval_logits/rejected": -1.159541130065918,
"eval_logps/chosen": -410.1354675292969,
"eval_logps/rejected": -465.21136474609375,
"eval_loss": 0.5176023244857788,
"eval_rewards/accuracies": 0.75,
"eval_rewards/chosen": -1.2026103734970093,
"eval_rewards/margins": 0.7679347395896912,
"eval_rewards/rejected": -1.9705451726913452,
"eval_runtime": 694.1376,
"eval_samples_per_second": 2.881,
"eval_steps_per_second": 0.144,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 22.51552046396425,
"learning_rate": 2.511448912800265e-07,
"logits/chosen": -1.538048505783081,
"logits/rejected": -1.2279856204986572,
"logps/chosen": -392.5513610839844,
"logps/rejected": -486.5321350097656,
"loss": 0.4967,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -1.1226943731307983,
"rewards/margins": 0.8473464250564575,
"rewards/rejected": -1.9700406789779663,
"step": 210
},
{
"epoch": 0.58,
"grad_norm": 23.041728264521645,
"learning_rate": 2.2827442827369772e-07,
"logits/chosen": -1.632845163345337,
"logits/rejected": -1.380772352218628,
"logps/chosen": -346.6684875488281,
"logps/rejected": -421.88739013671875,
"loss": 0.5141,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.9023996591567993,
"rewards/margins": 0.7315413951873779,
"rewards/rejected": -1.6339410543441772,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 25.751992215511777,
"learning_rate": 2.0558609426156343e-07,
"logits/chosen": -1.5758168697357178,
"logits/rejected": -1.392135500907898,
"logps/chosen": -378.9245300292969,
"logps/rejected": -444.28936767578125,
"loss": 0.5208,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -1.1253713369369507,
"rewards/margins": 0.6441680192947388,
"rewards/rejected": -1.7695392370224,
"step": 230
},
{
"epoch": 0.63,
"grad_norm": 23.086940301819613,
"learning_rate": 1.8327008923033222e-07,
"logits/chosen": -1.6344903707504272,
"logits/rejected": -1.4062269926071167,
"logps/chosen": -433.552001953125,
"logps/rejected": -481.857421875,
"loss": 0.5213,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.1788504123687744,
"rewards/margins": 0.7093037962913513,
"rewards/rejected": -1.8881542682647705,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 22.246705559005484,
"learning_rate": 1.6151349187243063e-07,
"logits/chosen": -1.5954548120498657,
"logits/rejected": -1.1250216960906982,
"logps/chosen": -444.52001953125,
"logps/rejected": -509.41302490234375,
"loss": 0.4941,
"rewards/accuracies": 0.8125,
"rewards/chosen": -1.2779477834701538,
"rewards/margins": 0.9330456852912903,
"rewards/rejected": -2.210993528366089,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 25.840093284837668,
"learning_rate": 1.4049869127495884e-07,
"logits/chosen": -1.4928535223007202,
"logits/rejected": -0.7584124803543091,
"logps/chosen": -401.40374755859375,
"logps/rejected": -504.8365173339844,
"loss": 0.4984,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -1.2281973361968994,
"rewards/margins": 1.0200105905532837,
"rewards/rejected": -2.2482082843780518,
"step": 260
},
{
"epoch": 0.71,
"grad_norm": 23.793534917882216,
"learning_rate": 1.2040185792236873e-07,
"logits/chosen": -1.413051962852478,
"logits/rejected": -0.8309895396232605,
"logps/chosen": -416.62255859375,
"logps/rejected": -475.1249084472656,
"loss": 0.4978,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.3103489875793457,
"rewards/margins": 0.874372124671936,
"rewards/rejected": -2.1847212314605713,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 27.621259089208138,
"learning_rate": 1.0139146683069728e-07,
"logits/chosen": -1.5253907442092896,
"logits/rejected": -1.0778144598007202,
"logps/chosen": -385.1762390136719,
"logps/rejected": -462.73834228515625,
"loss": 0.5159,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -1.0836073160171509,
"rewards/margins": 0.8540776371955872,
"rewards/rejected": -1.9376850128173828,
"step": 280
},
{
"epoch": 0.76,
"grad_norm": 23.12152900338625,
"learning_rate": 8.362688519416084e-08,
"logits/chosen": -1.374618649482727,
"logits/rejected": -1.0819616317749023,
"logps/chosen": -367.61090087890625,
"logps/rejected": -503.74835205078125,
"loss": 0.4882,
"rewards/accuracies": 0.8125,
"rewards/chosen": -1.0010521411895752,
"rewards/margins": 1.0948959589004517,
"rewards/rejected": -2.0959479808807373,
"step": 290
},
{
"epoch": 0.79,
"grad_norm": 19.50201171164751,
"learning_rate": 6.725703638409861e-08,
"logits/chosen": -1.3707311153411865,
"logits/rejected": -1.0541932582855225,
"logps/chosen": -394.03924560546875,
"logps/rejected": -464.50640869140625,
"loss": 0.4891,
"rewards/accuracies": 0.75,
"rewards/chosen": -1.1756898164749146,
"rewards/margins": 0.8380070924758911,
"rewards/rejected": -2.0136971473693848,
"step": 300
},
{
"epoch": 0.79,
"eval_logits/chosen": -1.4685231447219849,
"eval_logits/rejected": -0.9395467638969421,
"eval_logps/chosen": -402.9643859863281,
"eval_logps/rejected": -464.9228515625,
"eval_loss": 0.5042179822921753,
"eval_rewards/accuracies": 0.7549999952316284,
"eval_rewards/chosen": -1.1308995485305786,
"eval_rewards/margins": 0.8367605805397034,
"eval_rewards/rejected": -1.9676600694656372,
"eval_runtime": 693.8368,
"eval_samples_per_second": 2.883,
"eval_steps_per_second": 0.144,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 22.978115464291324,
"learning_rate": 5.2419151500177395e-08,
"logits/chosen": -1.3883640766143799,
"logits/rejected": -0.8084322214126587,
"logps/chosen": -390.07672119140625,
"logps/rejected": -441.5680236816406,
"loss": 0.5031,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -1.1122825145721436,
"rewards/margins": 0.8595865964889526,
"rewards/rejected": -1.971868872642517,
"step": 310
},
{
"epoch": 0.84,
"grad_norm": 22.683444984114566,
"learning_rate": 3.9237618939808646e-08,
"logits/chosen": -1.3592180013656616,
"logits/rejected": -0.9010286331176758,
"logps/chosen": -427.95184326171875,
"logps/rejected": -494.72137451171875,
"loss": 0.5004,
"rewards/accuracies": 0.768750011920929,
"rewards/chosen": -1.145861029624939,
"rewards/margins": 0.888739287853241,
"rewards/rejected": -2.0346004962921143,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 22.356295615100926,
"learning_rate": 2.7822941630022605e-08,
"logits/chosen": -1.4484529495239258,
"logits/rejected": -1.012138843536377,
"logps/chosen": -386.97357177734375,
"logps/rejected": -459.3680725097656,
"loss": 0.4987,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -1.110649585723877,
"rewards/margins": 0.8449923396110535,
"rewards/rejected": -1.9556419849395752,
"step": 330
},
{
"epoch": 0.89,
"grad_norm": 21.4095540436173,
"learning_rate": 1.827081066349459e-08,
"logits/chosen": -1.372106671333313,
"logits/rejected": -1.1249427795410156,
"logps/chosen": -384.5665283203125,
"logps/rejected": -453.394287109375,
"loss": 0.5067,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.1582924127578735,
"rewards/margins": 0.6956581473350525,
"rewards/rejected": -1.8539505004882812,
"step": 340
},
{
"epoch": 0.92,
"grad_norm": 20.33149833976292,
"learning_rate": 1.0661303104582881e-08,
"logits/chosen": -1.3133699893951416,
"logits/rejected": -0.8347970247268677,
"logps/chosen": -401.160888671875,
"logps/rejected": -450.82843017578125,
"loss": 0.486,
"rewards/accuracies": 0.7562500238418579,
"rewards/chosen": -1.1163744926452637,
"rewards/margins": 0.9114343523979187,
"rewards/rejected": -2.027808666229248,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 27.107299279864698,
"learning_rate": 5.058210690300463e-09,
"logits/chosen": -1.40584397315979,
"logits/rejected": -1.0146383047103882,
"logps/chosen": -394.41607666015625,
"logps/rejected": -467.156982421875,
"loss": 0.5043,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -1.0669889450073242,
"rewards/margins": 0.8511545062065125,
"rewards/rejected": -1.9181435108184814,
"step": 360
},
{
"epoch": 0.97,
"grad_norm": 23.223735379023285,
"learning_rate": 1.5085050538290456e-09,
"logits/chosen": -1.3387047052383423,
"logits/rejected": -1.1372339725494385,
"logps/chosen": -393.8738708496094,
"logps/rejected": -456.54046630859375,
"loss": 0.5213,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -1.13485586643219,
"rewards/margins": 0.6930190324783325,
"rewards/rejected": -1.827874779701233,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 22.565920583773593,
"learning_rate": 4.19439536940569e-11,
"logits/chosen": -1.2898640632629395,
"logits/rejected": -0.9960281252861023,
"logps/chosen": -395.0779724121094,
"logps/rejected": -449.18646240234375,
"loss": 0.4955,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -1.2446048259735107,
"rewards/margins": 0.7065111398696899,
"rewards/rejected": -1.9511159658432007,
"step": 380
},
{
"epoch": 1.0,
"step": 382,
"total_flos": 0.0,
"train_loss": 0.5410362510156881,
"train_runtime": 45025.8645,
"train_samples_per_second": 1.358,
"train_steps_per_second": 0.008
}
],
"logging_steps": 10,
"max_steps": 382,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}