{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9996728819103696,
  "eval_steps": 100,
  "global_step": 382,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 8.037226671121555,
      "learning_rate": 1.282051282051282e-08,
      "logits/chosen": -2.9984583854675293,
      "logits/rejected": -3.047058343887329,
      "logps/chosen": -440.9766845703125,
      "logps/rejected": -253.47076416015625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "grad_norm": 9.369508939204481,
      "learning_rate": 1.2820512820512818e-07,
      "logits/chosen": -2.851626396179199,
      "logits/rejected": -2.840857982635498,
      "logps/chosen": -292.8236999511719,
      "logps/rejected": -243.9152069091797,
      "loss": 0.6931,
      "rewards/accuracies": 0.4583333432674408,
      "rewards/chosen": 0.0002360683138249442,
      "rewards/margins": -7.22050535841845e-05,
      "rewards/rejected": 0.00030827338923700154,
      "step": 10
    },
    {
      "epoch": 0.05,
      "grad_norm": 7.770546674426242,
      "learning_rate": 2.5641025641025636e-07,
      "logits/chosen": -2.836233139038086,
      "logits/rejected": -2.8243448734283447,
      "logps/chosen": -269.3387145996094,
      "logps/rejected": -226.77444458007812,
      "loss": 0.6919,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0006391379283741117,
      "rewards/margins": 0.0022494590375572443,
      "rewards/rejected": -0.0016103212255984545,
      "step": 20
    },
    {
      "epoch": 0.08,
      "grad_norm": 7.555168855947224,
      "learning_rate": 3.8461538461538463e-07,
      "logits/chosen": -2.8013336658477783,
      "logits/rejected": -2.8021349906921387,
      "logps/chosen": -273.3194274902344,
      "logps/rejected": -246.2598114013672,
      "loss": 0.6851,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.01441938616335392,
      "rewards/margins": 0.014810544438660145,
      "rewards/rejected": -0.0003911592939402908,
      "step": 30
    },
    {
      "epoch": 0.1,
      "grad_norm": 8.28569270937212,
      "learning_rate": 4.999895137916554e-07,
      "logits/chosen": -2.814171552658081,
      "logits/rejected": -2.832319974899292,
      "logps/chosen": -276.9251403808594,
      "logps/rejected": -268.66461181640625,
      "loss": 0.6723,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.04223880544304848,
      "rewards/margins": 0.04603661596775055,
      "rewards/rejected": -0.0037978135515004396,
      "step": 40
    },
    {
      "epoch": 0.13,
      "grad_norm": 10.06207588110713,
      "learning_rate": 4.987322328603843e-07,
      "logits/chosen": -2.771031141281128,
      "logits/rejected": -2.7740721702575684,
      "logps/chosen": -280.4728088378906,
      "logps/rejected": -270.3973083496094,
      "loss": 0.6424,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.010499795898795128,
      "rewards/margins": 0.13346262276172638,
      "rewards/rejected": -0.1229628473520279,
      "step": 50
    },
    {
      "epoch": 0.16,
      "grad_norm": 11.452381967640083,
      "learning_rate": 4.953897892477663e-07,
      "logits/chosen": -2.7735002040863037,
      "logits/rejected": -2.7375593185424805,
      "logps/chosen": -275.349853515625,
      "logps/rejected": -285.12603759765625,
      "loss": 0.6166,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.11094622313976288,
      "rewards/margins": 0.23263141512870789,
      "rewards/rejected": -0.34357762336730957,
      "step": 60
    },
    {
      "epoch": 0.18,
      "grad_norm": 12.969857186189097,
      "learning_rate": 4.899902032011388e-07,
      "logits/chosen": -2.6529109477996826,
      "logits/rejected": -2.628537893295288,
      "logps/chosen": -302.51910400390625,
      "logps/rejected": -317.7935791015625,
      "loss": 0.5948,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.10311013460159302,
      "rewards/margins": 0.31482020020484924,
      "rewards/rejected": -0.4179303050041199,
      "step": 70
    },
    {
      "epoch": 0.21,
      "grad_norm": 19.524374589693302,
      "learning_rate": 4.825787403267712e-07,
      "logits/chosen": -2.5461225509643555,
      "logits/rejected": -2.522418737411499,
      "logps/chosen": -298.61798095703125,
      "logps/rejected": -317.22503662109375,
      "loss": 0.5781,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.3027142286300659,
      "rewards/margins": 0.3840213418006897,
      "rewards/rejected": -0.6867355108261108,
      "step": 80
    },
    {
      "epoch": 0.24,
      "grad_norm": 14.061479301034291,
      "learning_rate": 4.732175321209399e-07,
      "logits/chosen": -2.397352933883667,
      "logits/rejected": -2.424654722213745,
      "logps/chosen": -268.97369384765625,
      "logps/rejected": -313.6884460449219,
      "loss": 0.5836,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.3002493977546692,
      "rewards/margins": 0.41001391410827637,
      "rewards/rejected": -0.7102633118629456,
      "step": 90
    },
    {
      "epoch": 0.26,
      "grad_norm": 17.956912133781355,
      "learning_rate": 4.619850551115105e-07,
      "logits/chosen": -2.1157875061035156,
      "logits/rejected": -2.0495009422302246,
      "logps/chosen": -304.60968017578125,
      "logps/rejected": -369.248779296875,
      "loss": 0.5474,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.5611704587936401,
      "rewards/margins": 0.6696933507919312,
      "rewards/rejected": -1.2308636903762817,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.308899164199829,
      "eval_logits/rejected": -2.2413949966430664,
      "eval_logps/chosen": -365.31329345703125,
      "eval_logps/rejected": -384.8892822265625,
      "eval_loss": 0.5624693632125854,
      "eval_rewards/accuracies": 0.6850000023841858,
      "eval_rewards/chosen": -0.7543885707855225,
      "eval_rewards/margins": 0.41293570399284363,
      "eval_rewards/rejected": -1.167324423789978,
      "eval_runtime": 693.8722,
      "eval_samples_per_second": 2.882,
      "eval_steps_per_second": 0.144,
      "step": 100
    },
    {
      "epoch": 0.29,
      "grad_norm": 26.962688754701436,
      "learning_rate": 4.4897547297646633e-07,
      "logits/chosen": -1.9299335479736328,
      "logits/rejected": -1.8247636556625366,
      "logps/chosen": -396.6432189941406,
      "logps/rejected": -426.43658447265625,
      "loss": 0.5435,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.9001098871231079,
      "rewards/margins": 0.6095530390739441,
      "rewards/rejected": -1.5096628665924072,
      "step": 110
    },
    {
      "epoch": 0.31,
      "grad_norm": 24.58898873320785,
      "learning_rate": 4.3429784715451177e-07,
      "logits/chosen": -1.8942654132843018,
      "logits/rejected": -1.5966886281967163,
      "logps/chosen": -327.930419921875,
      "logps/rejected": -381.9439697265625,
      "loss": 0.5262,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.6237712502479553,
      "rewards/margins": 0.8189311027526855,
      "rewards/rejected": -1.442702293395996,
      "step": 120
    },
    {
      "epoch": 0.34,
      "grad_norm": 22.502297297962723,
      "learning_rate": 4.180752225653292e-07,
      "logits/chosen": -1.746715784072876,
      "logits/rejected": -1.6937605142593384,
      "logps/chosen": -372.397705078125,
      "logps/rejected": -407.4283142089844,
      "loss": 0.5297,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.8563397526741028,
      "rewards/margins": 0.5461622476577759,
      "rewards/rejected": -1.4025019407272339,
      "step": 130
    },
    {
      "epoch": 0.37,
      "grad_norm": 30.209865465712408,
      "learning_rate": 4.00443596104045e-07,
      "logits/chosen": -1.5750809907913208,
      "logits/rejected": -1.1808230876922607,
      "logps/chosen": -404.4322509765625,
      "logps/rejected": -471.23883056640625,
      "loss": 0.5301,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.0831854343414307,
      "rewards/margins": 0.7117680907249451,
      "rewards/rejected": -1.79495370388031,
      "step": 140
    },
    {
      "epoch": 0.39,
      "grad_norm": 24.47710068833958,
      "learning_rate": 3.815507765571832e-07,
      "logits/chosen": -1.8257108926773071,
      "logits/rejected": -1.4285178184509277,
      "logps/chosen": -398.15509033203125,
      "logps/rejected": -433.53375244140625,
      "loss": 0.5141,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.8729391098022461,
      "rewards/margins": 0.7921355366706848,
      "rewards/rejected": -1.6650745868682861,
      "step": 150
    },
    {
      "epoch": 0.42,
      "grad_norm": 21.92961027984523,
      "learning_rate": 3.615551454976164e-07,
      "logits/chosen": -1.5830988883972168,
      "logits/rejected": -0.9580858945846558,
      "logps/chosen": -407.8912048339844,
      "logps/rejected": -474.907958984375,
      "loss": 0.5206,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.9090505838394165,
      "rewards/margins": 0.8707038164138794,
      "rewards/rejected": -1.779754400253296,
      "step": 160
    },
    {
      "epoch": 0.44,
      "grad_norm": 20.612672549801676,
      "learning_rate": 3.406243295461325e-07,
      "logits/chosen": -1.4334670305252075,
      "logits/rejected": -1.1878687143325806,
      "logps/chosen": -390.4436340332031,
      "logps/rejected": -457.1796875,
      "loss": 0.5124,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.7781120538711548,
      "rewards/margins": 0.8819589614868164,
      "rewards/rejected": -1.6600710153579712,
      "step": 170
    },
    {
      "epoch": 0.47,
      "grad_norm": 29.50134057519578,
      "learning_rate": 3.189337951302639e-07,
      "logits/chosen": -1.2944433689117432,
      "logits/rejected": -0.9970762133598328,
      "logps/chosen": -345.94415283203125,
      "logps/rejected": -390.71673583984375,
      "loss": 0.514,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.8080419301986694,
      "rewards/margins": 0.8096262216567993,
      "rewards/rejected": -1.6176681518554688,
      "step": 180
    },
    {
      "epoch": 0.5,
      "grad_norm": 22.184389019959962,
      "learning_rate": 2.966653775207444e-07,
      "logits/chosen": -1.5276657342910767,
      "logits/rejected": -1.1025655269622803,
      "logps/chosen": -404.1734924316406,
      "logps/rejected": -435.48089599609375,
      "loss": 0.4989,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.9301355481147766,
      "rewards/margins": 0.8291870355606079,
      "rewards/rejected": -1.7593225240707397,
      "step": 190
    },
    {
      "epoch": 0.52,
      "grad_norm": 26.733564014369115,
      "learning_rate": 2.7400575647692043e-07,
      "logits/chosen": -1.406765341758728,
      "logits/rejected": -1.11467444896698,
      "logps/chosen": -400.4964904785156,
      "logps/rejected": -441.97998046875,
      "loss": 0.5291,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.223042607307434,
      "rewards/margins": 0.6574328541755676,
      "rewards/rejected": -1.880475401878357,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -1.5008797645568848,
      "eval_logits/rejected": -1.159541130065918,
      "eval_logps/chosen": -410.1354675292969,
      "eval_logps/rejected": -465.21136474609375,
      "eval_loss": 0.5176023244857788,
      "eval_rewards/accuracies": 0.75,
      "eval_rewards/chosen": -1.2026103734970093,
      "eval_rewards/margins": 0.7679347395896912,
      "eval_rewards/rejected": -1.9705451726913452,
      "eval_runtime": 694.1376,
      "eval_samples_per_second": 2.881,
      "eval_steps_per_second": 0.144,
      "step": 200
    },
    {
      "epoch": 0.55,
      "grad_norm": 22.51552046396425,
      "learning_rate": 2.511448912800265e-07,
      "logits/chosen": -1.538048505783081,
      "logits/rejected": -1.2279856204986572,
      "logps/chosen": -392.5513610839844,
      "logps/rejected": -486.5321350097656,
      "loss": 0.4967,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.1226943731307983,
      "rewards/margins": 0.8473464250564575,
      "rewards/rejected": -1.9700406789779663,
      "step": 210
    },
    {
      "epoch": 0.58,
      "grad_norm": 23.041728264521645,
      "learning_rate": 2.2827442827369772e-07,
      "logits/chosen": -1.632845163345337,
      "logits/rejected": -1.380772352218628,
      "logps/chosen": -346.6684875488281,
      "logps/rejected": -421.88739013671875,
      "loss": 0.5141,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.9023996591567993,
      "rewards/margins": 0.7315413951873779,
      "rewards/rejected": -1.6339410543441772,
      "step": 220
    },
    {
      "epoch": 0.6,
      "grad_norm": 25.751992215511777,
      "learning_rate": 2.0558609426156343e-07,
      "logits/chosen": -1.5758168697357178,
      "logits/rejected": -1.392135500907898,
      "logps/chosen": -378.9245300292969,
      "logps/rejected": -444.28936767578125,
      "loss": 0.5208,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.1253713369369507,
      "rewards/margins": 0.6441680192947388,
      "rewards/rejected": -1.7695392370224,
      "step": 230
    },
    {
      "epoch": 0.63,
      "grad_norm": 23.086940301819613,
      "learning_rate": 1.8327008923033222e-07,
      "logits/chosen": -1.6344903707504272,
      "logits/rejected": -1.4062269926071167,
      "logps/chosen": -433.552001953125,
      "logps/rejected": -481.857421875,
      "loss": 0.5213,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.1788504123687744,
      "rewards/margins": 0.7093037962913513,
      "rewards/rejected": -1.8881542682647705,
      "step": 240
    },
    {
      "epoch": 0.65,
      "grad_norm": 22.246705559005484,
      "learning_rate": 1.6151349187243063e-07,
      "logits/chosen": -1.5954548120498657,
      "logits/rejected": -1.1250216960906982,
      "logps/chosen": -444.52001953125,
      "logps/rejected": -509.41302490234375,
      "loss": 0.4941,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -1.2779477834701538,
      "rewards/margins": 0.9330456852912903,
      "rewards/rejected": -2.210993528366089,
      "step": 250
    },
    {
      "epoch": 0.68,
      "grad_norm": 25.840093284837668,
      "learning_rate": 1.4049869127495884e-07,
      "logits/chosen": -1.4928535223007202,
      "logits/rejected": -0.7584124803543091,
      "logps/chosen": -401.40374755859375,
      "logps/rejected": -504.8365173339844,
      "loss": 0.4984,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.2281973361968994,
      "rewards/margins": 1.0200105905532837,
      "rewards/rejected": -2.2482082843780518,
      "step": 260
    },
    {
      "epoch": 0.71,
      "grad_norm": 23.793534917882216,
      "learning_rate": 1.2040185792236873e-07,
      "logits/chosen": -1.413051962852478,
      "logits/rejected": -0.8309895396232605,
      "logps/chosen": -416.62255859375,
      "logps/rejected": -475.1249084472656,
      "loss": 0.4978,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.3103489875793457,
      "rewards/margins": 0.874372124671936,
      "rewards/rejected": -2.1847212314605713,
      "step": 270
    },
    {
      "epoch": 0.73,
      "grad_norm": 27.621259089208138,
      "learning_rate": 1.0139146683069728e-07,
      "logits/chosen": -1.5253907442092896,
      "logits/rejected": -1.0778144598007202,
      "logps/chosen": -385.1762390136719,
      "logps/rejected": -462.73834228515625,
      "loss": 0.5159,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.0836073160171509,
      "rewards/margins": 0.8540776371955872,
      "rewards/rejected": -1.9376850128173828,
      "step": 280
    },
    {
      "epoch": 0.76,
      "grad_norm": 23.12152900338625,
      "learning_rate": 8.362688519416084e-08,
      "logits/chosen": -1.374618649482727,
      "logits/rejected": -1.0819616317749023,
      "logps/chosen": -367.61090087890625,
      "logps/rejected": -503.74835205078125,
      "loss": 0.4882,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -1.0010521411895752,
      "rewards/margins": 1.0948959589004517,
      "rewards/rejected": -2.0959479808807373,
      "step": 290
    },
    {
      "epoch": 0.79,
      "grad_norm": 19.50201171164751,
      "learning_rate": 6.725703638409861e-08,
      "logits/chosen": -1.3707311153411865,
      "logits/rejected": -1.0541932582855225,
      "logps/chosen": -394.03924560546875,
      "logps/rejected": -464.50640869140625,
      "loss": 0.4891,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.1756898164749146,
      "rewards/margins": 0.8380070924758911,
      "rewards/rejected": -2.0136971473693848,
      "step": 300
    },
    {
      "epoch": 0.79,
      "eval_logits/chosen": -1.4685231447219849,
      "eval_logits/rejected": -0.9395467638969421,
      "eval_logps/chosen": -402.9643859863281,
      "eval_logps/rejected": -464.9228515625,
      "eval_loss": 0.5042179822921753,
      "eval_rewards/accuracies": 0.7549999952316284,
      "eval_rewards/chosen": -1.1308995485305786,
      "eval_rewards/margins": 0.8367605805397034,
      "eval_rewards/rejected": -1.9676600694656372,
      "eval_runtime": 693.8368,
      "eval_samples_per_second": 2.883,
      "eval_steps_per_second": 0.144,
      "step": 300
    },
    {
      "epoch": 0.81,
      "grad_norm": 22.978115464291324,
      "learning_rate": 5.2419151500177395e-08,
      "logits/chosen": -1.3883640766143799,
      "logits/rejected": -0.8084322214126587,
      "logps/chosen": -390.07672119140625,
      "logps/rejected": -441.5680236816406,
      "loss": 0.5031,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.1122825145721436,
      "rewards/margins": 0.8595865964889526,
      "rewards/rejected": -1.971868872642517,
      "step": 310
    },
    {
      "epoch": 0.84,
      "grad_norm": 22.683444984114566,
      "learning_rate": 3.9237618939808646e-08,
      "logits/chosen": -1.3592180013656616,
      "logits/rejected": -0.9010286331176758,
      "logps/chosen": -427.95184326171875,
      "logps/rejected": -494.72137451171875,
      "loss": 0.5004,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -1.145861029624939,
      "rewards/margins": 0.888739287853241,
      "rewards/rejected": -2.0346004962921143,
      "step": 320
    },
    {
      "epoch": 0.86,
      "grad_norm": 22.356295615100926,
      "learning_rate": 2.7822941630022605e-08,
      "logits/chosen": -1.4484529495239258,
      "logits/rejected": -1.012138843536377,
      "logps/chosen": -386.97357177734375,
      "logps/rejected": -459.3680725097656,
      "loss": 0.4987,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.110649585723877,
      "rewards/margins": 0.8449923396110535,
      "rewards/rejected": -1.9556419849395752,
      "step": 330
    },
    {
      "epoch": 0.89,
      "grad_norm": 21.4095540436173,
      "learning_rate": 1.827081066349459e-08,
      "logits/chosen": -1.372106671333313,
      "logits/rejected": -1.1249427795410156,
      "logps/chosen": -384.5665283203125,
      "logps/rejected": -453.394287109375,
      "loss": 0.5067,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.1582924127578735,
      "rewards/margins": 0.6956581473350525,
      "rewards/rejected": -1.8539505004882812,
      "step": 340
    },
    {
      "epoch": 0.92,
      "grad_norm": 20.33149833976292,
      "learning_rate": 1.0661303104582881e-08,
      "logits/chosen": -1.3133699893951416,
      "logits/rejected": -0.8347970247268677,
      "logps/chosen": -401.160888671875,
      "logps/rejected": -450.82843017578125,
      "loss": 0.486,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.1163744926452637,
      "rewards/margins": 0.9114343523979187,
      "rewards/rejected": -2.027808666229248,
      "step": 350
    },
    {
      "epoch": 0.94,
      "grad_norm": 27.107299279864698,
      "learning_rate": 5.058210690300463e-09,
      "logits/chosen": -1.40584397315979,
      "logits/rejected": -1.0146383047103882,
      "logps/chosen": -394.41607666015625,
      "logps/rejected": -467.156982421875,
      "loss": 0.5043,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.0669889450073242,
      "rewards/margins": 0.8511545062065125,
      "rewards/rejected": -1.9181435108184814,
      "step": 360
    },
    {
      "epoch": 0.97,
      "grad_norm": 23.223735379023285,
      "learning_rate": 1.5085050538290456e-09,
      "logits/chosen": -1.3387047052383423,
      "logits/rejected": -1.1372339725494385,
      "logps/chosen": -393.8738708496094,
      "logps/rejected": -456.54046630859375,
      "loss": 0.5213,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.13485586643219,
      "rewards/margins": 0.6930190324783325,
      "rewards/rejected": -1.827874779701233,
      "step": 370
    },
    {
      "epoch": 0.99,
      "grad_norm": 22.565920583773593,
      "learning_rate": 4.19439536940569e-11,
      "logits/chosen": -1.2898640632629395,
      "logits/rejected": -0.9960281252861023,
      "logps/chosen": -395.0779724121094,
      "logps/rejected": -449.18646240234375,
      "loss": 0.4955,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.2446048259735107,
      "rewards/margins": 0.7065111398696899,
      "rewards/rejected": -1.9511159658432007,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 382,
      "total_flos": 0.0,
      "train_loss": 0.5410362510156881,
      "train_runtime": 45025.8645,
      "train_samples_per_second": 1.358,
      "train_steps_per_second": 0.008
    }
  ],
  "logging_steps": 10,
  "max_steps": 382,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}