{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.994495412844037,
  "eval_steps": 500,
  "global_step": 408,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.014678899082568808,
      "grad_norm": 0.19690614938735962,
      "learning_rate": 2.439024390243903e-07,
      "logits/chosen": -0.9879676103591919,
      "logits/rejected": -1.9993298053741455,
      "logps/chosen": -269.27239990234375,
      "logps/rejected": -186.47621154785156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 2
    },
    {
      "epoch": 0.029357798165137616,
      "grad_norm": 0.19419735670089722,
      "learning_rate": 4.878048780487805e-07,
      "logits/chosen": -1.0356377363204956,
      "logits/rejected": -1.989586591720581,
      "logps/chosen": -290.71826171875,
      "logps/rejected": -204.54940795898438,
      "loss": 0.6923,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.0007679759874008596,
      "rewards/margins": 0.0017074686475098133,
      "rewards/rejected": -0.0009394925436936319,
      "step": 4
    },
    {
      "epoch": 0.044036697247706424,
      "grad_norm": 0.2066972851753235,
      "learning_rate": 7.317073170731707e-07,
      "logits/chosen": -1.116999864578247,
      "logits/rejected": -2.1228256225585938,
      "logps/chosen": -295.96392822265625,
      "logps/rejected": -203.14102172851562,
      "loss": 0.693,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.0002199566224589944,
      "rewards/margins": 0.0003147660754621029,
      "rewards/rejected": -9.480951121076941e-05,
      "step": 6
    },
    {
      "epoch": 0.05871559633027523,
      "grad_norm": 0.19454629719257355,
      "learning_rate": 9.75609756097561e-07,
      "logits/chosen": -1.254880666732788,
      "logits/rejected": -2.107330322265625,
      "logps/chosen": -252.34690856933594,
      "logps/rejected": -164.38314819335938,
      "loss": 0.6924,
      "rewards/accuracies": 0.515625,
      "rewards/chosen": 0.0023813091684132814,
      "rewards/margins": 0.001523678540252149,
      "rewards/rejected": 0.0008576306281611323,
      "step": 8
    },
    {
      "epoch": 0.07339449541284404,
      "grad_norm": 0.22062012553215027,
      "learning_rate": 1.2195121951219514e-06,
      "logits/chosen": -1.0731412172317505,
      "logits/rejected": -2.202488660812378,
      "logps/chosen": -306.49981689453125,
      "logps/rejected": -158.7795867919922,
      "loss": 0.6924,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.0011761488858610392,
      "rewards/margins": 0.001570393331348896,
      "rewards/rejected": -0.00039424479473382235,
      "step": 10
    },
    {
      "epoch": 0.08807339449541285,
      "grad_norm": 0.24035769701004028,
      "learning_rate": 1.4634146341463414e-06,
      "logits/chosen": -1.014756441116333,
      "logits/rejected": -1.9809092283248901,
      "logps/chosen": -344.25067138671875,
      "logps/rejected": -223.39735412597656,
      "loss": 0.6934,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.0003797918325290084,
      "rewards/margins": -0.00048368636635132134,
      "rewards/rejected": 0.0001038945047184825,
      "step": 12
    },
    {
      "epoch": 0.10275229357798166,
      "grad_norm": 0.21700163185596466,
      "learning_rate": 1.707317073170732e-06,
      "logits/chosen": -0.9738011360168457,
      "logits/rejected": -1.969935655593872,
      "logps/chosen": -259.6055908203125,
      "logps/rejected": -167.9296875,
      "loss": 0.6932,
      "rewards/accuracies": 0.484375,
      "rewards/chosen": 0.0001553670153953135,
      "rewards/margins": -3.077203291468322e-05,
      "rewards/rejected": 0.0001861391356214881,
      "step": 14
    },
    {
      "epoch": 0.11743119266055047,
      "grad_norm": 0.21383042633533478,
      "learning_rate": 1.951219512195122e-06,
      "logits/chosen": -1.1494945287704468,
      "logits/rejected": -2.0312893390655518,
      "logps/chosen": -269.7684020996094,
      "logps/rejected": -186.76539611816406,
      "loss": 0.6928,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.00033674034057185054,
      "rewards/margins": 0.0006319936946965754,
      "rewards/rejected": -0.0002952533832285553,
      "step": 16
    },
    {
      "epoch": 0.13211009174311927,
      "grad_norm": 0.22477228939533234,
      "learning_rate": 2.1951219512195125e-06,
      "logits/chosen": -1.026399850845337,
      "logits/rejected": -2.088264226913452,
      "logps/chosen": -313.89373779296875,
      "logps/rejected": -197.96157836914062,
      "loss": 0.6923,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.0008114746888168156,
      "rewards/margins": 0.0017674455884844065,
      "rewards/rejected": -0.0009559708414599299,
      "step": 18
    },
    {
      "epoch": 0.14678899082568808,
      "grad_norm": 0.21596066653728485,
      "learning_rate": 2.4390243902439027e-06,
      "logits/chosen": -1.0682005882263184,
      "logits/rejected": -2.089970111846924,
      "logps/chosen": -308.960205078125,
      "logps/rejected": -156.67080688476562,
      "loss": 0.6934,
      "rewards/accuracies": 0.421875,
      "rewards/chosen": 1.0994495823979378e-05,
      "rewards/margins": -0.00042359798680990934,
      "rewards/rejected": 0.00043459233711473644,
      "step": 20
    },
    {
      "epoch": 0.1614678899082569,
      "grad_norm": 0.21404975652694702,
      "learning_rate": 2.682926829268293e-06,
      "logits/chosen": -1.2156116962432861,
      "logits/rejected": -2.2619664669036865,
      "logps/chosen": -298.9400634765625,
      "logps/rejected": -186.75558471679688,
      "loss": 0.6933,
      "rewards/accuracies": 0.484375,
      "rewards/chosen": -0.00021735014161095023,
      "rewards/margins": -0.0002488284953869879,
      "rewards/rejected": 3.1478411983698606e-05,
      "step": 22
    },
    {
      "epoch": 0.1761467889908257,
      "grad_norm": 0.2232733964920044,
      "learning_rate": 2.926829268292683e-06,
      "logits/chosen": -1.0221598148345947,
      "logits/rejected": -2.1875596046447754,
      "logps/chosen": -370.7825622558594,
      "logps/rejected": -156.96913146972656,
      "loss": 0.693,
      "rewards/accuracies": 0.546875,
      "rewards/chosen": 0.00025787827325984836,
      "rewards/margins": 0.0002967125328723341,
      "rewards/rejected": -3.883423050865531e-05,
      "step": 24
    },
    {
      "epoch": 0.1908256880733945,
      "grad_norm": 0.20744489133358002,
      "learning_rate": 3.1707317073170736e-06,
      "logits/chosen": -1.2444679737091064,
      "logits/rejected": -2.1652672290802,
      "logps/chosen": -326.87286376953125,
      "logps/rejected": -182.16775512695312,
      "loss": 0.6929,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.00034455297281965613,
      "rewards/margins": 0.0005561637226492167,
      "rewards/rejected": -0.0002116107352776453,
      "step": 26
    },
    {
      "epoch": 0.20550458715596331,
      "grad_norm": 0.20301216840744019,
      "learning_rate": 3.414634146341464e-06,
      "logits/chosen": -1.1698203086853027,
      "logits/rejected": -2.2098355293273926,
      "logps/chosen": -284.3738708496094,
      "logps/rejected": -165.10675048828125,
      "loss": 0.6931,
      "rewards/accuracies": 0.453125,
      "rewards/chosen": 0.0008304309449158609,
      "rewards/margins": 0.00014648195065092295,
      "rewards/rejected": 0.0006839490379206836,
      "step": 28
    },
    {
      "epoch": 0.22018348623853212,
      "grad_norm": 0.18828697502613068,
      "learning_rate": 3.6585365853658537e-06,
      "logits/chosen": -1.0651720762252808,
      "logits/rejected": -2.114262342453003,
      "logps/chosen": -289.60650634765625,
      "logps/rejected": -197.43728637695312,
      "loss": 0.693,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 7.248943438753486e-05,
      "rewards/margins": 0.0003572917776182294,
      "rewards/rejected": -0.00028480234323069453,
      "step": 30
    },
    {
      "epoch": 0.23486238532110093,
      "grad_norm": 0.22061792016029358,
      "learning_rate": 3.902439024390244e-06,
      "logits/chosen": -1.175024390220642,
      "logits/rejected": -2.1456820964813232,
      "logps/chosen": -288.66009521484375,
      "logps/rejected": -163.61434936523438,
      "loss": 0.6928,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.000822424772195518,
      "rewards/margins": 0.0007216277299448848,
      "rewards/rejected": 0.00010079706407850608,
      "step": 32
    },
    {
      "epoch": 0.24954128440366974,
      "grad_norm": 0.23261404037475586,
      "learning_rate": 4.146341463414634e-06,
      "logits/chosen": -1.14915132522583,
      "logits/rejected": -2.2037534713745117,
      "logps/chosen": -324.6670837402344,
      "logps/rejected": -164.40541076660156,
      "loss": 0.6922,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.001368533237837255,
      "rewards/margins": 0.0020064578857272863,
      "rewards/rejected": -0.0006379245314747095,
      "step": 34
    },
    {
      "epoch": 0.26422018348623855,
      "grad_norm": 0.20986895263195038,
      "learning_rate": 4.390243902439025e-06,
      "logits/chosen": -1.1692521572113037,
      "logits/rejected": -2.0869853496551514,
      "logps/chosen": -311.4503479003906,
      "logps/rejected": -204.41900634765625,
      "loss": 0.6921,
      "rewards/accuracies": 0.640625,
      "rewards/chosen": 0.0017893682233989239,
      "rewards/margins": 0.0020368697587400675,
      "rewards/rejected": -0.00024750170996412635,
      "step": 36
    },
    {
      "epoch": 0.27889908256880735,
      "grad_norm": 0.22282101213932037,
      "learning_rate": 4.634146341463416e-06,
      "logits/chosen": -1.1961044073104858,
      "logits/rejected": -2.1981520652770996,
      "logps/chosen": -294.0665283203125,
      "logps/rejected": -200.33531188964844,
      "loss": 0.692,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": 0.002415674738585949,
      "rewards/margins": 0.0022758746054023504,
      "rewards/rejected": 0.00013980030780658126,
      "step": 38
    },
    {
      "epoch": 0.29357798165137616,
      "grad_norm": 0.21077555418014526,
      "learning_rate": 4.8780487804878055e-06,
      "logits/chosen": -1.0440386533737183,
      "logits/rejected": -2.0407447814941406,
      "logps/chosen": -345.90582275390625,
      "logps/rejected": -181.31021118164062,
      "loss": 0.6912,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.0031323533039540052,
      "rewards/margins": 0.003835040610283613,
      "rewards/rejected": -0.0007026874809525907,
      "step": 40
    },
    {
      "epoch": 0.30825688073394497,
      "grad_norm": 0.22342857718467712,
      "learning_rate": 4.999908404322799e-06,
      "logits/chosen": -1.0360839366912842,
      "logits/rejected": -2.229090690612793,
      "logps/chosen": -319.34405517578125,
      "logps/rejected": -172.6101531982422,
      "loss": 0.6911,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.003162128385156393,
      "rewards/margins": 0.004134417977184057,
      "rewards/rejected": -0.0009722901741042733,
      "step": 42
    },
    {
      "epoch": 0.3229357798165138,
      "grad_norm": 0.20710445940494537,
      "learning_rate": 4.999175679175577e-06,
      "logits/chosen": -1.1114940643310547,
      "logits/rejected": -2.134546995162964,
      "logps/chosen": -251.9826202392578,
      "logps/rejected": -161.28729248046875,
      "loss": 0.6908,
      "rewards/accuracies": 0.703125,
      "rewards/chosen": 0.0036410389002412558,
      "rewards/margins": 0.004786026664078236,
      "rewards/rejected": -0.0011449878802523017,
      "step": 44
    },
    {
      "epoch": 0.3376146788990826,
      "grad_norm": 0.20841620862483978,
      "learning_rate": 4.997710443643461e-06,
      "logits/chosen": -1.1724720001220703,
      "logits/rejected": -2.0726871490478516,
      "logps/chosen": -259.8139343261719,
      "logps/rejected": -206.3842010498047,
      "loss": 0.6905,
      "rewards/accuracies": 0.671875,
      "rewards/chosen": 0.00497157359495759,
      "rewards/margins": 0.0053414045833051205,
      "rewards/rejected": -0.0003698311629705131,
      "step": 46
    },
    {
      "epoch": 0.3522935779816514,
      "grad_norm": 0.26362714171409607,
      "learning_rate": 4.995513127188151e-06,
      "logits/chosen": -1.083440899848938,
      "logits/rejected": -2.2180700302124023,
      "logps/chosen": -365.6690368652344,
      "logps/rejected": -183.11676025390625,
      "loss": 0.6896,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.006695316638797522,
      "rewards/margins": 0.007123172748833895,
      "rewards/rejected": -0.00042785535333678126,
      "step": 48
    },
    {
      "epoch": 0.3669724770642202,
      "grad_norm": 0.24791677296161652,
      "learning_rate": 4.992584373844853e-06,
      "logits/chosen": -1.2088677883148193,
      "logits/rejected": -2.0830483436584473,
      "logps/chosen": -345.7132263183594,
      "logps/rejected": -184.2142791748047,
      "loss": 0.6892,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": 0.006996008101850748,
      "rewards/margins": 0.007980192080140114,
      "rewards/rejected": -0.0009841847931966186,
      "step": 50
    },
    {
      "epoch": 0.381651376146789,
      "grad_norm": 0.2443704605102539,
      "learning_rate": 4.98892504203351e-06,
      "logits/chosen": -1.2231440544128418,
      "logits/rejected": -2.1294138431549072,
      "logps/chosen": -281.9193420410156,
      "logps/rejected": -158.81373596191406,
      "loss": 0.6887,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.008386782370507717,
      "rewards/margins": 0.008862405084073544,
      "rewards/rejected": -0.0004756227135658264,
      "step": 52
    },
    {
      "epoch": 0.3963302752293578,
      "grad_norm": 0.22939813137054443,
      "learning_rate": 4.9845362043071925e-06,
      "logits/chosen": -1.022881269454956,
      "logits/rejected": -2.0713906288146973,
      "logps/chosen": -290.619140625,
      "logps/rejected": -163.58676147460938,
      "loss": 0.6883,
      "rewards/accuracies": 0.890625,
      "rewards/chosen": 0.008205468766391277,
      "rewards/margins": 0.00983144249767065,
      "rewards/rejected": -0.0016259729163721204,
      "step": 54
    },
    {
      "epoch": 0.41100917431192663,
      "grad_norm": 0.23653681576251984,
      "learning_rate": 4.97941914703774e-06,
      "logits/chosen": -1.1475605964660645,
      "logits/rejected": -2.1536898612976074,
      "logps/chosen": -287.8155517578125,
      "logps/rejected": -201.2643585205078,
      "loss": 0.6876,
      "rewards/accuracies": 0.875,
      "rewards/chosen": 0.01042763702571392,
      "rewards/margins": 0.011164907366037369,
      "rewards/rejected": -0.0007372696418315172,
      "step": 56
    },
    {
      "epoch": 0.42568807339449544,
      "grad_norm": 0.27519503235816956,
      "learning_rate": 4.973575370038718e-06,
      "logits/chosen": -1.0696501731872559,
      "logits/rejected": -2.0487396717071533,
      "logps/chosen": -305.0591735839844,
      "logps/rejected": -193.36380004882812,
      "loss": 0.6845,
      "rewards/accuracies": 0.921875,
      "rewards/chosen": 0.015121238306164742,
      "rewards/margins": 0.017421720549464226,
      "rewards/rejected": -0.0023004834074527025,
      "step": 58
    },
    {
      "epoch": 0.44036697247706424,
      "grad_norm": 0.2350296527147293,
      "learning_rate": 4.967006586125827e-06,
      "logits/chosen": -1.2415204048156738,
      "logits/rejected": -2.0798888206481934,
      "logps/chosen": -301.13470458984375,
      "logps/rejected": -186.6119384765625,
      "loss": 0.6839,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.01728799380362034,
      "rewards/margins": 0.018711544573307037,
      "rewards/rejected": -0.0014235521666705608,
      "step": 60
    },
    {
      "epoch": 0.45504587155963305,
      "grad_norm": 0.27493444085121155,
      "learning_rate": 4.959714720614871e-06,
      "logits/chosen": -1.1752407550811768,
      "logits/rejected": -2.2229394912719727,
      "logps/chosen": -318.9490051269531,
      "logps/rejected": -184.07748413085938,
      "loss": 0.6818,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.018663793802261353,
      "rewards/margins": 0.02279941365122795,
      "rewards/rejected": -0.004135618917644024,
      "step": 62
    },
    {
      "epoch": 0.46972477064220186,
      "grad_norm": 0.23241819441318512,
      "learning_rate": 4.951701910757446e-06,
      "logits/chosen": -1.1613270044326782,
      "logits/rejected": -2.067774772644043,
      "logps/chosen": -253.791748046875,
      "logps/rejected": -188.93663024902344,
      "loss": 0.6816,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.01876826584339142,
      "rewards/margins": 0.02326786518096924,
      "rewards/rejected": -0.004499598406255245,
      "step": 64
    },
    {
      "epoch": 0.48440366972477067,
      "grad_norm": 0.29473960399627686,
      "learning_rate": 4.942970505114514e-06,
      "logits/chosen": -1.042543888092041,
      "logits/rejected": -2.1168742179870605,
      "logps/chosen": -308.21478271484375,
      "logps/rejected": -176.24462890625,
      "loss": 0.6806,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.02199753001332283,
      "rewards/margins": 0.025236235931515694,
      "rewards/rejected": -0.0032387052197009325,
      "step": 66
    },
    {
      "epoch": 0.4990825688073395,
      "grad_norm": 0.22980758547782898,
      "learning_rate": 4.933523062868033e-06,
      "logits/chosen": -1.0735752582550049,
      "logits/rejected": -2.1697797775268555,
      "logps/chosen": -269.1987609863281,
      "logps/rejected": -164.76675415039062,
      "loss": 0.6789,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.02460363134741783,
      "rewards/margins": 0.028698520734906197,
      "rewards/rejected": -0.004094891715794802,
      "step": 68
    },
    {
      "epoch": 0.5137614678899083,
      "grad_norm": 0.25644201040267944,
      "learning_rate": 4.923362353070859e-06,
      "logits/chosen": -0.8905975222587585,
      "logits/rejected": -2.159343957901001,
      "logps/chosen": -286.9671325683594,
      "logps/rejected": -159.90756225585938,
      "loss": 0.6754,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.02774330973625183,
      "rewards/margins": 0.03586244583129883,
      "rewards/rejected": -0.008119137957692146,
      "step": 70
    },
    {
      "epoch": 0.5284403669724771,
      "grad_norm": 0.23490914702415466,
      "learning_rate": 4.912491353835138e-06,
      "logits/chosen": -1.160065770149231,
      "logits/rejected": -2.0964155197143555,
      "logps/chosen": -259.6600036621094,
      "logps/rejected": -185.36654663085938,
      "loss": 0.6766,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.02729903906583786,
      "rewards/margins": 0.033542755991220474,
      "rewards/rejected": -0.0062437159940600395,
      "step": 72
    },
    {
      "epoch": 0.5431192660550459,
      "grad_norm": 0.23095114529132843,
      "learning_rate": 4.900913251459418e-06,
      "logits/chosen": -1.0740145444869995,
      "logits/rejected": -2.0510926246643066,
      "logps/chosen": -264.42852783203125,
      "logps/rejected": -173.16954040527344,
      "loss": 0.6737,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.03046741709113121,
      "rewards/margins": 0.039318714290857315,
      "rewards/rejected": -0.008851297199726105,
      "step": 74
    },
    {
      "epoch": 0.5577981651376147,
      "grad_norm": 0.2698145806789398,
      "learning_rate": 4.8886314394947396e-06,
      "logits/chosen": -0.9856863021850586,
      "logits/rejected": -2.0757381916046143,
      "logps/chosen": -278.1323547363281,
      "logps/rejected": -185.96359252929688,
      "loss": 0.6685,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.03979150950908661,
      "rewards/margins": 0.05023376643657684,
      "rewards/rejected": -0.010442256927490234,
      "step": 76
    },
    {
      "epoch": 0.5724770642201835,
      "grad_norm": 0.28155529499053955,
      "learning_rate": 4.875649517749985e-06,
      "logits/chosen": -1.0364511013031006,
      "logits/rejected": -2.184748411178589,
      "logps/chosen": -281.234375,
      "logps/rejected": -191.4117431640625,
      "loss": 0.6669,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.040829725563526154,
      "rewards/margins": 0.05347009003162384,
      "rewards/rejected": -0.012640362605452538,
      "step": 78
    },
    {
      "epoch": 0.5871559633027523,
      "grad_norm": 0.24884890019893646,
      "learning_rate": 4.861971291236772e-06,
      "logits/chosen": -1.1326780319213867,
      "logits/rejected": -2.0564959049224854,
      "logps/chosen": -327.68255615234375,
      "logps/rejected": -191.6634979248047,
      "loss": 0.6677,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.04658803716301918,
      "rewards/margins": 0.051849693059921265,
      "rewards/rejected": -0.005261647514998913,
      "step": 80
    },
    {
      "epoch": 0.6018348623853211,
      "grad_norm": 0.30091699957847595,
      "learning_rate": 4.847600769054201e-06,
      "logits/chosen": -1.1696847677230835,
      "logits/rejected": -2.07926607131958,
      "logps/chosen": -364.4068603515625,
      "logps/rejected": -221.42953491210938,
      "loss": 0.6624,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.05471267178654671,
      "rewards/margins": 0.06273850053548813,
      "rewards/rejected": -0.008025825023651123,
      "step": 82
    },
    {
      "epoch": 0.6165137614678899,
      "grad_norm": 0.25710350275039673,
      "learning_rate": 4.832542163213787e-06,
      "logits/chosen": -1.0127124786376953,
      "logits/rejected": -2.2070302963256836,
      "logps/chosen": -260.21435546875,
      "logps/rejected": -155.8712158203125,
      "loss": 0.6609,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.04979401081800461,
      "rewards/margins": 0.06581854820251465,
      "rewards/rejected": -0.016024531796574593,
      "step": 84
    },
    {
      "epoch": 0.6311926605504588,
      "grad_norm": 0.2573588192462921,
      "learning_rate": 4.816799887404911e-06,
      "logits/chosen": -1.214523196220398,
      "logits/rejected": -2.1630001068115234,
      "logps/chosen": -299.292724609375,
      "logps/rejected": -185.86978149414062,
      "loss": 0.6605,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.053634677082300186,
      "rewards/margins": 0.06668587028980255,
      "rewards/rejected": -0.013051198795437813,
      "step": 86
    },
    {
      "epoch": 0.6458715596330276,
      "grad_norm": 0.27079451084136963,
      "learning_rate": 4.800378555701168e-06,
      "logits/chosen": -1.0461535453796387,
      "logits/rejected": -2.0189104080200195,
      "logps/chosen": -352.3293762207031,
      "logps/rejected": -186.85206604003906,
      "loss": 0.6556,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.06002563610672951,
      "rewards/margins": 0.07706265896558762,
      "rewards/rejected": -0.01703702099621296,
      "step": 88
    },
    {
      "epoch": 0.6605504587155964,
      "grad_norm": 0.287859708070755,
      "learning_rate": 4.783282981207979e-06,
      "logits/chosen": -1.0909987688064575,
      "logits/rejected": -2.2945609092712402,
      "logps/chosen": -294.6068420410156,
      "logps/rejected": -169.92709350585938,
      "loss": 0.6542,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.06212657317519188,
      "rewards/margins": 0.07996082305908203,
      "rewards/rejected": -0.017834244295954704,
      "step": 90
    },
    {
      "epoch": 0.6752293577981652,
      "grad_norm": 0.27290210127830505,
      "learning_rate": 4.765518174651864e-06,
      "logits/chosen": -1.093838095664978,
      "logits/rejected": -2.0723648071289062,
      "logps/chosen": -284.00628662109375,
      "logps/rejected": -190.7758331298828,
      "loss": 0.6516,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.06472472846508026,
      "rewards/margins": 0.08546319603919983,
      "rewards/rejected": -0.020738467574119568,
      "step": 92
    },
    {
      "epoch": 0.689908256880734,
      "grad_norm": 0.2709275186061859,
      "learning_rate": 4.747089342911793e-06,
      "logits/chosen": -0.9517545700073242,
      "logits/rejected": -2.1969153881073,
      "logps/chosen": -289.38702392578125,
      "logps/rejected": -175.60415649414062,
      "loss": 0.6465,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.07264785468578339,
      "rewards/margins": 0.09619369357824326,
      "rewards/rejected": -0.02354583889245987,
      "step": 94
    },
    {
      "epoch": 0.7045871559633028,
      "grad_norm": 0.27490904927253723,
      "learning_rate": 4.728001887493048e-06,
      "logits/chosen": -0.9566600918769836,
      "logits/rejected": -2.1813409328460693,
      "logps/chosen": -297.1672668457031,
      "logps/rejected": -194.74151611328125,
      "loss": 0.6461,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.07736410200595856,
      "rewards/margins": 0.09716954082250595,
      "rewards/rejected": -0.019805438816547394,
      "step": 96
    },
    {
      "epoch": 0.7192660550458716,
      "grad_norm": 0.28355419635772705,
      "learning_rate": 4.708261402944036e-06,
      "logits/chosen": -1.0457079410552979,
      "logits/rejected": -2.1566596031188965,
      "logps/chosen": -312.3387756347656,
      "logps/rejected": -188.9569549560547,
      "loss": 0.6373,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.08790802210569382,
      "rewards/margins": 0.11593043804168701,
      "rewards/rejected": -0.02802242711186409,
      "step": 98
    },
    {
      "epoch": 0.7339449541284404,
      "grad_norm": 0.3128170073032379,
      "learning_rate": 4.687873675216522e-06,
      "logits/chosen": -0.9265126585960388,
      "logits/rejected": -1.9977868795394897,
      "logps/chosen": -299.97552490234375,
      "logps/rejected": -199.7147979736328,
      "loss": 0.6391,
      "rewards/accuracies": 0.953125,
      "rewards/chosen": 0.0902346596121788,
      "rewards/margins": 0.11254666745662689,
      "rewards/rejected": -0.022312019020318985,
      "step": 100
    },
    {
      "epoch": 0.7486238532110092,
      "grad_norm": 0.2806454002857208,
      "learning_rate": 4.666844679969765e-06,
      "logits/chosen": -1.2644224166870117,
      "logits/rejected": -2.302584409713745,
      "logps/chosen": -296.40313720703125,
      "logps/rejected": -209.49896240234375,
      "loss": 0.6356,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08069995045661926,
      "rewards/margins": 0.11945149302482605,
      "rewards/rejected": -0.03875154256820679,
      "step": 102
    },
    {
      "epoch": 0.763302752293578,
      "grad_norm": 0.2888976037502289,
      "learning_rate": 4.6451805808190464e-06,
      "logits/chosen": -1.0132654905319214,
      "logits/rejected": -2.16083025932312,
      "logps/chosen": -281.1504821777344,
      "logps/rejected": -177.13720703125,
      "loss": 0.6312,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08802622556686401,
      "rewards/margins": 0.12888850271701813,
      "rewards/rejected": -0.04086225852370262,
      "step": 104
    },
    {
      "epoch": 0.7779816513761468,
      "grad_norm": 0.3031749427318573,
      "learning_rate": 4.622887727529104e-06,
      "logits/chosen": -1.026634693145752,
      "logits/rejected": -2.1324048042297363,
      "logps/chosen": -254.4738006591797,
      "logps/rejected": -208.37692260742188,
      "loss": 0.6281,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08762703835964203,
      "rewards/margins": 0.1357976347208023,
      "rewards/rejected": -0.04817059263586998,
      "step": 106
    },
    {
      "epoch": 0.7926605504587156,
      "grad_norm": 0.29786497354507446,
      "learning_rate": 4.599972654153018e-06,
      "logits/chosen": -0.8744025230407715,
      "logits/rejected": -2.115464925765991,
      "logps/chosen": -297.45904541015625,
      "logps/rejected": -174.9972686767578,
      "loss": 0.6238,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10411246865987778,
      "rewards/margins": 0.1449991911649704,
      "rewards/rejected": -0.04088671877980232,
      "step": 108
    },
    {
      "epoch": 0.8073394495412844,
      "grad_norm": 0.28555893898010254,
      "learning_rate": 4.5764420771170735e-06,
      "logits/chosen": -0.9228212237358093,
      "logits/rejected": -2.091989517211914,
      "logps/chosen": -274.9249572753906,
      "logps/rejected": -194.3198699951172,
      "loss": 0.6226,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.09574063122272491,
      "rewards/margins": 0.14816924929618835,
      "rewards/rejected": -0.052428603172302246,
      "step": 110
    },
    {
      "epoch": 0.8220183486238533,
      "grad_norm": 0.28240182995796204,
      "learning_rate": 4.552302893252166e-06,
      "logits/chosen": -1.1849777698516846,
      "logits/rejected": -2.233544111251831,
      "logps/chosen": -302.3770751953125,
      "logps/rejected": -206.6287384033203,
      "loss": 0.6247,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.09496743977069855,
      "rewards/margins": 0.1430475115776062,
      "rewards/rejected": -0.04808007925748825,
      "step": 112
    },
    {
      "epoch": 0.8366972477064221,
      "grad_norm": 0.2690318524837494,
      "learning_rate": 4.52756217777234e-06,
      "logits/chosen": -1.1899240016937256,
      "logits/rejected": -2.208632469177246,
      "logps/chosen": -307.2008056640625,
      "logps/rejected": -208.73226928710938,
      "loss": 0.622,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1049693301320076,
      "rewards/margins": 0.14956367015838623,
      "rewards/rejected": -0.04459436237812042,
      "step": 114
    },
    {
      "epoch": 0.8513761467889909,
      "grad_norm": 0.2907615602016449,
      "learning_rate": 4.502227182201035e-06,
      "logits/chosen": -0.8904531598091125,
      "logits/rejected": -2.011884927749634,
      "logps/chosen": -260.03460693359375,
      "logps/rejected": -176.82861328125,
      "loss": 0.6151,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10961556434631348,
      "rewards/margins": 0.1642199605703354,
      "rewards/rejected": -0.054604414850473404,
      "step": 116
    },
    {
      "epoch": 0.8660550458715597,
      "grad_norm": 0.2946977913379669,
      "learning_rate": 4.476305332245662e-06,
      "logits/chosen": -1.0399869680404663,
      "logits/rejected": -2.351107358932495,
      "logps/chosen": -310.02154541015625,
      "logps/rejected": -154.79171752929688,
      "loss": 0.6114,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.10798949748277664,
      "rewards/margins": 0.17239324748516083,
      "rewards/rejected": -0.06440375000238419,
      "step": 118
    },
    {
      "epoch": 0.8807339449541285,
      "grad_norm": 0.2917464077472687,
      "learning_rate": 4.449804225621116e-06,
      "logits/chosen": -0.9798471927642822,
      "logits/rejected": -2.118800640106201,
      "logps/chosen": -274.9358215332031,
      "logps/rejected": -182.828369140625,
      "loss": 0.6166,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.10332541167736053,
      "rewards/margins": 0.16179130971431732,
      "rewards/rejected": -0.05846590921282768,
      "step": 120
    },
    {
      "epoch": 0.8954128440366973,
      "grad_norm": 0.2943665683269501,
      "learning_rate": 4.422731629822887e-06,
      "logits/chosen": -0.8896728754043579,
      "logits/rejected": -2.0205516815185547,
      "logps/chosen": -310.06146240234375,
      "logps/rejected": -196.7396697998047,
      "loss": 0.6105,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.109949491918087,
      "rewards/margins": 0.1750711053609848,
      "rewards/rejected": -0.0651216208934784,
      "step": 122
    },
    {
      "epoch": 0.9100917431192661,
      "grad_norm": 0.27942806482315063,
      "learning_rate": 4.395095479850396e-06,
      "logits/chosen": -0.9086670875549316,
      "logits/rejected": -1.9397008419036865,
      "logps/chosen": -283.2035827636719,
      "logps/rejected": -189.4768524169922,
      "loss": 0.6109,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.11041680723428726,
      "rewards/margins": 0.17488707602024078,
      "rewards/rejected": -0.06447026133537292,
      "step": 124
    },
    {
      "epoch": 0.9247706422018349,
      "grad_norm": 0.2816947400569916,
      "learning_rate": 4.366903875881243e-06,
      "logits/chosen": -1.0257644653320312,
      "logits/rejected": -2.3893752098083496,
      "logps/chosen": -270.8984375,
      "logps/rejected": -168.3092803955078,
      "loss": 0.6005,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10698913782835007,
      "rewards/margins": 0.1973370462656021,
      "rewards/rejected": -0.09034790843725204,
      "step": 126
    },
    {
      "epoch": 0.9394495412844037,
      "grad_norm": 0.2816489040851593,
      "learning_rate": 4.3381650808970365e-06,
      "logits/chosen": -0.9650391340255737,
      "logits/rejected": -2.0026278495788574,
      "logps/chosen": -250.3959197998047,
      "logps/rejected": -189.07101440429688,
      "loss": 0.608,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.10601907968521118,
      "rewards/margins": 0.18029268085956573,
      "rewards/rejected": -0.07427360117435455,
      "step": 128
    },
    {
      "epoch": 0.9541284403669725,
      "grad_norm": 0.312788188457489,
      "learning_rate": 4.308887518261507e-06,
      "logits/chosen": -0.8028702735900879,
      "logits/rejected": -1.9657632112503052,
      "logps/chosen": -273.3324890136719,
      "logps/rejected": -198.6640167236328,
      "loss": 0.6011,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12132798135280609,
      "rewards/margins": 0.19634239375591278,
      "rewards/rejected": -0.07501440495252609,
      "step": 130
    },
    {
      "epoch": 0.9688073394495413,
      "grad_norm": 0.3127909302711487,
      "learning_rate": 4.279079769251617e-06,
      "logits/chosen": -1.140122652053833,
      "logits/rejected": -2.2408370971679688,
      "logps/chosen": -345.58453369140625,
      "logps/rejected": -215.017578125,
      "loss": 0.596,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.12369377911090851,
      "rewards/margins": 0.20813299715518951,
      "rewards/rejected": -0.08443920314311981,
      "step": 132
    },
    {
      "epoch": 0.9834862385321101,
      "grad_norm": 0.2701089680194855,
      "learning_rate": 4.248750570542373e-06,
      "logits/chosen": -0.9123460054397583,
      "logits/rejected": -2.11623215675354,
      "logps/chosen": -267.0220031738281,
      "logps/rejected": -184.01718139648438,
      "loss": 0.5983,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1171736866235733,
      "rewards/margins": 0.20216205716133118,
      "rewards/rejected": -0.08498836308717728,
      "step": 134
    },
    {
      "epoch": 0.998165137614679,
      "grad_norm": 0.33086276054382324,
      "learning_rate": 4.21790881164611e-06,
      "logits/chosen": -0.8444420099258423,
      "logits/rejected": -2.1506567001342773,
      "logps/chosen": -277.3634033203125,
      "logps/rejected": -200.18048095703125,
      "loss": 0.5837,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.12353068590164185,
      "rewards/margins": 0.23676438629627228,
      "rewards/rejected": -0.11323369294404984,
      "step": 136
    },
    {
      "epoch": 1.0128440366972478,
      "grad_norm": 0.2951512932777405,
      "learning_rate": 4.186563532306957e-06,
      "logits/chosen": -0.8253529667854309,
      "logits/rejected": -2.100139617919922,
      "logps/chosen": -284.3374328613281,
      "logps/rejected": -174.05772399902344,
      "loss": 0.5889,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11334549635648727,
      "rewards/margins": 0.2235983908176422,
      "rewards/rejected": -0.11025289446115494,
      "step": 138
    },
    {
      "epoch": 1.0275229357798166,
      "grad_norm": 0.27380526065826416,
      "learning_rate": 4.154723919851291e-06,
      "logits/chosen": -1.0042250156402588,
      "logits/rejected": -2.136974811553955,
      "logps/chosen": -285.943603515625,
      "logps/rejected": -179.10626220703125,
      "loss": 0.5958,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10538989305496216,
      "rewards/margins": 0.20918627083301544,
      "rewards/rejected": -0.10379637777805328,
      "step": 140
    },
    {
      "epoch": 1.0422018348623854,
      "grad_norm": 0.3004053235054016,
      "learning_rate": 4.122399306494918e-06,
      "logits/chosen": -1.0141961574554443,
      "logits/rejected": -2.293670654296875,
      "logps/chosen": -331.0174865722656,
      "logps/rejected": -204.6294403076172,
      "loss": 0.5867,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.11831127107143402,
      "rewards/margins": 0.2288443148136139,
      "rewards/rejected": -0.11053304374217987,
      "step": 142
    },
    {
      "epoch": 1.0568807339449542,
      "grad_norm": 0.32684940099716187,
      "learning_rate": 4.089599166607794e-06,
      "logits/chosen": -0.983501136302948,
      "logits/rejected": -2.0373613834381104,
      "logps/chosen": -286.7935791015625,
      "logps/rejected": -195.14013671875,
      "loss": 0.5708,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11853000521659851,
      "rewards/margins": 0.2656952738761902,
      "rewards/rejected": -0.14716526865959167,
      "step": 144
    },
    {
      "epoch": 1.071559633027523,
      "grad_norm": 0.2967604994773865,
      "learning_rate": 4.05633311393708e-06,
      "logits/chosen": -0.8452234864234924,
      "logits/rejected": -2.047391891479492,
      "logps/chosen": -252.7887725830078,
      "logps/rejected": -178.90480041503906,
      "loss": 0.5854,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11606443673372269,
      "rewards/margins": 0.23208481073379517,
      "rewards/rejected": -0.11602037400007248,
      "step": 146
    },
    {
      "epoch": 1.0862385321100918,
      "grad_norm": 0.3046511709690094,
      "learning_rate": 4.022610898789349e-06,
      "logits/chosen": -0.882270872592926,
      "logits/rejected": -2.117722272872925,
      "logps/chosen": -260.86444091796875,
      "logps/rejected": -194.1956787109375,
      "loss": 0.5746,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.1244753897190094,
      "rewards/margins": 0.2567788362503052,
      "rewards/rejected": -0.13230343163013458,
      "step": 148
    },
    {
      "epoch": 1.1009174311926606,
      "grad_norm": 0.3329361081123352,
      "learning_rate": 3.988442405172755e-06,
      "logits/chosen": -0.7351129055023193,
      "logits/rejected": -2.0105910301208496,
      "logps/chosen": -274.9642639160156,
      "logps/rejected": -209.6830291748047,
      "loss": 0.5678,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.14153896272182465,
      "rewards/margins": 0.27290502190589905,
      "rewards/rejected": -0.1313660740852356,
      "step": 150
    },
    {
      "epoch": 1.1155963302752294,
      "grad_norm": 0.336039274930954,
      "learning_rate": 3.953837647900031e-06,
      "logits/chosen": -0.8156127333641052,
      "logits/rejected": -2.1167821884155273,
      "logps/chosen": -267.0653991699219,
      "logps/rejected": -204.4987030029297,
      "loss": 0.5624,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.14284199476242065,
      "rewards/margins": 0.2878842055797577,
      "rewards/rejected": -0.14504222571849823,
      "step": 152
    },
    {
      "epoch": 1.1302752293577982,
      "grad_norm": 0.2933123707771301,
      "learning_rate": 3.918806769653135e-06,
      "logits/chosen": -0.7190850377082825,
      "logits/rejected": -2.0248467922210693,
      "logps/chosen": -312.485107421875,
      "logps/rejected": -204.29519653320312,
      "loss": 0.5681,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.13536058366298676,
      "rewards/margins": 0.2748079001903534,
      "rewards/rejected": -0.13944728672504425,
      "step": 154
    },
    {
      "epoch": 1.144954128440367,
      "grad_norm": 0.2955363988876343,
      "learning_rate": 3.88336003801042e-06,
      "logits/chosen": -0.7899962663650513,
      "logits/rejected": -2.054001808166504,
      "logps/chosen": -249.49229431152344,
      "logps/rejected": -186.16180419921875,
      "loss": 0.5765,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11917533725500107,
      "rewards/margins": 0.2534089684486389,
      "rewards/rejected": -0.13423362374305725,
      "step": 156
    },
    {
      "epoch": 1.1596330275229358,
      "grad_norm": 0.32821422815322876,
      "learning_rate": 3.847507842437205e-06,
      "logits/chosen": -0.6861492395401001,
      "logits/rejected": -2.101705312728882,
      "logps/chosen": -290.5328063964844,
      "logps/rejected": -182.29885864257812,
      "loss": 0.5538,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.13685588538646698,
      "rewards/margins": 0.307420015335083,
      "rewards/rejected": -0.17056412994861603,
      "step": 158
    },
    {
      "epoch": 1.1743119266055047,
      "grad_norm": 0.33539363741874695,
      "learning_rate": 3.811260691240604e-06,
      "logits/chosen": -0.7100223302841187,
      "logits/rejected": -2.099154472351074,
      "logps/chosen": -332.88043212890625,
      "logps/rejected": -198.502685546875,
      "loss": 0.5534,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1523381769657135,
      "rewards/margins": 0.3066955804824829,
      "rewards/rejected": -0.15435738861560822,
      "step": 160
    },
    {
      "epoch": 1.1889908256880735,
      "grad_norm": 0.2996244430541992,
      "learning_rate": 3.774629208489547e-06,
      "logits/chosen": -0.7699471116065979,
      "logits/rejected": -2.086440086364746,
      "logps/chosen": -236.2914581298828,
      "logps/rejected": -182.8726348876953,
      "loss": 0.5679,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1200590655207634,
      "rewards/margins": 0.272464394569397,
      "rewards/rejected": -0.15240532159805298,
      "step": 162
    },
    {
      "epoch": 1.2036697247706423,
      "grad_norm": 0.32394227385520935,
      "learning_rate": 3.7376241309008433e-06,
      "logits/chosen": -0.921927809715271,
      "logits/rejected": -2.105692148208618,
      "logps/chosen": -320.51068115234375,
      "logps/rejected": -194.18077087402344,
      "loss": 0.5553,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1409882754087448,
      "rewards/margins": 0.3050813674926758,
      "rewards/rejected": -0.16409309208393097,
      "step": 164
    },
    {
      "epoch": 1.218348623853211,
      "grad_norm": 0.3441508710384369,
      "learning_rate": 3.7002563046922502e-06,
      "logits/chosen": -0.891487181186676,
      "logits/rejected": -2.220118761062622,
      "logps/chosen": -320.64642333984375,
      "logps/rejected": -186.1455841064453,
      "loss": 0.5417,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1372089684009552,
      "rewards/margins": 0.3370189666748047,
      "rewards/rejected": -0.19981002807617188,
      "step": 166
    },
    {
      "epoch": 1.2330275229357799,
      "grad_norm": 0.356285035610199,
      "learning_rate": 3.6625366824034337e-06,
      "logits/chosen": -0.6430750489234924,
      "logits/rejected": -2.0550131797790527,
      "logps/chosen": -272.8852233886719,
      "logps/rejected": -221.4967498779297,
      "loss": 0.53,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.14860178530216217,
      "rewards/margins": 0.36680835485458374,
      "rewards/rejected": -0.21820658445358276,
      "step": 168
    },
    {
      "epoch": 1.2477064220183487,
      "grad_norm": 0.3269821107387543,
      "learning_rate": 3.6244763196857714e-06,
      "logits/chosen": -0.7699840068817139,
      "logits/rejected": -2.1032745838165283,
      "logps/chosen": -290.3015441894531,
      "logps/rejected": -195.3329620361328,
      "loss": 0.5383,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.14162112772464752,
      "rewards/margins": 0.3490352928638458,
      "rewards/rejected": -0.20741413533687592,
      "step": 170
    },
    {
      "epoch": 1.2623853211009175,
      "grad_norm": 0.3313334584236145,
      "learning_rate": 3.5860863720619333e-06,
      "logits/chosen": -0.7985893487930298,
      "logits/rejected": -2.0475993156433105,
      "logps/chosen": -281.7800598144531,
      "logps/rejected": -196.2750701904297,
      "loss": 0.5446,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1597270667552948,
      "rewards/margins": 0.3294825851917267,
      "rewards/rejected": -0.16975551843643188,
      "step": 172
    },
    {
      "epoch": 1.2770642201834863,
      "grad_norm": 0.3373997211456299,
      "learning_rate": 3.547378091656186e-06,
      "logits/chosen": -0.6589763760566711,
      "logits/rejected": -2.0597751140594482,
      "logps/chosen": -288.1597595214844,
      "logps/rejected": -187.80250549316406,
      "loss": 0.5396,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12963534891605377,
      "rewards/margins": 0.341688334941864,
      "rewards/rejected": -0.21205295622348785,
      "step": 174
    },
    {
      "epoch": 1.2917431192660551,
      "grad_norm": 0.3771114945411682,
      "learning_rate": 3.5083628238963913e-06,
      "logits/chosen": -0.8436675667762756,
      "logits/rejected": -1.894881010055542,
      "logps/chosen": -229.3706512451172,
      "logps/rejected": -188.755615234375,
      "loss": 0.5512,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12475646287202835,
      "rewards/margins": 0.31724295020103455,
      "rewards/rejected": -0.19248650968074799,
      "step": 176
    },
    {
      "epoch": 1.306422018348624,
      "grad_norm": 0.34316545724868774,
      "learning_rate": 3.4690520041886473e-06,
      "logits/chosen": -0.6456696391105652,
      "logits/rejected": -1.9966225624084473,
      "logps/chosen": -269.55963134765625,
      "logps/rejected": -226.95050048828125,
      "loss": 0.5392,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.13535434007644653,
      "rewards/margins": 0.34239569306373596,
      "rewards/rejected": -0.20704133808612823,
      "step": 178
    },
    {
      "epoch": 1.3211009174311927,
      "grad_norm": 0.3454464077949524,
      "learning_rate": 3.4294571545655653e-06,
      "logits/chosen": -0.642143726348877,
      "logits/rejected": -2.1874210834503174,
      "logps/chosen": -286.9853515625,
      "logps/rejected": -197.84173583984375,
      "loss": 0.5183,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.14739646017551422,
      "rewards/margins": 0.3938574492931366,
      "rewards/rejected": -0.24646097421646118,
      "step": 180
    },
    {
      "epoch": 1.3357798165137615,
      "grad_norm": 0.35638609528541565,
      "learning_rate": 3.38958988030915e-06,
      "logits/chosen": -0.9664332270622253,
      "logits/rejected": -2.0060455799102783,
      "logps/chosen": -269.21246337890625,
      "logps/rejected": -241.5162353515625,
      "loss": 0.5321,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": 0.12982626259326935,
      "rewards/margins": 0.3629656732082367,
      "rewards/rejected": -0.23313936591148376,
      "step": 182
    },
    {
      "epoch": 1.3504587155963304,
      "grad_norm": 0.3154853284358978,
      "learning_rate": 3.3494618665492833e-06,
      "logits/chosen": -0.8542514443397522,
      "logits/rejected": -1.9480448961257935,
      "logps/chosen": -251.843994140625,
      "logps/rejected": -207.665771484375,
      "loss": 0.5528,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10288684070110321,
      "rewards/margins": 0.31177228689193726,
      "rewards/rejected": -0.20888544619083405,
      "step": 184
    },
    {
      "epoch": 1.3651376146788992,
      "grad_norm": 0.35757479071617126,
      "learning_rate": 3.3090848748388042e-06,
      "logits/chosen": -0.7120774388313293,
      "logits/rejected": -2.0478219985961914,
      "logps/chosen": -348.96759033203125,
      "logps/rejected": -213.79104614257812,
      "loss": 0.515,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11942586302757263,
      "rewards/margins": 0.40438294410705566,
      "rewards/rejected": -0.2849571108818054,
      "step": 186
    },
    {
      "epoch": 1.379816513761468,
      "grad_norm": 0.3314104676246643,
      "learning_rate": 3.2684707397061887e-06,
      "logits/chosen": -0.7999655604362488,
      "logits/rejected": -1.9970111846923828,
      "logps/chosen": -287.744384765625,
      "logps/rejected": -191.04306030273438,
      "loss": 0.5302,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1301116943359375,
      "rewards/margins": 0.36544448137283325,
      "rewards/rejected": -0.23533278703689575,
      "step": 188
    },
    {
      "epoch": 1.3944954128440368,
      "grad_norm": 0.32805782556533813,
      "learning_rate": 3.2276313651868364e-06,
      "logits/chosen": -0.6480334997177124,
      "logits/rejected": -2.014680862426758,
      "logps/chosen": -293.04827880859375,
      "logps/rejected": -181.7431640625,
      "loss": 0.5244,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.119876429438591,
      "rewards/margins": 0.38110417127609253,
      "rewards/rejected": -0.2612277865409851,
      "step": 190
    },
    {
      "epoch": 1.4091743119266056,
      "grad_norm": 0.3541790544986725,
      "learning_rate": 3.1865787213339926e-06,
      "logits/chosen": -0.6706480383872986,
      "logits/rejected": -1.988142490386963,
      "logps/chosen": -276.2215270996094,
      "logps/rejected": -207.83692932128906,
      "loss": 0.5108,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.13069698214530945,
      "rewards/margins": 0.4180029034614563,
      "rewards/rejected": -0.28730592131614685,
      "step": 192
    },
    {
      "epoch": 1.4238532110091744,
      "grad_norm": 0.3416215181350708,
      "learning_rate": 3.1453248407103156e-06,
      "logits/chosen": -0.7083946466445923,
      "logits/rejected": -2.0228724479675293,
      "logps/chosen": -284.3505859375,
      "logps/rejected": -190.29495239257812,
      "loss": 0.5255,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10038997232913971,
      "rewards/margins": 0.3794632852077484,
      "rewards/rejected": -0.2790733575820923,
      "step": 194
    },
    {
      "epoch": 1.4385321100917432,
      "grad_norm": 0.34125635027885437,
      "learning_rate": 3.1038818148611178e-06,
      "logits/chosen": -0.6999146938323975,
      "logits/rejected": -1.8979852199554443,
      "logps/chosen": -307.41461181640625,
      "logps/rejected": -203.9014434814453,
      "loss": 0.5117,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12564200162887573,
      "rewards/margins": 0.4146888852119446,
      "rewards/rejected": -0.28904691338539124,
      "step": 196
    },
    {
      "epoch": 1.453211009174312,
      "grad_norm": 0.33174267411231995,
      "learning_rate": 3.062261790770331e-06,
      "logits/chosen": -0.5950466394424438,
      "logits/rejected": -1.8337745666503906,
      "logps/chosen": -255.2974090576172,
      "logps/rejected": -201.39553833007812,
      "loss": 0.5311,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.11827966570854187,
      "rewards/margins": 0.3665844798088074,
      "rewards/rejected": -0.2483048141002655,
      "step": 198
    },
    {
      "epoch": 1.4678899082568808,
      "grad_norm": 0.3481040894985199,
      "learning_rate": 3.0204769673002123e-06,
      "logits/chosen": -0.6397859454154968,
      "logits/rejected": -1.9714891910552979,
      "logps/chosen": -329.48492431640625,
      "logps/rejected": -220.13035583496094,
      "loss": 0.5133,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12427037209272385,
      "rewards/margins": 0.4087342917919159,
      "rewards/rejected": -0.28446391224861145,
      "step": 200
    },
    {
      "epoch": 1.4825688073394496,
      "grad_norm": 0.390900582075119,
      "learning_rate": 2.978539591615848e-06,
      "logits/chosen": -0.7218199968338013,
      "logits/rejected": -1.7457867860794067,
      "logps/chosen": -296.8045959472656,
      "logps/rejected": -222.34521484375,
      "loss": 0.5065,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.09192880988121033,
      "rewards/margins": 0.42625176906585693,
      "rewards/rejected": -0.3343229591846466,
      "step": 202
    },
    {
      "epoch": 1.4972477064220184,
      "grad_norm": 0.351463258266449,
      "learning_rate": 2.936461955595501e-06,
      "logits/chosen": -0.7173420786857605,
      "logits/rejected": -1.977159023284912,
      "logps/chosen": -294.3747863769531,
      "logps/rejected": -215.15768432617188,
      "loss": 0.5078,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.12666739523410797,
      "rewards/margins": 0.42647331953048706,
      "rewards/rejected": -0.2998059093952179,
      "step": 204
    },
    {
      "epoch": 1.5119266055045872,
      "grad_norm": 0.3610781133174896,
      "learning_rate": 2.8942563922278487e-06,
      "logits/chosen": -0.6648725867271423,
      "logits/rejected": -1.9700089693069458,
      "logps/chosen": -283.52105712890625,
      "logps/rejected": -224.07928466796875,
      "loss": 0.4952,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10612186789512634,
      "rewards/margins": 0.4603241980075836,
      "rewards/rejected": -0.3542023003101349,
      "step": 206
    },
    {
      "epoch": 1.526605504587156,
      "grad_norm": 0.3653653562068939,
      "learning_rate": 2.8519352719971783e-06,
      "logits/chosen": -0.7331939935684204,
      "logits/rejected": -1.900336503982544,
      "logps/chosen": -314.0116271972656,
      "logps/rejected": -227.59866333007812,
      "loss": 0.5053,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11153300106525421,
      "rewards/margins": 0.4361909329891205,
      "rewards/rejected": -0.32465797662734985,
      "step": 208
    },
    {
      "epoch": 1.5412844036697249,
      "grad_norm": 0.36628058552742004,
      "learning_rate": 2.8095109992575824e-06,
      "logits/chosen": -0.5989956855773926,
      "logits/rejected": -1.909377098083496,
      "logps/chosen": -323.9544372558594,
      "logps/rejected": -227.3318328857422,
      "loss": 0.4931,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.13806498050689697,
      "rewards/margins": 0.4600377082824707,
      "rewards/rejected": -0.3219727873802185,
      "step": 210
    },
    {
      "epoch": 1.5559633027522937,
      "grad_norm": 0.36416521668434143,
      "learning_rate": 2.7669960085972407e-06,
      "logits/chosen": -0.5558376908302307,
      "logits/rejected": -2.039389133453369,
      "logps/chosen": -349.69097900390625,
      "logps/rejected": -247.94447326660156,
      "loss": 0.496,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.10622513294219971,
      "rewards/margins": 0.45604854822158813,
      "rewards/rejected": -0.3498234152793884,
      "step": 212
    },
    {
      "epoch": 1.5706422018348625,
      "grad_norm": 0.3883703351020813,
      "learning_rate": 2.7244027611938247e-06,
      "logits/chosen": -0.4841197729110718,
      "logits/rejected": -1.7593891620635986,
      "logps/chosen": -248.69354248046875,
      "logps/rejected": -248.91748046875,
      "loss": 0.4979,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.1012047603726387,
      "rewards/margins": 0.4565024971961975,
      "rewards/rejected": -0.3552977442741394,
      "step": 214
    },
    {
      "epoch": 1.5853211009174313,
      "grad_norm": 0.3964632451534271,
      "learning_rate": 2.6817437411621194e-06,
      "logits/chosen": -0.5776757597923279,
      "logits/rejected": -1.8276855945587158,
      "logps/chosen": -341.3336486816406,
      "logps/rejected": -265.1352844238281,
      "loss": 0.5008,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.09857098758220673,
      "rewards/margins": 0.4445044994354248,
      "rewards/rejected": -0.3459335267543793,
      "step": 216
    },
    {
      "epoch": 1.6,
      "grad_norm": 0.37495648860931396,
      "learning_rate": 2.639031451894923e-06,
      "logits/chosen": -0.6398189067840576,
      "logits/rejected": -1.6685264110565186,
      "logps/chosen": -326.729736328125,
      "logps/rejected": -252.25091552734375,
      "loss": 0.4828,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11856982856988907,
      "rewards/margins": 0.4900485873222351,
      "rewards/rejected": -0.371478796005249,
      "step": 218
    },
    {
      "epoch": 1.614678899082569,
      "grad_norm": 0.3848208785057068,
      "learning_rate": 2.5962784123982843e-06,
      "logits/chosen": -0.6401384472846985,
      "logits/rejected": -1.92448890209198,
      "logps/chosen": -304.608154296875,
      "logps/rejected": -231.53329467773438,
      "loss": 0.4815,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08971206098794937,
      "rewards/margins": 0.49811363220214844,
      "rewards/rejected": -0.4084015488624573,
      "step": 220
    },
    {
      "epoch": 1.6293577981651377,
      "grad_norm": 0.3663283884525299,
      "learning_rate": 2.5534971536221804e-06,
      "logits/chosen": -0.44060736894607544,
      "logits/rejected": -1.6944139003753662,
      "logps/chosen": -268.5877685546875,
      "logps/rejected": -223.1809539794922,
      "loss": 0.4908,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.07550681382417679,
      "rewards/margins": 0.4683816432952881,
      "rewards/rejected": -0.3928748369216919,
      "step": 222
    },
    {
      "epoch": 1.6440366972477065,
      "grad_norm": 0.4043430984020233,
      "learning_rate": 2.5107002147876814e-06,
      "logits/chosen": -0.5844863057136536,
      "logits/rejected": -1.6070818901062012,
      "logps/chosen": -261.3151550292969,
      "logps/rejected": -239.7775421142578,
      "loss": 0.4773,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.09637773036956787,
      "rewards/margins": 0.5067303776741028,
      "rewards/rejected": -0.4103526473045349,
      "step": 224
    },
    {
      "epoch": 1.6587155963302753,
      "grad_norm": 0.3989478051662445,
      "learning_rate": 2.467900139711693e-06,
      "logits/chosen": -0.6412825584411621,
      "logits/rejected": -1.7481966018676758,
      "logps/chosen": -274.21600341796875,
      "logps/rejected": -230.42076110839844,
      "loss": 0.4953,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": 0.05686591565608978,
      "rewards/margins": 0.4652402400970459,
      "rewards/rejected": -0.4083743095397949,
      "step": 226
    },
    {
      "epoch": 1.6733944954128441,
      "grad_norm": 0.40395867824554443,
      "learning_rate": 2.4251094731303586e-06,
      "logits/chosen": -0.49992677569389343,
      "logits/rejected": -1.8136967420578003,
      "logps/chosen": -288.6466979980469,
      "logps/rejected": -212.03469848632812,
      "loss": 0.478,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.11203654110431671,
      "rewards/margins": 0.5017505288124084,
      "rewards/rejected": -0.38971400260925293,
      "step": 228
    },
    {
      "epoch": 1.688073394495413,
      "grad_norm": 0.40771210193634033,
      "learning_rate": 2.3823407570221812e-06,
      "logits/chosen": -0.4164493978023529,
      "logits/rejected": -1.732940912246704,
      "logps/chosen": -299.05352783203125,
      "logps/rejected": -209.460693359375,
      "loss": 0.4758,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.09490346908569336,
      "rewards/margins": 0.5081382989883423,
      "rewards/rejected": -0.4132348895072937,
      "step": 230
    },
    {
      "epoch": 1.7027522935779817,
      "grad_norm": 0.43389517068862915,
      "learning_rate": 2.3396065269319655e-06,
      "logits/chosen": -0.5535441040992737,
      "logits/rejected": -1.8134104013442993,
      "logps/chosen": -299.3083801269531,
      "logps/rejected": -208.33255004882812,
      "loss": 0.4719,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.0902726948261261,
      "rewards/margins": 0.5190740823745728,
      "rewards/rejected": -0.42880135774612427,
      "step": 232
    },
    {
      "epoch": 1.7174311926605506,
      "grad_norm": 0.424278199672699,
      "learning_rate": 2.2969193082966353e-06,
      "logits/chosen": -0.41578805446624756,
      "logits/rejected": -1.7346811294555664,
      "logps/chosen": -285.720703125,
      "logps/rejected": -229.79200744628906,
      "loss": 0.4571,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.07972488552331924,
      "rewards/margins": 0.5599175691604614,
      "rewards/rejected": -0.48019272089004517,
      "step": 234
    },
    {
      "epoch": 1.7321100917431194,
      "grad_norm": 0.42984944581985474,
      "learning_rate": 2.2542916127740194e-06,
      "logits/chosen": -0.41974660754203796,
      "logits/rejected": -1.4251604080200195,
      "logps/chosen": -312.57513427734375,
      "logps/rejected": -272.7965393066406,
      "loss": 0.4653,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.08319795876741409,
      "rewards/margins": 0.5447360873222351,
      "rewards/rejected": -0.4615381360054016,
      "step": 236
    },
    {
      "epoch": 1.7467889908256882,
      "grad_norm": 0.4242051839828491,
      "learning_rate": 2.211735934575674e-06,
      "logits/chosen": -0.5058940649032593,
      "logits/rejected": -1.8528114557266235,
      "logps/chosen": -284.7939453125,
      "logps/rejected": -203.8343048095703,
      "loss": 0.4837,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.035887014120817184,
      "rewards/margins": 0.49387410283088684,
      "rewards/rejected": -0.45798707008361816,
      "step": 238
    },
    {
      "epoch": 1.761467889908257,
      "grad_norm": 0.4273104667663574,
      "learning_rate": 2.1692647468048235e-06,
      "logits/chosen": -0.6344825625419617,
      "logits/rejected": -1.6885712146759033,
      "logps/chosen": -309.73126220703125,
      "logps/rejected": -255.3787078857422,
      "loss": 0.4323,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.0560072623193264,
      "rewards/margins": 0.6476345658302307,
      "rewards/rejected": -0.5916274189949036,
      "step": 240
    },
    {
      "epoch": 1.7761467889908258,
      "grad_norm": 0.4034501612186432,
      "learning_rate": 2.126890497800477e-06,
      "logits/chosen": -0.6291913986206055,
      "logits/rejected": -1.620194673538208,
      "logps/chosen": -299.7173767089844,
      "logps/rejected": -240.67295837402344,
      "loss": 0.4772,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 0.05461275577545166,
      "rewards/margins": 0.514678955078125,
      "rewards/rejected": -0.46006619930267334,
      "step": 242
|
}, |
|
{ |
|
"epoch": 1.7908256880733946, |
|
"grad_norm": 0.4300025701522827, |
|
"learning_rate": 2.084625607488816e-06, |
|
"logits/chosen": -0.463968962430954, |
|
"logits/rejected": -1.6872165203094482, |
|
"logps/chosen": -276.1863708496094, |
|
"logps/rejected": -232.17691040039062, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07805097103118896, |
|
"rewards/margins": 0.6000757217407227, |
|
"rewards/rejected": -0.5220247507095337, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.8055045871559634, |
|
"grad_norm": 0.45860350131988525, |
|
"learning_rate": 2.0424824637428995e-06, |
|
"logits/chosen": -0.46060910820961, |
|
"logits/rejected": -1.8364752531051636, |
|
"logps/chosen": -269.51629638671875, |
|
"logps/rejected": -215.93104553222656, |
|
"loss": 0.4525, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05690962076187134, |
|
"rewards/margins": 0.5782269835472107, |
|
"rewards/rejected": -0.5213173627853394, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.8201834862385322, |
|
"grad_norm": 0.42100799083709717, |
|
"learning_rate": 2.0004734187517744e-06, |
|
"logits/chosen": -0.63045334815979, |
|
"logits/rejected": -1.585779070854187, |
|
"logps/chosen": -322.1896667480469, |
|
"logps/rejected": -217.5279083251953, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.044022977352142334, |
|
"rewards/margins": 0.5300399661064148, |
|
"rewards/rejected": -0.48601701855659485, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.4985113739967346, |
|
"learning_rate": 1.9586107854000327e-06, |
|
"logits/chosen": -0.6236332654953003, |
|
"logits/rejected": -1.8319783210754395, |
|
"logps/chosen": -300.3497619628906, |
|
"logps/rejected": -212.712890625, |
|
"loss": 0.4587, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.043223101645708084, |
|
"rewards/margins": 0.5587003827095032, |
|
"rewards/rejected": -0.5154772996902466, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.8495412844036698, |
|
"grad_norm": 0.4770970940589905, |
|
"learning_rate": 1.916906833658899e-06, |
|
"logits/chosen": -0.4269503355026245, |
|
"logits/rejected": -1.6954264640808105, |
|
"logps/chosen": -327.66046142578125, |
|
"logps/rejected": -265.7694091796875, |
|
"loss": 0.4397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.047602906823158264, |
|
"rewards/margins": 0.6201290488243103, |
|
"rewards/rejected": -0.5725261569023132, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.8642201834862386, |
|
"grad_norm": 0.5241467952728271, |
|
"learning_rate": 1.8753737869898921e-06, |
|
"logits/chosen": -0.5074346661567688, |
|
"logits/rejected": -1.632070541381836, |
|
"logps/chosen": -253.24539184570312, |
|
"logps/rejected": -237.83157348632812, |
|
"loss": 0.4282, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.025548625737428665, |
|
"rewards/margins": 0.6568491458892822, |
|
"rewards/rejected": -0.6313005089759827, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.8788990825688074, |
|
"grad_norm": 0.4719642102718353, |
|
"learning_rate": 1.8340238187621185e-06, |
|
"logits/chosen": -0.4057133197784424, |
|
"logits/rejected": -1.6097254753112793, |
|
"logps/chosen": -266.84173583984375, |
|
"logps/rejected": -219.50741577148438, |
|
"loss": 0.4631, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.03513256460428238, |
|
"rewards/margins": 0.5480987429618835, |
|
"rewards/rejected": -0.5129661560058594, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.8935779816513763, |
|
"grad_norm": 0.5006693005561829, |
|
"learning_rate": 1.7928690486842438e-06, |
|
"logits/chosen": -0.5508302450180054, |
|
"logits/rejected": -1.654528021812439, |
|
"logps/chosen": -256.12451171875, |
|
"logps/rejected": -206.68545532226562, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05161134898662567, |
|
"rewards/margins": 0.5913605093955994, |
|
"rewards/rejected": -0.5397491455078125, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.908256880733945, |
|
"grad_norm": 0.5244173407554626, |
|
"learning_rate": 1.7519215392522026e-06, |
|
"logits/chosen": -0.4551970958709717, |
|
"logits/rejected": -1.7113275527954102, |
|
"logps/chosen": -287.8810729980469, |
|
"logps/rejected": -219.74664306640625, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": 0.029543515294790268, |
|
"rewards/margins": 0.6417171359062195, |
|
"rewards/rejected": -0.6121735572814941, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.9229357798165139, |
|
"grad_norm": 0.4658794105052948, |
|
"learning_rate": 1.7111932922136715e-06, |
|
"logits/chosen": -0.5095583200454712, |
|
"logits/rejected": -1.3981832265853882, |
|
"logps/chosen": -258.70489501953125, |
|
"logps/rejected": -256.4411315917969, |
|
"loss": 0.4334, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01712799444794655, |
|
"rewards/margins": 0.6389345526695251, |
|
"rewards/rejected": -0.6218065619468689, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.9376146788990827, |
|
"grad_norm": 0.5127808451652527, |
|
"learning_rate": 1.6706962450503408e-06, |
|
"logits/chosen": -0.2782766819000244, |
|
"logits/rejected": -1.653388261795044, |
|
"logps/chosen": -287.389892578125, |
|
"logps/rejected": -248.96743774414062, |
|
"loss": 0.4097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.030764762312173843, |
|
"rewards/margins": 0.7133948802947998, |
|
"rewards/rejected": -0.6826301217079163, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.9522935779816515, |
|
"grad_norm": 0.480231910943985, |
|
"learning_rate": 1.630442267479034e-06, |
|
"logits/chosen": -0.27020618319511414, |
|
"logits/rejected": -1.4845508337020874, |
|
"logps/chosen": -270.973388671875, |
|
"logps/rejected": -255.1273193359375, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.03783358633518219, |
|
"rewards/margins": 0.683772623538971, |
|
"rewards/rejected": -0.6459389925003052, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.9669724770642203, |
|
"grad_norm": 0.5269174575805664, |
|
"learning_rate": 1.5904431579726837e-06, |
|
"logits/chosen": -0.44177648425102234, |
|
"logits/rejected": -1.6360937356948853, |
|
"logps/chosen": -302.95953369140625, |
|
"logps/rejected": -224.1250762939453, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0007203577551990747, |
|
"rewards/margins": 0.6711003184318542, |
|
"rewards/rejected": -0.6703798770904541, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 0.48144176602363586, |
|
"learning_rate": 1.5507106403021897e-06, |
|
"logits/chosen": -0.3459257483482361, |
|
"logits/rejected": -1.630345344543457, |
|
"logps/chosen": -332.8574523925781, |
|
"logps/rejected": -263.53619384765625, |
|
"loss": 0.409, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.062386758625507355, |
|
"rewards/margins": 0.7201792597770691, |
|
"rewards/rejected": -0.6577924489974976, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.996330275229358, |
|
"grad_norm": 0.548345685005188, |
|
"learning_rate": 1.511256360100171e-06, |
|
"logits/chosen": -0.39235666394233704, |
|
"logits/rejected": -1.5491695404052734, |
|
"logps/chosen": -301.23876953125, |
|
"logps/rejected": -251.07362365722656, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.01144776027649641, |
|
"rewards/margins": 0.6998730897903442, |
|
"rewards/rejected": -0.6884253621101379, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 2.0110091743119267, |
|
"grad_norm": 0.5223273038864136, |
|
"learning_rate": 1.4720918814476234e-06, |
|
"logits/chosen": -0.5719589591026306, |
|
"logits/rejected": -1.5785191059112549, |
|
"logps/chosen": -262.3441162109375, |
|
"logps/rejected": -245.92208862304688, |
|
"loss": 0.4012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0027564354240894318, |
|
"rewards/margins": 0.7612056732177734, |
|
"rewards/rejected": -0.7584491968154907, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 2.0256880733944955, |
|
"grad_norm": 0.4672469198703766, |
|
"learning_rate": 1.4332286834844792e-06, |
|
"logits/chosen": -0.541429877281189, |
|
"logits/rejected": -1.5906240940093994, |
|
"logps/chosen": -297.6129150390625, |
|
"logps/rejected": -249.35211181640625, |
|
"loss": 0.4338, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.03363136574625969, |
|
"rewards/margins": 0.6569401621818542, |
|
"rewards/rejected": -0.6905715465545654, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0403669724770643, |
|
"grad_norm": 0.5325461030006409, |
|
"learning_rate": 1.3946781570450563e-06, |
|
"logits/chosen": -0.4165026545524597, |
|
"logits/rejected": -1.6033610105514526, |
|
"logps/chosen": -310.6376647949219, |
|
"logps/rejected": -258.42279052734375, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.016281984746456146, |
|
"rewards/margins": 0.69865882396698, |
|
"rewards/rejected": -0.6823768615722656, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 2.055045871559633, |
|
"grad_norm": 0.575222373008728, |
|
"learning_rate": 1.3564516013194023e-06, |
|
"logits/chosen": -0.281613826751709, |
|
"logits/rejected": -1.4067842960357666, |
|
"logps/chosen": -275.6092224121094, |
|
"logps/rejected": -254.6931915283203, |
|
"loss": 0.3969, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013200275599956512, |
|
"rewards/margins": 0.7706252336502075, |
|
"rewards/rejected": -0.7838254570960999, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.069724770642202, |
|
"grad_norm": 0.5203925371170044, |
|
"learning_rate": 1.3185602205414894e-06, |
|
"logits/chosen": -0.44925788044929504, |
|
"logits/rejected": -1.5694738626480103, |
|
"logps/chosen": -278.7287292480469, |
|
"logps/rejected": -236.7759552001953, |
|
"loss": 0.4159, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.00868800189346075, |
|
"rewards/margins": 0.7027506828308105, |
|
"rewards/rejected": -0.7114386558532715, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 2.0844036697247708, |
|
"grad_norm": 0.6152932047843933, |
|
"learning_rate": 1.2810151207052465e-06, |
|
"logits/chosen": -0.43063974380493164, |
|
"logits/rejected": -1.480262041091919, |
|
"logps/chosen": -350.267333984375, |
|
"logps/rejected": -292.267578125, |
|
"loss": 0.4115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06882180273532867, |
|
"rewards/margins": 0.7171844244003296, |
|
"rewards/rejected": -0.7860062122344971, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 2.0990825688073396, |
|
"grad_norm": 0.5659114122390747, |
|
"learning_rate": 1.2438273063093811e-06, |
|
"logits/chosen": -0.35572049021720886, |
|
"logits/rejected": -1.484337568283081, |
|
"logps/chosen": -289.94287109375, |
|
"logps/rejected": -233.532958984375, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.05071975663304329, |
|
"rewards/margins": 0.6716474890708923, |
|
"rewards/rejected": -0.7223672866821289, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 2.1137614678899084, |
|
"grad_norm": 0.4600561857223511, |
|
"learning_rate": 1.2070076771319536e-06, |
|
"logits/chosen": -0.535216212272644, |
|
"logits/rejected": -1.3858704566955566, |
|
"logps/chosen": -367.2267761230469, |
|
"logps/rejected": -261.8463134765625, |
|
"loss": 0.4411, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.05313180014491081, |
|
"rewards/margins": 0.6290912628173828, |
|
"rewards/rejected": -0.6822231411933899, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.128440366972477, |
|
"grad_norm": 0.576239824295044, |
|
"learning_rate": 1.1705670250356417e-06, |
|
"logits/chosen": -0.3410319685935974, |
|
"logits/rejected": -1.4600416421890259, |
|
"logps/chosen": -320.59185791015625, |
|
"logps/rejected": -264.9356689453125, |
|
"loss": 0.3927, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00027285143733024597, |
|
"rewards/margins": 0.7705951929092407, |
|
"rewards/rejected": -0.770322322845459, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.143119266055046, |
|
"grad_norm": 0.54137122631073, |
|
"learning_rate": 1.1345160308046413e-06, |
|
"logits/chosen": -0.3377179205417633, |
|
"logits/rejected": -1.772587537765503, |
|
"logps/chosen": -398.6950378417969, |
|
"logps/rejected": -283.0660400390625, |
|
"loss": 0.3851, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.06633389741182327, |
|
"rewards/margins": 0.8007892966270447, |
|
"rewards/rejected": -0.8671231269836426, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 2.157798165137615, |
|
"grad_norm": 0.5551949143409729, |
|
"learning_rate": 1.0988652610141154e-06, |
|
"logits/chosen": -0.45835474133491516, |
|
"logits/rejected": -1.3974124193191528, |
|
"logps/chosen": -288.3085632324219, |
|
"logps/rejected": -282.3466491699219, |
|
"loss": 0.4036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04320283234119415, |
|
"rewards/margins": 0.7347319722175598, |
|
"rewards/rejected": -0.7779347896575928, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 2.1724770642201836, |
|
"grad_norm": 0.6021239757537842, |
|
"learning_rate": 1.063625164933124e-06, |
|
"logits/chosen": -0.3272395730018616, |
|
"logits/rejected": -1.5343236923217773, |
|
"logps/chosen": -342.50823974609375, |
|
"logps/rejected": -292.23687744140625, |
|
"loss": 0.3531, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.02801601216197014, |
|
"rewards/margins": 0.9027827382087708, |
|
"rewards/rejected": -0.9307987093925476, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 2.1871559633027524, |
|
"grad_norm": 0.5641785860061646, |
|
"learning_rate": 1.0288060714619359e-06, |
|
"logits/chosen": -0.5544046759605408, |
|
"logits/rejected": -1.6515761613845825, |
|
"logps/chosen": -329.2125244140625, |
|
"logps/rejected": -242.6419677734375, |
|
"loss": 0.3905, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.032897885888814926, |
|
"rewards/margins": 0.794459879398346, |
|
"rewards/rejected": -0.8273577690124512, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 0.5238489508628845, |
|
"learning_rate": 9.944181861046188e-07, |
|
"logits/chosen": -0.36284640431404114, |
|
"logits/rejected": -1.429746389389038, |
|
"logps/chosen": -352.5851745605469, |
|
"logps/rejected": -284.362060546875, |
|
"loss": 0.3944, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.10142821073532104, |
|
"rewards/margins": 0.8211413621902466, |
|
"rewards/rejected": -0.9225695133209229, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.21651376146789, |
|
"grad_norm": 0.5187690258026123, |
|
"learning_rate": 9.604715879777986e-07, |
|
"logits/chosen": -0.4002646505832672, |
|
"logits/rejected": -1.6734257936477661, |
|
"logps/chosen": -292.17169189453125, |
|
"logps/rejected": -226.01405334472656, |
|
"loss": 0.3973, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.04072728008031845, |
|
"rewards/margins": 0.7619785666465759, |
|
"rewards/rejected": -0.8027058243751526, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.231192660550459, |
|
"grad_norm": 0.4873553514480591, |
|
"learning_rate": 9.269762268564616e-07, |
|
"logits/chosen": -0.4840170443058014, |
|
"logits/rejected": -1.6344892978668213, |
|
"logps/chosen": -267.25799560546875, |
|
"logps/rejected": -232.37481689453125, |
|
"loss": 0.4002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.039373546838760376, |
|
"rewards/margins": 0.7559720277786255, |
|
"rewards/rejected": -0.7953456044197083, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.2458715596330276, |
|
"grad_norm": 0.5291929841041565, |
|
"learning_rate": 8.939419202576694e-07, |
|
"logits/chosen": -0.3162109851837158, |
|
"logits/rejected": -1.258612036705017, |
|
"logps/chosen": -271.66522216796875, |
|
"logps/rejected": -249.94686889648438, |
|
"loss": 0.4307, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.05568050593137741, |
|
"rewards/margins": 0.6604134440422058, |
|
"rewards/rejected": -0.7160939574241638, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.2605504587155965, |
|
"grad_norm": 0.589246928691864, |
|
"learning_rate": 8.61378350563033e-07, |
|
"logits/chosen": -0.4362238645553589, |
|
"logits/rejected": -1.4185831546783447, |
|
"logps/chosen": -263.8197937011719, |
|
"logps/rejected": -271.3200988769531, |
|
"loss": 0.3881, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05827101320028305, |
|
"rewards/margins": 0.7928776741027832, |
|
"rewards/rejected": -0.8511487245559692, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.2752293577981653, |
|
"grad_norm": 0.5542588233947754, |
|
"learning_rate": 8.292950621808022e-07, |
|
"logits/chosen": -0.4912061393260956, |
|
"logits/rejected": -1.502698540687561, |
|
"logps/chosen": -298.4087829589844, |
|
"logps/rejected": -272.3042297363281, |
|
"loss": 0.3722, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.046152256429195404, |
|
"rewards/margins": 0.8406662344932556, |
|
"rewards/rejected": -0.8868184685707092, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.289908256880734, |
|
"grad_norm": 0.6038070917129517, |
|
"learning_rate": 7.977014587483925e-07, |
|
"logits/chosen": -0.47845709323883057, |
|
"logits/rejected": -1.4373188018798828, |
|
"logps/chosen": -289.3947448730469, |
|
"logps/rejected": -312.0728454589844, |
|
"loss": 0.3736, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.07383082807064056, |
|
"rewards/margins": 0.8470745086669922, |
|
"rewards/rejected": -0.9209052920341492, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.304587155963303, |
|
"grad_norm": 0.5219136476516724, |
|
"learning_rate": 7.666068003761684e-07, |
|
"logits/chosen": -0.44936680793762207, |
|
"logits/rejected": -1.5418882369995117, |
|
"logps/chosen": -314.66888427734375, |
|
"logps/rejected": -251.60569763183594, |
|
"loss": 0.3822, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.09824804216623306, |
|
"rewards/margins": 0.8173399567604065, |
|
"rewards/rejected": -0.9155880212783813, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.3192660550458717, |
|
"grad_norm": 0.6003636717796326, |
|
"learning_rate": 7.360202009332993e-07, |
|
"logits/chosen": -0.550973653793335, |
|
"logits/rejected": -1.536906361579895, |
|
"logps/chosen": -312.51824951171875, |
|
"logps/rejected": -269.9045715332031, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.08020561188459396, |
|
"rewards/margins": 0.8511077165603638, |
|
"rewards/rejected": -0.9313133358955383, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.3339449541284405, |
|
"grad_norm": 0.5844477415084839, |
|
"learning_rate": 7.059506253764773e-07, |
|
"logits/chosen": -0.3962401747703552, |
|
"logits/rejected": -1.4225605726242065, |
|
"logps/chosen": -331.3130187988281, |
|
"logps/rejected": -285.5632629394531, |
|
"loss": 0.3588, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09098455309867859, |
|
"rewards/margins": 0.9076490998268127, |
|
"rewards/rejected": -0.998633623123169, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.3486238532110093, |
|
"grad_norm": 0.6208101511001587, |
|
"learning_rate": 6.764068871222825e-07, |
|
"logits/chosen": -0.20838147401809692, |
|
"logits/rejected": -1.3723992109298706, |
|
"logps/chosen": -301.42413330078125, |
|
"logps/rejected": -268.25189208984375, |
|
"loss": 0.3804, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.05734700709581375, |
|
"rewards/margins": 0.8165179491043091, |
|
"rewards/rejected": -0.8738648295402527, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.363302752293578, |
|
"grad_norm": 0.5378314256668091, |
|
"learning_rate": 6.473976454639608e-07, |
|
"logits/chosen": -0.42506375908851624, |
|
"logits/rejected": -1.539266586303711, |
|
"logps/chosen": -309.27655029296875, |
|
"logps/rejected": -254.3873291015625, |
|
"loss": 0.3688, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06409917771816254, |
|
"rewards/margins": 0.875389039516449, |
|
"rewards/rejected": -0.9394882917404175, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.377981651376147, |
|
"grad_norm": 0.589598536491394, |
|
"learning_rate": 6.189314030333796e-07, |
|
"logits/chosen": -0.27296969294548035, |
|
"logits/rejected": -1.2785528898239136, |
|
"logps/chosen": -298.090087890625, |
|
"logps/rejected": -310.42254638671875, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09839644283056259, |
|
"rewards/margins": 0.8959672451019287, |
|
"rewards/rejected": -0.9943636655807495, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.3926605504587157, |
|
"grad_norm": 0.5901959538459778, |
|
"learning_rate": 5.910165033089e-07, |
|
"logits/chosen": -0.3646131157875061, |
|
"logits/rejected": -1.4721994400024414, |
|
"logps/chosen": -334.2178955078125, |
|
"logps/rejected": -286.6039123535156, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.08981014043092728, |
|
"rewards/margins": 0.8417661190032959, |
|
"rewards/rejected": -0.9315763711929321, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.4073394495412845, |
|
"grad_norm": 0.5413662195205688, |
|
"learning_rate": 5.636611281698956e-07, |
|
"logits/chosen": -0.4042501449584961, |
|
"logits/rejected": -1.3470209836959839, |
|
"logps/chosen": -279.89361572265625, |
|
"logps/rejected": -267.4847717285156, |
|
"loss": 0.3875, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09789961576461792, |
|
"rewards/margins": 0.8031109571456909, |
|
"rewards/rejected": -0.9010105133056641, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.4220183486238533, |
|
"grad_norm": 0.5607224702835083, |
|
"learning_rate": 5.368732954986389e-07, |
|
"logits/chosen": -0.5203760266304016, |
|
"logits/rejected": -1.5128971338272095, |
|
"logps/chosen": -297.7373352050781, |
|
"logps/rejected": -281.8901672363281, |
|
"loss": 0.3811, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10567890852689743, |
|
"rewards/margins": 0.8281629681587219, |
|
"rewards/rejected": -0.9338418245315552, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.436697247706422, |
|
"grad_norm": 0.5843138098716736, |
|
"learning_rate": 5.106608568302504e-07, |
|
"logits/chosen": -0.5107908248901367, |
|
"logits/rejected": -1.3955906629562378, |
|
"logps/chosen": -279.21630859375, |
|
"logps/rejected": -292.3779296875, |
|
"loss": 0.3586, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.13425037264823914, |
|
"rewards/margins": 0.9213627576828003, |
|
"rewards/rejected": -1.0556131601333618, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.451376146788991, |
|
"grad_norm": 0.5397231578826904, |
|
"learning_rate": 4.850314950514124e-07, |
|
"logits/chosen": -0.36430057883262634, |
|
"logits/rejected": -1.3340245485305786, |
|
"logps/chosen": -299.36865234375, |
|
"logps/rejected": -284.7265319824219, |
|
"loss": 0.361, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09645633399486542, |
|
"rewards/margins": 0.9097772836685181, |
|
"rewards/rejected": -1.0062335729599, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.4660550458715598, |
|
"grad_norm": 0.5240079760551453, |
|
"learning_rate": 4.599927221485034e-07, |
|
"logits/chosen": -0.32706713676452637, |
|
"logits/rejected": -1.453523874282837, |
|
"logps/chosen": -295.8548278808594, |
|
"logps/rejected": -258.05938720703125, |
|
"loss": 0.383, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10441353917121887, |
|
"rewards/margins": 0.8487686514854431, |
|
"rewards/rejected": -0.9531821608543396, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.4807339449541286, |
|
"grad_norm": 0.5769848823547363, |
|
"learning_rate": 4.3555187700583175e-07, |
|
"logits/chosen": -0.4174026846885681, |
|
"logits/rejected": -1.4489986896514893, |
|
"logps/chosen": -282.514892578125, |
|
"logps/rejected": -281.2804260253906, |
|
"loss": 0.3509, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09293614327907562, |
|
"rewards/margins": 0.9244270324707031, |
|
"rewards/rejected": -1.01736319065094, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.4954128440366974, |
|
"grad_norm": 0.5703207850456238, |
|
"learning_rate": 4.1171612325460244e-07, |
|
"logits/chosen": -0.38725194334983826, |
|
"logits/rejected": -1.357424020767212, |
|
"logps/chosen": -300.54241943359375, |
|
"logps/rejected": -271.0777282714844, |
|
"loss": 0.3873, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.14016719162464142, |
|
"rewards/margins": 0.8042227029800415, |
|
"rewards/rejected": -0.9443899393081665, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.510091743119266, |
|
"grad_norm": 0.5701383352279663, |
|
"learning_rate": 3.8849244717325206e-07, |
|
"logits/chosen": -0.31784820556640625, |
|
"logits/rejected": -1.2639849185943604, |
|
"logps/chosen": -284.1618957519531, |
|
"logps/rejected": -297.38214111328125, |
|
"loss": 0.3455, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.06797848641872406, |
|
"rewards/margins": 0.9692531228065491, |
|
"rewards/rejected": -1.037231683731079, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.524770642201835, |
|
"grad_norm": 0.5302276611328125, |
|
"learning_rate": 3.658876556397628e-07, |
|
"logits/chosen": -0.5857473015785217, |
|
"logits/rejected": -1.5368033647537231, |
|
"logps/chosen": -277.2721252441406, |
|
"logps/rejected": -261.3997802734375, |
|
"loss": 0.379, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.13553106784820557, |
|
"rewards/margins": 0.8463405966758728, |
|
"rewards/rejected": -0.9818716049194336, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.539449541284404, |
|
"grad_norm": 0.5698922276496887, |
|
"learning_rate": 3.4390837413656256e-07, |
|
"logits/chosen": -0.46655187010765076, |
|
"logits/rejected": -1.444958209991455, |
|
"logps/chosen": -296.5066833496094, |
|
"logps/rejected": -297.473388671875, |
|
"loss": 0.3594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09718722105026245, |
|
"rewards/margins": 0.9200912714004517, |
|
"rewards/rejected": -1.0172785520553589, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.5541284403669726, |
|
"grad_norm": 0.5573806166648865, |
|
"learning_rate": 3.225610448085903e-07, |
|
"logits/chosen": -0.4520176649093628, |
|
"logits/rejected": -1.3598614931106567, |
|
"logps/chosen": -291.6710205078125, |
|
"logps/rejected": -278.5926818847656, |
|
"loss": 0.3636, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.1283694952726364, |
|
"rewards/margins": 0.9049345254898071, |
|
"rewards/rejected": -1.03330397605896, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.5688073394495414, |
|
"grad_norm": 0.5719032287597656, |
|
"learning_rate": 3.018519245750989e-07, |
|
"logits/chosen": -0.40388399362564087, |
|
"logits/rejected": -1.2858245372772217, |
|
"logps/chosen": -343.05096435546875, |
|
"logps/rejected": -316.9675598144531, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.13811713457107544, |
|
"rewards/margins": 0.8857739567756653, |
|
"rewards/rejected": -1.0238910913467407, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.5834862385321102, |
|
"grad_norm": 0.5600932836532593, |
|
"learning_rate": 2.817870832957459e-07, |
|
"logits/chosen": -0.3744252026081085, |
|
"logits/rejected": -1.35723876953125, |
|
"logps/chosen": -280.0473937988281, |
|
"logps/rejected": -272.26171875, |
|
"loss": 0.3691, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.12319761514663696, |
|
"rewards/margins": 0.8849235773086548, |
|
"rewards/rejected": -1.008121132850647, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.598165137614679, |
|
"grad_norm": 0.546535849571228, |
|
"learning_rate": 2.6237240199151386e-07, |
|
"logits/chosen": -0.4391903281211853, |
|
"logits/rejected": -1.4896560907363892, |
|
"logps/chosen": -285.84893798828125, |
|
"logps/rejected": -258.9007568359375, |
|
"loss": 0.3916, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.1297137588262558, |
|
"rewards/margins": 0.8063170313835144, |
|
"rewards/rejected": -0.9360308647155762, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.612844036697248, |
|
"grad_norm": 0.5673303604125977, |
|
"learning_rate": 2.436135711209786e-07, |
|
"logits/chosen": -0.6936957240104675, |
|
"logits/rejected": -1.6270612478256226, |
|
"logps/chosen": -302.1131896972656, |
|
"logps/rejected": -250.82293701171875, |
|
"loss": 0.3934, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14778339862823486, |
|
"rewards/margins": 0.7892367839813232, |
|
"rewards/rejected": -0.9370202422142029, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.6275229357798167, |
|
"grad_norm": 0.6546534895896912, |
|
"learning_rate": 2.2551608891243026e-07, |
|
"logits/chosen": -0.5974135398864746, |
|
"logits/rejected": -1.4793545007705688, |
|
"logps/chosen": -379.8813781738281, |
|
"logps/rejected": -303.40411376953125, |
|
"loss": 0.3921, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.19369718432426453, |
|
"rewards/margins": 0.7835862040519714, |
|
"rewards/rejected": -0.9772832989692688, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.6422018348623855, |
|
"grad_norm": 0.6076889634132385, |
|
"learning_rate": 2.0808525975233807e-07, |
|
"logits/chosen": -0.2836154103279114, |
|
"logits/rejected": -1.2979185581207275, |
|
"logps/chosen": -305.5050048828125, |
|
"logps/rejected": -292.86859130859375, |
|
"loss": 0.373, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.152679443359375, |
|
"rewards/margins": 0.8530596494674683, |
|
"rewards/rejected": -1.0057389736175537, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.6568807339449543, |
|
"grad_norm": 0.555210530757904, |
|
"learning_rate": 1.9132619263063144e-07, |
|
"logits/chosen": -0.300199031829834, |
|
"logits/rejected": -1.4027831554412842, |
|
"logps/chosen": -365.9290771484375, |
|
"logps/rejected": -310.5292053222656, |
|
"loss": 0.3499, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10394760221242905, |
|
"rewards/margins": 0.9682717323303223, |
|
"rewards/rejected": -1.0722193717956543, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.671559633027523, |
|
"grad_norm": 0.5635836720466614, |
|
"learning_rate": 1.7524379964325155e-07, |
|
"logits/chosen": -0.4411580562591553, |
|
"logits/rejected": -1.4416756629943848, |
|
"logps/chosen": -352.4068603515625, |
|
"logps/rejected": -300.6695556640625, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1655517965555191, |
|
"rewards/margins": 0.889607310295105, |
|
"rewards/rejected": -1.0551592111587524, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.686238532110092, |
|
"grad_norm": 0.5780466198921204, |
|
"learning_rate": 1.5984279455240975e-07, |
|
"logits/chosen": -0.43781769275665283, |
|
"logits/rejected": -1.4340150356292725, |
|
"logps/chosen": -301.8743896484375, |
|
"logps/rejected": -287.69879150390625, |
|
"loss": 0.3558, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11059735715389252, |
|
"rewards/margins": 0.9329235553741455, |
|
"rewards/rejected": -1.0435209274291992, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.7009174311926607, |
|
"grad_norm": 0.5682902932167053, |
|
"learning_rate": 1.451276914049818e-07, |
|
"logits/chosen": -0.4304981231689453, |
|
"logits/rejected": -1.4089939594268799, |
|
"logps/chosen": -277.64056396484375, |
|
"logps/rejected": -270.2991027832031, |
|
"loss": 0.365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13452741503715515, |
|
"rewards/margins": 0.877845048904419, |
|
"rewards/rejected": -1.0123724937438965, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.7155963302752295, |
|
"grad_norm": 0.6522338390350342, |
|
"learning_rate": 1.3110280320943692e-07, |
|
"logits/chosen": -0.39882513880729675, |
|
"logits/rejected": -1.5327268838882446, |
|
"logps/chosen": -292.3040771484375, |
|
"logps/rejected": -268.6988830566406, |
|
"loss": 0.3549, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.12823128700256348, |
|
"rewards/margins": 0.9290120005607605, |
|
"rewards/rejected": -1.0572432279586792, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.7302752293577983, |
|
"grad_norm": 0.5501285195350647, |
|
"learning_rate": 1.1777224067169218e-07, |
|
"logits/chosen": -0.3276221454143524, |
|
"logits/rejected": -1.3283724784851074, |
|
"logps/chosen": -297.44525146484375, |
|
"logps/rejected": -289.14410400390625, |
|
"loss": 0.3445, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09726225584745407, |
|
"rewards/margins": 0.9656534194946289, |
|
"rewards/rejected": -1.0629156827926636, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.744954128440367, |
|
"grad_norm": 0.544796884059906, |
|
"learning_rate": 1.0513991099025872e-07, |
|
"logits/chosen": -0.47547709941864014, |
|
"logits/rejected": -1.439399242401123, |
|
"logps/chosen": -344.34466552734375, |
|
"logps/rejected": -279.008544921875, |
|
"loss": 0.3888, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.12739048898220062, |
|
"rewards/margins": 0.8009309768676758, |
|
"rewards/rejected": -0.9283214807510376, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.759633027522936, |
|
"grad_norm": 0.5676019787788391, |
|
"learning_rate": 9.320951671104194e-08, |
|
"logits/chosen": -0.3190627694129944, |
|
"logits/rejected": -1.4392387866973877, |
|
"logps/chosen": -331.2420654296875, |
|
"logps/rejected": -280.6221923828125, |
|
"loss": 0.3629, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.07452848553657532, |
|
"rewards/margins": 0.8963598608970642, |
|
"rewards/rejected": -0.9708882570266724, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.7743119266055047, |
|
"grad_norm": 0.5123610496520996, |
|
"learning_rate": 8.198455464212108e-08, |
|
"logits/chosen": -0.49384158849716187, |
|
"logits/rejected": -1.3851827383041382, |
|
"logps/chosen": -312.5827331542969, |
|
"logps/rejected": -271.12469482421875, |
|
"loss": 0.3533, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10122832655906677, |
|
"rewards/margins": 0.9414404630661011, |
|
"rewards/rejected": -1.0426688194274902, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.7889908256880735, |
|
"grad_norm": 0.5507712364196777, |
|
"learning_rate": 7.146831482883115e-08, |
|
"logits/chosen": -0.22636638581752777, |
|
"logits/rejected": -1.4128222465515137, |
|
"logps/chosen": -318.0820007324219, |
|
"logps/rejected": -273.08013916015625, |
|
"loss": 0.34, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11986364424228668, |
|
"rewards/margins": 0.9853267669677734, |
|
"rewards/rejected": -1.1051905155181885, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.8036697247706424, |
|
"grad_norm": 0.5077707171440125, |
|
"learning_rate": 6.16638795894492e-08, |
|
"logits/chosen": -0.343766987323761, |
|
"logits/rejected": -1.3005136251449585, |
|
"logps/chosen": -281.5782775878906, |
|
"logps/rejected": -294.2169189453125, |
|
"loss": 0.3668, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.11225134879350662, |
|
"rewards/margins": 0.9098389148712158, |
|
"rewards/rejected": -1.022090196609497, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.818348623853211, |
|
"grad_norm": 0.609289288520813, |
|
"learning_rate": 5.257412261176375e-08, |
|
"logits/chosen": -0.5314547419548035, |
|
"logits/rejected": -1.3832324743270874, |
|
"logps/chosen": -294.449951171875, |
|
"logps/rejected": -284.65289306640625, |
|
"loss": 0.3605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1251775324344635, |
|
"rewards/margins": 0.8922191858291626, |
|
"rewards/rejected": -1.0173966884613037, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.83302752293578, |
|
"grad_norm": 0.5588046312332153, |
|
"learning_rate": 4.4201708110795384e-08, |
|
"logits/chosen": -0.4476713240146637, |
|
"logits/rejected": -1.302433967590332, |
|
"logps/chosen": -314.71429443359375, |
|
"logps/rejected": -296.807373046875, |
|
"loss": 0.3729, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1394634246826172, |
|
"rewards/margins": 0.8604406714439392, |
|
"rewards/rejected": -0.9999040365219116, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.847706422018349, |
|
"grad_norm": 0.6206101179122925, |
|
"learning_rate": 3.654909004791152e-08, |
|
"logits/chosen": -0.3860604763031006, |
|
"logits/rejected": -1.4739010334014893, |
|
"logps/chosen": -312.785400390625, |
|
"logps/rejected": -281.09820556640625, |
|
"loss": 0.3548, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11538831889629364, |
|
"rewards/margins": 0.9405367374420166, |
|
"rewards/rejected": -1.0559251308441162, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.8623853211009176, |
|
"grad_norm": 0.6180599331855774, |
|
"learning_rate": 2.9618511411570462e-08, |
|
"logits/chosen": -0.4637264013290405, |
|
"logits/rejected": -1.3796968460083008, |
|
"logps/chosen": -310.7637023925781, |
|
"logps/rejected": -268.624755859375, |
|
"loss": 0.3783, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.19053927063941956, |
|
"rewards/margins": 0.8588754534721375, |
|
"rewards/rejected": -1.0494147539138794, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.8770642201834864, |
|
"grad_norm": 0.5578924417495728, |
|
"learning_rate": 2.3412003559898088e-08, |
|
"logits/chosen": -0.3917969763278961, |
|
"logits/rejected": -1.2537633180618286, |
|
"logps/chosen": -292.4007568359375, |
|
"logps/rejected": -298.84539794921875, |
|
"loss": 0.3728, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1294248104095459, |
|
"rewards/margins": 0.8558358550071716, |
|
"rewards/rejected": -0.9852606654167175, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.891743119266055, |
|
"grad_norm": 0.5721162557601929, |
|
"learning_rate": 1.793138562529634e-08, |
|
"logits/chosen": -0.3834454417228699, |
|
"logits/rejected": -1.4521803855895996, |
|
"logps/chosen": -366.2247009277344, |
|
"logps/rejected": -271.80657958984375, |
|
"loss": 0.375, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10283366590738297, |
|
"rewards/margins": 0.8488227128982544, |
|
"rewards/rejected": -0.9516564011573792, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.906422018348624, |
|
"grad_norm": 0.5580772757530212, |
|
"learning_rate": 1.317826398125277e-08, |
|
"logits/chosen": -0.4612494707107544, |
|
"logits/rejected": -1.4557621479034424, |
|
"logps/chosen": -314.24786376953125, |
|
"logps/rejected": -307.6085205078125, |
|
"loss": 0.3385, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.12358563393354416, |
|
"rewards/margins": 1.0029027462005615, |
|
"rewards/rejected": -1.1264883279800415, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.921100917431193, |
|
"grad_norm": 0.6028111577033997, |
|
"learning_rate": 9.15403177151275e-09, |
|
"logits/chosen": -0.41042178869247437, |
|
"logits/rejected": -1.234325647354126, |
|
"logps/chosen": -296.40911865234375, |
|
"logps/rejected": -316.9093322753906, |
|
"loss": 0.3416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11352317035198212, |
|
"rewards/margins": 0.9606138467788696, |
|
"rewards/rejected": -1.0741369724273682, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.9357798165137616, |
|
"grad_norm": 0.5219904780387878, |
|
"learning_rate": 5.85986850174608e-09, |
|
"logits/chosen": -0.33844003081321716, |
|
"logits/rejected": -1.5768249034881592, |
|
"logps/chosen": -332.9895935058594, |
|
"logps/rejected": -285.1691589355469, |
|
"loss": 0.3473, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11483018100261688, |
|
"rewards/margins": 0.9699925780296326, |
|
"rewards/rejected": -1.0848227739334106, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.9504587155963304, |
|
"grad_norm": 0.5602099299430847, |
|
"learning_rate": 3.296739693834927e-09, |
|
"logits/chosen": -0.6037732362747192, |
|
"logits/rejected": -1.444136142730713, |
|
"logps/chosen": -334.375732421875, |
|
"logps/rejected": -274.39801025390625, |
|
"loss": 0.4044, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.21680763363838196, |
|
"rewards/margins": 0.7622767686843872, |
|
"rewards/rejected": -0.979084312915802, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.9651376146788992, |
|
"grad_norm": 0.6433922052383423, |
|
"learning_rate": 1.4653966028774225e-09, |
|
"logits/chosen": -0.3486376702785492, |
|
"logits/rejected": -1.3044434785842896, |
|
"logps/chosen": -336.37518310546875, |
|
"logps/rejected": -319.5438537597656, |
|
"loss": 0.3315, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.14535093307495117, |
|
"rewards/margins": 1.0080125331878662, |
|
"rewards/rejected": -1.1533633470535278, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.979816513761468, |
|
"grad_norm": 0.5690057277679443, |
|
"learning_rate": 3.6637599699351766e-10, |
|
"logits/chosen": -0.4578551948070526, |
|
"logits/rejected": -1.5770193338394165, |
|
"logps/chosen": -310.1904296875, |
|
"logps/rejected": -271.07672119140625, |
|
"loss": 0.3713, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.12008870393037796, |
|
"rewards/margins": 0.8647584915161133, |
|
"rewards/rejected": -0.9848471879959106, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"grad_norm": 0.624344527721405, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.488542377948761, |
|
"logits/rejected": -1.3220264911651611, |
|
"logps/chosen": -341.4158630371094, |
|
"logps/rejected": -309.9995422363281, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.12878134846687317, |
|
"rewards/margins": 0.9226263761520386, |
|
"rewards/rejected": -1.0514076948165894, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.994495412844037, |
|
"step": 408, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_loss": 0.514469311225648, |
|
"train_runtime": 8073.8639, |
|
"train_samples_per_second": 1.619, |
|
"train_steps_per_second": 0.051 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.837376281021809e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |