|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 42.15456008911133, |
|
"kl": 0.017818570137023926, |
|
"learning_rate": 1.592356687898089e-08, |
|
"logps/chosen": -285.75128173828125, |
|
"logps/rejected": -254.7062530517578, |
|
"loss": 0.4999, |
|
"rewards/chosen": 0.004669209010899067, |
|
"rewards/margins": 0.0025329389609396458, |
|
"rewards/rejected": 0.0021362705156207085, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 44.65619659423828, |
|
"kl": 0.05461766570806503, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logps/chosen": -286.11944580078125, |
|
"logps/rejected": -276.1832580566406, |
|
"loss": 0.496, |
|
"rewards/chosen": 0.03117586299777031, |
|
"rewards/margins": 0.033454541116952896, |
|
"rewards/rejected": -0.00227867579087615, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 39.61723327636719, |
|
"kl": 0.44255906343460083, |
|
"learning_rate": 4.777070063694268e-08, |
|
"logps/chosen": -269.203125, |
|
"logps/rejected": -260.71966552734375, |
|
"loss": 0.4849, |
|
"rewards/chosen": 0.13460782170295715, |
|
"rewards/margins": 0.11511580646038055, |
|
"rewards/rejected": 0.0194920115172863, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 36.42387771606445, |
|
"kl": 0.871076226234436, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logps/chosen": -244.55203247070312, |
|
"logps/rejected": -259.15496826171875, |
|
"loss": 0.4573, |
|
"rewards/chosen": 0.3324822187423706, |
|
"rewards/margins": 0.3418889045715332, |
|
"rewards/rejected": -0.00940666627138853, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 35.794044494628906, |
|
"kl": 0.3753414750099182, |
|
"learning_rate": 7.961783439490445e-08, |
|
"logps/chosen": -261.01800537109375, |
|
"logps/rejected": -271.74786376953125, |
|
"loss": 0.4164, |
|
"rewards/chosen": 0.48356980085372925, |
|
"rewards/margins": 0.7098164558410645, |
|
"rewards/rejected": -0.22624659538269043, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 34.50333786010742, |
|
"kl": 0.0, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logps/chosen": -306.0127868652344, |
|
"logps/rejected": -244.1356658935547, |
|
"loss": 0.3649, |
|
"rewards/chosen": 0.746610164642334, |
|
"rewards/margins": 1.2027219533920288, |
|
"rewards/rejected": -0.4561118483543396, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 30.50051498413086, |
|
"kl": 0.0, |
|
"learning_rate": 1.1146496815286624e-07, |
|
"logps/chosen": -280.65692138671875, |
|
"logps/rejected": -262.2502136230469, |
|
"loss": 0.3171, |
|
"rewards/chosen": 0.774597704410553, |
|
"rewards/margins": 1.728833794593811, |
|
"rewards/rejected": -0.9542360305786133, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 29.806577682495117, |
|
"kl": 0.0, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logps/chosen": -276.9497375488281, |
|
"logps/rejected": -259.6419982910156, |
|
"loss": 0.2884, |
|
"rewards/chosen": 0.6780191659927368, |
|
"rewards/margins": 2.165379047393799, |
|
"rewards/rejected": -1.4873597621917725, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 22.508939743041992, |
|
"kl": 0.0, |
|
"learning_rate": 1.43312101910828e-07, |
|
"logps/chosen": -253.7576141357422, |
|
"logps/rejected": -268.8052062988281, |
|
"loss": 0.2596, |
|
"rewards/chosen": 1.205094575881958, |
|
"rewards/margins": 2.685385227203369, |
|
"rewards/rejected": -1.4802907705307007, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 29.29058074951172, |
|
"kl": 0.0, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logps/chosen": -284.4584045410156, |
|
"logps/rejected": -254.226806640625, |
|
"loss": 0.2614, |
|
"rewards/chosen": 1.0118796825408936, |
|
"rewards/margins": 2.8269615173339844, |
|
"rewards/rejected": -1.8150818347930908, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 29.25771141052246, |
|
"kl": 0.0, |
|
"learning_rate": 1.7515923566878978e-07, |
|
"logps/chosen": -266.92254638671875, |
|
"logps/rejected": -260.49261474609375, |
|
"loss": 0.233, |
|
"rewards/chosen": 1.16305673122406, |
|
"rewards/margins": 3.4210219383239746, |
|
"rewards/rejected": -2.257965326309204, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 20.642614364624023, |
|
"kl": 0.0, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logps/chosen": -231.07168579101562, |
|
"logps/rejected": -258.5835876464844, |
|
"loss": 0.241, |
|
"rewards/chosen": 1.3277983665466309, |
|
"rewards/margins": 3.532343626022339, |
|
"rewards/rejected": -2.204545497894287, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 23.786853790283203, |
|
"kl": 0.0, |
|
"learning_rate": 2.070063694267516e-07, |
|
"logps/chosen": -251.5561981201172, |
|
"logps/rejected": -267.85986328125, |
|
"loss": 0.2376, |
|
"rewards/chosen": 1.2313438653945923, |
|
"rewards/margins": 3.837125062942505, |
|
"rewards/rejected": -2.605781316757202, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 24.48412322998047, |
|
"kl": 0.0, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logps/chosen": -244.3961639404297, |
|
"logps/rejected": -248.7190704345703, |
|
"loss": 0.2304, |
|
"rewards/chosen": 1.338683843612671, |
|
"rewards/margins": 3.6262855529785156, |
|
"rewards/rejected": -2.287601947784424, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 16.654287338256836, |
|
"kl": 0.0, |
|
"learning_rate": 2.388535031847134e-07, |
|
"logps/chosen": -261.95819091796875, |
|
"logps/rejected": -291.40020751953125, |
|
"loss": 0.2003, |
|
"rewards/chosen": 1.3882195949554443, |
|
"rewards/margins": 4.626662254333496, |
|
"rewards/rejected": -3.238443374633789, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 30.89764976501465, |
|
"kl": 0.0, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logps/chosen": -267.4027099609375, |
|
"logps/rejected": -271.3486328125, |
|
"loss": 0.1985, |
|
"rewards/chosen": 1.4253151416778564, |
|
"rewards/margins": 4.816943645477295, |
|
"rewards/rejected": -3.3916287422180176, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 55.57185363769531, |
|
"kl": 0.0, |
|
"learning_rate": 2.7070063694267513e-07, |
|
"logps/chosen": -289.416259765625, |
|
"logps/rejected": -282.303955078125, |
|
"loss": 0.2075, |
|
"rewards/chosen": 1.308215856552124, |
|
"rewards/margins": 5.0450119972229, |
|
"rewards/rejected": -3.7367959022521973, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 34.31930923461914, |
|
"kl": 0.0, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logps/chosen": -255.4053192138672, |
|
"logps/rejected": -293.8216247558594, |
|
"loss": 0.1877, |
|
"rewards/chosen": 1.560572862625122, |
|
"rewards/margins": 5.238338470458984, |
|
"rewards/rejected": -3.6777656078338623, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 22.21489143371582, |
|
"kl": 0.0, |
|
"learning_rate": 3.0254777070063694e-07, |
|
"logps/chosen": -286.2869873046875, |
|
"logps/rejected": -271.2530212402344, |
|
"loss": 0.1985, |
|
"rewards/chosen": 1.478058099746704, |
|
"rewards/margins": 5.051484107971191, |
|
"rewards/rejected": -3.5734260082244873, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 29.9346981048584, |
|
"kl": 0.0, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logps/chosen": -245.080810546875, |
|
"logps/rejected": -287.15435791015625, |
|
"loss": 0.1656, |
|
"rewards/chosen": 1.4543142318725586, |
|
"rewards/margins": 5.874560356140137, |
|
"rewards/rejected": -4.420246124267578, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 22.431930541992188, |
|
"kl": 0.0, |
|
"learning_rate": 3.343949044585987e-07, |
|
"logps/chosen": -259.4337463378906, |
|
"logps/rejected": -309.25164794921875, |
|
"loss": 0.1767, |
|
"rewards/chosen": 1.4657680988311768, |
|
"rewards/margins": 5.859877109527588, |
|
"rewards/rejected": -4.39410924911499, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 36.517799377441406, |
|
"kl": 0.0, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logps/chosen": -264.08306884765625, |
|
"logps/rejected": -295.64788818359375, |
|
"loss": 0.1599, |
|
"rewards/chosen": 1.291338562965393, |
|
"rewards/margins": 5.8188042640686035, |
|
"rewards/rejected": -4.5274658203125, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 25.18623924255371, |
|
"kl": 0.0, |
|
"learning_rate": 3.6624203821656045e-07, |
|
"logps/chosen": -268.96527099609375, |
|
"logps/rejected": -278.4827575683594, |
|
"loss": 0.1735, |
|
"rewards/chosen": 1.4270073175430298, |
|
"rewards/margins": 5.516595363616943, |
|
"rewards/rejected": -4.0895891189575195, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 24.899450302124023, |
|
"kl": 0.0, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logps/chosen": -257.46807861328125, |
|
"logps/rejected": -306.6875305175781, |
|
"loss": 0.1675, |
|
"rewards/chosen": 1.363029956817627, |
|
"rewards/margins": 5.9437384605407715, |
|
"rewards/rejected": -4.580708980560303, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 18.1636962890625, |
|
"kl": 0.0, |
|
"learning_rate": 3.980891719745223e-07, |
|
"logps/chosen": -274.8599548339844, |
|
"logps/rejected": -316.78057861328125, |
|
"loss": 0.1509, |
|
"rewards/chosen": 1.4044160842895508, |
|
"rewards/margins": 6.668715000152588, |
|
"rewards/rejected": -5.264299392700195, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 53.83689498901367, |
|
"kl": 0.0, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logps/chosen": -316.65460205078125, |
|
"logps/rejected": -318.2517395019531, |
|
"loss": 0.1667, |
|
"rewards/chosen": 1.331627368927002, |
|
"rewards/margins": 6.6092848777771, |
|
"rewards/rejected": -5.277657508850098, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 16.44365692138672, |
|
"kl": 0.0, |
|
"learning_rate": 4.2993630573248406e-07, |
|
"logps/chosen": -242.02651977539062, |
|
"logps/rejected": -283.90106201171875, |
|
"loss": 0.1762, |
|
"rewards/chosen": 1.6127967834472656, |
|
"rewards/margins": 6.091832160949707, |
|
"rewards/rejected": -4.479035377502441, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 19.764270782470703, |
|
"kl": 0.0, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logps/chosen": -252.82180786132812, |
|
"logps/rejected": -319.73199462890625, |
|
"loss": 0.1468, |
|
"rewards/chosen": 1.5330384969711304, |
|
"rewards/margins": 6.702307224273682, |
|
"rewards/rejected": -5.169268608093262, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 21.560876846313477, |
|
"kl": 0.0, |
|
"learning_rate": 4.6178343949044587e-07, |
|
"logps/chosen": -247.28427124023438, |
|
"logps/rejected": -282.38848876953125, |
|
"loss": 0.1868, |
|
"rewards/chosen": 1.350294828414917, |
|
"rewards/margins": 5.6418867111206055, |
|
"rewards/rejected": -4.291592597961426, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 21.553871154785156, |
|
"kl": 0.0, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logps/chosen": -286.14312744140625, |
|
"logps/rejected": -281.7044372558594, |
|
"loss": 0.168, |
|
"rewards/chosen": 1.5479779243469238, |
|
"rewards/margins": 6.192603588104248, |
|
"rewards/rejected": -4.644625663757324, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 24.488561630249023, |
|
"kl": 0.0, |
|
"learning_rate": 4.936305732484076e-07, |
|
"logps/chosen": -272.4622497558594, |
|
"logps/rejected": -323.91082763671875, |
|
"loss": 0.1648, |
|
"rewards/chosen": 1.3805065155029297, |
|
"rewards/margins": 6.456129550933838, |
|
"rewards/rejected": -5.07562255859375, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 21.614229202270508, |
|
"kl": 0.0, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logps/chosen": -207.8739776611328, |
|
"logps/rejected": -324.73565673828125, |
|
"loss": 0.1435, |
|
"rewards/chosen": 1.465820074081421, |
|
"rewards/margins": 6.953179836273193, |
|
"rewards/rejected": -5.487359046936035, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 20.425729751586914, |
|
"kl": 0.0, |
|
"learning_rate": 4.971550497866287e-07, |
|
"logps/chosen": -287.088623046875, |
|
"logps/rejected": -310.1844787597656, |
|
"loss": 0.1568, |
|
"rewards/chosen": 1.5799640417099, |
|
"rewards/margins": 6.89548397064209, |
|
"rewards/rejected": -5.315520286560059, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 21.86062240600586, |
|
"kl": 0.0, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logps/chosen": -282.91741943359375, |
|
"logps/rejected": -315.76470947265625, |
|
"loss": 0.1516, |
|
"rewards/chosen": 1.442077875137329, |
|
"rewards/margins": 7.074441432952881, |
|
"rewards/rejected": -5.632363796234131, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 20.31084632873535, |
|
"kl": 0.0, |
|
"learning_rate": 4.935988620199146e-07, |
|
"logps/chosen": -274.0014953613281, |
|
"logps/rejected": -310.8323059082031, |
|
"loss": 0.1477, |
|
"rewards/chosen": 1.4551314115524292, |
|
"rewards/margins": 6.99865198135376, |
|
"rewards/rejected": -5.543520450592041, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 20.067346572875977, |
|
"kl": 0.0, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logps/chosen": -274.91705322265625, |
|
"logps/rejected": -330.29315185546875, |
|
"loss": 0.1491, |
|
"rewards/chosen": 1.5545661449432373, |
|
"rewards/margins": 7.674098014831543, |
|
"rewards/rejected": -6.119531631469727, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 26.703292846679688, |
|
"kl": 0.0, |
|
"learning_rate": 4.900426742532006e-07, |
|
"logps/chosen": -267.2247619628906, |
|
"logps/rejected": -308.6265563964844, |
|
"loss": 0.142, |
|
"rewards/chosen": 1.4495770931243896, |
|
"rewards/margins": 7.4614386558532715, |
|
"rewards/rejected": -6.011861324310303, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 22.81442642211914, |
|
"kl": 0.0, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logps/chosen": -243.8881072998047, |
|
"logps/rejected": -318.4960632324219, |
|
"loss": 0.1679, |
|
"rewards/chosen": 1.2335823774337769, |
|
"rewards/margins": 7.564291954040527, |
|
"rewards/rejected": -6.330709934234619, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 17.875883102416992, |
|
"kl": 0.0, |
|
"learning_rate": 4.864864864864865e-07, |
|
"logps/chosen": -276.0986328125, |
|
"logps/rejected": -317.36163330078125, |
|
"loss": 0.1442, |
|
"rewards/chosen": 1.5666391849517822, |
|
"rewards/margins": 7.757128715515137, |
|
"rewards/rejected": -6.190489768981934, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 22.31620216369629, |
|
"kl": 0.0, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logps/chosen": -273.843994140625, |
|
"logps/rejected": -329.43475341796875, |
|
"loss": 0.154, |
|
"rewards/chosen": 1.6012461185455322, |
|
"rewards/margins": 7.519808292388916, |
|
"rewards/rejected": -5.918562412261963, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 19.976451873779297, |
|
"kl": 0.0, |
|
"learning_rate": 4.829302987197724e-07, |
|
"logps/chosen": -284.374755859375, |
|
"logps/rejected": -326.24371337890625, |
|
"loss": 0.1378, |
|
"rewards/chosen": 1.5799314975738525, |
|
"rewards/margins": 7.78595495223999, |
|
"rewards/rejected": -6.206023216247559, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 23.919170379638672, |
|
"kl": 0.0, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logps/chosen": -236.8705291748047, |
|
"logps/rejected": -316.30413818359375, |
|
"loss": 0.1523, |
|
"rewards/chosen": 1.5169861316680908, |
|
"rewards/margins": 7.513286590576172, |
|
"rewards/rejected": -5.99630069732666, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 25.041824340820312, |
|
"kl": 0.0, |
|
"learning_rate": 4.793741109530583e-07, |
|
"logps/chosen": -219.86117553710938, |
|
"logps/rejected": -331.60418701171875, |
|
"loss": 0.1516, |
|
"rewards/chosen": 1.5909864902496338, |
|
"rewards/margins": 7.9639410972595215, |
|
"rewards/rejected": -6.372954845428467, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 25.45568084716797, |
|
"kl": 0.0, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logps/chosen": -271.017333984375, |
|
"logps/rejected": -333.2148132324219, |
|
"loss": 0.1628, |
|
"rewards/chosen": 1.5948388576507568, |
|
"rewards/margins": 7.8658246994018555, |
|
"rewards/rejected": -6.2709856033325195, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 22.259382247924805, |
|
"kl": 0.0, |
|
"learning_rate": 4.7581792318634425e-07, |
|
"logps/chosen": -253.55789184570312, |
|
"logps/rejected": -298.8450622558594, |
|
"loss": 0.1452, |
|
"rewards/chosen": 1.640355110168457, |
|
"rewards/margins": 8.043792724609375, |
|
"rewards/rejected": -6.403438568115234, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 26.135997772216797, |
|
"kl": 0.0, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logps/chosen": -235.11349487304688, |
|
"logps/rejected": -344.72369384765625, |
|
"loss": 0.1472, |
|
"rewards/chosen": 1.559660792350769, |
|
"rewards/margins": 7.8217010498046875, |
|
"rewards/rejected": -6.262040615081787, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 20.143089294433594, |
|
"kl": 0.0, |
|
"learning_rate": 4.7226173541963014e-07, |
|
"logps/chosen": -249.42196655273438, |
|
"logps/rejected": -277.12554931640625, |
|
"loss": 0.1529, |
|
"rewards/chosen": 1.6460959911346436, |
|
"rewards/margins": 7.348568916320801, |
|
"rewards/rejected": -5.702473163604736, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 23.44059181213379, |
|
"kl": 0.0, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logps/chosen": -281.3484802246094, |
|
"logps/rejected": -307.25457763671875, |
|
"loss": 0.1387, |
|
"rewards/chosen": 1.7489140033721924, |
|
"rewards/margins": 7.860198974609375, |
|
"rewards/rejected": -6.111284255981445, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 23.201915740966797, |
|
"kl": 0.0, |
|
"learning_rate": 4.6870554765291604e-07, |
|
"logps/chosen": -272.64031982421875, |
|
"logps/rejected": -310.50909423828125, |
|
"loss": 0.1356, |
|
"rewards/chosen": 1.6725397109985352, |
|
"rewards/margins": 8.012718200683594, |
|
"rewards/rejected": -6.340178489685059, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 19.45538902282715, |
|
"kl": 0.0, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logps/chosen": -272.3523254394531, |
|
"logps/rejected": -338.20867919921875, |
|
"loss": 0.1246, |
|
"rewards/chosen": 1.727900743484497, |
|
"rewards/margins": 8.646397590637207, |
|
"rewards/rejected": -6.918497562408447, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 24.896251678466797, |
|
"kl": 0.0, |
|
"learning_rate": 4.65149359886202e-07, |
|
"logps/chosen": -269.20794677734375, |
|
"logps/rejected": -328.73486328125, |
|
"loss": 0.1281, |
|
"rewards/chosen": 1.6262487173080444, |
|
"rewards/margins": 9.223333358764648, |
|
"rewards/rejected": -7.597084999084473, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 20.800025939941406, |
|
"kl": 0.0, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logps/chosen": -270.3649597167969, |
|
"logps/rejected": -330.872314453125, |
|
"loss": 0.1279, |
|
"rewards/chosen": 1.5803369283676147, |
|
"rewards/margins": 9.708114624023438, |
|
"rewards/rejected": -8.127778053283691, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 20.315540313720703, |
|
"kl": 0.0, |
|
"learning_rate": 4.615931721194879e-07, |
|
"logps/chosen": -277.21807861328125, |
|
"logps/rejected": -313.4653625488281, |
|
"loss": 0.1461, |
|
"rewards/chosen": 1.6411361694335938, |
|
"rewards/margins": 7.905195713043213, |
|
"rewards/rejected": -6.264059543609619, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 19.68859100341797, |
|
"kl": 0.0, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logps/chosen": -275.69696044921875, |
|
"logps/rejected": -295.79400634765625, |
|
"loss": 0.1547, |
|
"rewards/chosen": 1.6596572399139404, |
|
"rewards/margins": 7.9982404708862305, |
|
"rewards/rejected": -6.338583469390869, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 27.070371627807617, |
|
"kl": 0.0, |
|
"learning_rate": 4.580369843527738e-07, |
|
"logps/chosen": -272.72735595703125, |
|
"logps/rejected": -310.302734375, |
|
"loss": 0.1673, |
|
"rewards/chosen": 1.2246049642562866, |
|
"rewards/margins": 8.44649600982666, |
|
"rewards/rejected": -7.221890449523926, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 15.166617393493652, |
|
"kl": 0.0, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logps/chosen": -267.9317932128906, |
|
"logps/rejected": -316.60479736328125, |
|
"loss": 0.1304, |
|
"rewards/chosen": 1.7944562435150146, |
|
"rewards/margins": 8.752721786499023, |
|
"rewards/rejected": -6.958265781402588, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 27.143291473388672, |
|
"kl": 0.0, |
|
"learning_rate": 4.544807965860597e-07, |
|
"logps/chosen": -257.7388916015625, |
|
"logps/rejected": -308.5174560546875, |
|
"loss": 0.1421, |
|
"rewards/chosen": 1.6366589069366455, |
|
"rewards/margins": 8.678224563598633, |
|
"rewards/rejected": -7.04156494140625, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 22.1202392578125, |
|
"kl": 0.0, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logps/chosen": -266.8901062011719, |
|
"logps/rejected": -322.6683654785156, |
|
"loss": 0.1447, |
|
"rewards/chosen": 1.787255883216858, |
|
"rewards/margins": 9.206243515014648, |
|
"rewards/rejected": -7.418987274169922, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 24.7782039642334, |
|
"kl": 0.0, |
|
"learning_rate": 4.509246088193456e-07, |
|
"logps/chosen": -274.30438232421875, |
|
"logps/rejected": -342.98541259765625, |
|
"loss": 0.1421, |
|
"rewards/chosen": 1.776602029800415, |
|
"rewards/margins": 9.053030967712402, |
|
"rewards/rejected": -7.276429176330566, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 23.54313087463379, |
|
"kl": 0.0, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logps/chosen": -215.9270477294922, |
|
"logps/rejected": -303.5273742675781, |
|
"loss": 0.1438, |
|
"rewards/chosen": 1.868506669998169, |
|
"rewards/margins": 9.129117965698242, |
|
"rewards/rejected": -7.260611534118652, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 17.70969009399414, |
|
"kl": 0.0, |
|
"learning_rate": 4.4736842105263156e-07, |
|
"logps/chosen": -240.4123992919922, |
|
"logps/rejected": -336.1444396972656, |
|
"loss": 0.1403, |
|
"rewards/chosen": 1.7039588689804077, |
|
"rewards/margins": 9.00100326538086, |
|
"rewards/rejected": -7.297044277191162, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 19.908315658569336, |
|
"kl": 0.0, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logps/chosen": -212.1704864501953, |
|
"logps/rejected": -340.3092041015625, |
|
"loss": 0.1338, |
|
"rewards/chosen": 1.6462271213531494, |
|
"rewards/margins": 8.726274490356445, |
|
"rewards/rejected": -7.0800461769104, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 22.928499221801758, |
|
"kl": 0.0, |
|
"learning_rate": 4.438122332859175e-07, |
|
"logps/chosen": -243.7815704345703, |
|
"logps/rejected": -324.60772705078125, |
|
"loss": 0.1323, |
|
"rewards/chosen": 1.6998401880264282, |
|
"rewards/margins": 8.809240341186523, |
|
"rewards/rejected": -7.109400749206543, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 20.08190155029297, |
|
"kl": 0.0, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logps/chosen": -263.37725830078125, |
|
"logps/rejected": -317.4496154785156, |
|
"loss": 0.1443, |
|
"rewards/chosen": 1.706053376197815, |
|
"rewards/margins": 8.708757400512695, |
|
"rewards/rejected": -7.002703666687012, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 25.728233337402344, |
|
"kl": 0.0, |
|
"learning_rate": 4.4025604551920335e-07, |
|
"logps/chosen": -282.44989013671875, |
|
"logps/rejected": -325.9697265625, |
|
"loss": 0.1351, |
|
"rewards/chosen": 1.6976518630981445, |
|
"rewards/margins": 9.016552925109863, |
|
"rewards/rejected": -7.318901062011719, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 22.311450958251953, |
|
"kl": 0.0, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logps/chosen": -260.39447021484375, |
|
"logps/rejected": -309.620849609375, |
|
"loss": 0.1311, |
|
"rewards/chosen": 1.8077850341796875, |
|
"rewards/margins": 9.406209945678711, |
|
"rewards/rejected": -7.598425388336182, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 23.20633316040039, |
|
"kl": 0.0, |
|
"learning_rate": 4.366998577524893e-07, |
|
"logps/chosen": -302.47509765625, |
|
"logps/rejected": -318.767578125, |
|
"loss": 0.128, |
|
"rewards/chosen": 1.7027689218521118, |
|
"rewards/margins": 10.0870943069458, |
|
"rewards/rejected": -8.38432502746582, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 25.405933380126953, |
|
"kl": 0.0, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logps/chosen": -238.94827270507812, |
|
"logps/rejected": -326.12786865234375, |
|
"loss": 0.1505, |
|
"rewards/chosen": 1.7603483200073242, |
|
"rewards/margins": 9.71554183959961, |
|
"rewards/rejected": -7.955193519592285, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 22.41493034362793, |
|
"kl": 0.0, |
|
"learning_rate": 4.3314366998577524e-07, |
|
"logps/chosen": -253.826416015625, |
|
"logps/rejected": -330.7270812988281, |
|
"loss": 0.1413, |
|
"rewards/chosen": 1.8580402135849, |
|
"rewards/margins": 9.722744941711426, |
|
"rewards/rejected": -7.8647050857543945, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 26.315628051757812, |
|
"kl": 0.0, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logps/chosen": -286.4155578613281, |
|
"logps/rejected": -342.62652587890625, |
|
"loss": 0.1422, |
|
"rewards/chosen": 1.7754218578338623, |
|
"rewards/margins": 10.7272367477417, |
|
"rewards/rejected": -8.951814651489258, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 22.07290267944336, |
|
"kl": 0.0, |
|
"learning_rate": 4.2958748221906114e-07, |
|
"logps/chosen": -239.4450225830078, |
|
"logps/rejected": -310.7589111328125, |
|
"loss": 0.1108, |
|
"rewards/chosen": 1.9980005025863647, |
|
"rewards/margins": 10.0792818069458, |
|
"rewards/rejected": -8.081281661987305, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 22.56456756591797, |
|
"kl": 0.0, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logps/chosen": -269.43560791015625, |
|
"logps/rejected": -328.5326843261719, |
|
"loss": 0.1524, |
|
"rewards/chosen": 1.747078537940979, |
|
"rewards/margins": 9.907144546508789, |
|
"rewards/rejected": -8.160065650939941, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 24.159225463867188, |
|
"kl": 0.0, |
|
"learning_rate": 4.260312944523471e-07, |
|
"logps/chosen": -241.72286987304688, |
|
"logps/rejected": -353.99493408203125, |
|
"loss": 0.1185, |
|
"rewards/chosen": 2.0929551124572754, |
|
"rewards/margins": 10.861230850219727, |
|
"rewards/rejected": -8.768275260925293, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 21.119632720947266, |
|
"kl": 0.0, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logps/chosen": -263.73370361328125, |
|
"logps/rejected": -343.69866943359375, |
|
"loss": 0.1164, |
|
"rewards/chosen": 1.9714298248291016, |
|
"rewards/margins": 10.647806167602539, |
|
"rewards/rejected": -8.676377296447754, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 23.59329605102539, |
|
"kl": 0.0, |
|
"learning_rate": 4.22475106685633e-07, |
|
"logps/chosen": -244.0771026611328, |
|
"logps/rejected": -348.35693359375, |
|
"loss": 0.1218, |
|
"rewards/chosen": 1.8124616146087646, |
|
"rewards/margins": 10.569868087768555, |
|
"rewards/rejected": -8.757406234741211, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 23.84940528869629, |
|
"kl": 0.0, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logps/chosen": -241.4117889404297, |
|
"logps/rejected": -336.4566345214844, |
|
"loss": 0.1294, |
|
"rewards/chosen": 1.8767503499984741, |
|
"rewards/margins": 10.489054679870605, |
|
"rewards/rejected": -8.612303733825684, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 16.005657196044922, |
|
"kl": 0.0, |
|
"learning_rate": 4.189189189189189e-07, |
|
"logps/chosen": -307.21600341796875, |
|
"logps/rejected": -341.0569152832031, |
|
"loss": 0.1154, |
|
"rewards/chosen": 1.901063323020935, |
|
"rewards/margins": 10.400434494018555, |
|
"rewards/rejected": -8.499369621276855, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 16.274534225463867, |
|
"kl": 0.0, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logps/chosen": -253.8114471435547, |
|
"logps/rejected": -333.0570983886719, |
|
"loss": 0.1327, |
|
"rewards/chosen": 1.955413818359375, |
|
"rewards/margins": 10.895428657531738, |
|
"rewards/rejected": -8.940014839172363, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 20.72968101501465, |
|
"kl": 0.0, |
|
"learning_rate": 4.153627311522048e-07, |
|
"logps/chosen": -261.703857421875, |
|
"logps/rejected": -339.4162902832031, |
|
"loss": 0.1308, |
|
"rewards/chosen": 1.7927815914154053, |
|
"rewards/margins": 10.807384490966797, |
|
"rewards/rejected": -9.014602661132812, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 21.173845291137695, |
|
"kl": 0.0, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logps/chosen": -260.5010681152344, |
|
"logps/rejected": -359.41851806640625, |
|
"loss": 0.1131, |
|
"rewards/chosen": 1.8654544353485107, |
|
"rewards/margins": 11.833639144897461, |
|
"rewards/rejected": -9.968184471130371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 17.008028030395508, |
|
"kl": 0.0, |
|
"learning_rate": 4.1180654338549077e-07, |
|
"logps/chosen": -234.20315551757812, |
|
"logps/rejected": -342.5408020019531, |
|
"loss": 0.1224, |
|
"rewards/chosen": 2.086550235748291, |
|
"rewards/margins": 10.627596855163574, |
|
"rewards/rejected": -8.541048049926758, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 19.24855613708496, |
|
"kl": 0.0, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logps/chosen": -282.03106689453125, |
|
"logps/rejected": -347.0184631347656, |
|
"loss": 0.1212, |
|
"rewards/chosen": 1.9099693298339844, |
|
"rewards/margins": 10.274763107299805, |
|
"rewards/rejected": -8.36479377746582, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 19.914691925048828, |
|
"kl": 0.0, |
|
"learning_rate": 4.082503556187766e-07, |
|
"logps/chosen": -278.811767578125, |
|
"logps/rejected": -348.43096923828125, |
|
"loss": 0.1209, |
|
"rewards/chosen": 1.7562357187271118, |
|
"rewards/margins": 11.304061889648438, |
|
"rewards/rejected": -9.547826766967773, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 32.35190963745117, |
|
"kl": 0.0, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logps/chosen": -272.9192199707031, |
|
"logps/rejected": -339.4073181152344, |
|
"loss": 0.1005, |
|
"rewards/chosen": 2.1069161891937256, |
|
"rewards/margins": 11.689797401428223, |
|
"rewards/rejected": -9.582880973815918, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 16.27385902404785, |
|
"kl": 0.0, |
|
"learning_rate": 4.0469416785206256e-07, |
|
"logps/chosen": -233.26083374023438, |
|
"logps/rejected": -329.8385314941406, |
|
"loss": 0.1135, |
|
"rewards/chosen": 2.1503446102142334, |
|
"rewards/margins": 10.886509895324707, |
|
"rewards/rejected": -8.736165046691895, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 22.10724449157715, |
|
"kl": 0.0, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logps/chosen": -267.9533996582031, |
|
"logps/rejected": -359.1152038574219, |
|
"loss": 0.1197, |
|
"rewards/chosen": 1.8767788410186768, |
|
"rewards/margins": 11.441519737243652, |
|
"rewards/rejected": -9.564741134643555, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 29.721330642700195, |
|
"kl": 0.0, |
|
"learning_rate": 4.011379800853485e-07, |
|
"logps/chosen": -268.9942932128906, |
|
"logps/rejected": -377.97308349609375, |
|
"loss": 0.1061, |
|
"rewards/chosen": 2.016845703125, |
|
"rewards/margins": 11.474628448486328, |
|
"rewards/rejected": -9.457781791687012, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 24.95069122314453, |
|
"kl": 0.0, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logps/chosen": -279.73944091796875, |
|
"logps/rejected": -370.55474853515625, |
|
"loss": 0.1045, |
|
"rewards/chosen": 2.340444803237915, |
|
"rewards/margins": 11.733491897583008, |
|
"rewards/rejected": -9.393046379089355, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 26.99854278564453, |
|
"kl": 0.0, |
|
"learning_rate": 3.975817923186344e-07, |
|
"logps/chosen": -245.24417114257812, |
|
"logps/rejected": -336.7696533203125, |
|
"loss": 0.1234, |
|
"rewards/chosen": 2.2276217937469482, |
|
"rewards/margins": 10.63569164276123, |
|
"rewards/rejected": -8.40807056427002, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 22.343507766723633, |
|
"kl": 0.0, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logps/chosen": -250.68197631835938, |
|
"logps/rejected": -364.1970520019531, |
|
"loss": 0.1342, |
|
"rewards/chosen": 2.1794769763946533, |
|
"rewards/margins": 10.827537536621094, |
|
"rewards/rejected": -8.64806079864502, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 16.532583236694336, |
|
"kl": 0.0, |
|
"learning_rate": 3.940256045519203e-07, |
|
"logps/chosen": -246.8504638671875, |
|
"logps/rejected": -336.2493896484375, |
|
"loss": 0.1151, |
|
"rewards/chosen": 1.9471546411514282, |
|
"rewards/margins": 11.059728622436523, |
|
"rewards/rejected": -9.112573623657227, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 19.48154067993164, |
|
"kl": 0.0, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logps/chosen": -246.47671508789062, |
|
"logps/rejected": -328.21038818359375, |
|
"loss": 0.1156, |
|
"rewards/chosen": 2.022249221801758, |
|
"rewards/margins": 11.125974655151367, |
|
"rewards/rejected": -9.103724479675293, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 17.954296112060547, |
|
"kl": 0.0, |
|
"learning_rate": 3.9046941678520624e-07, |
|
"logps/chosen": -254.69808959960938, |
|
"logps/rejected": -347.45758056640625, |
|
"loss": 0.1254, |
|
"rewards/chosen": 2.0911641120910645, |
|
"rewards/margins": 11.680787086486816, |
|
"rewards/rejected": -9.58962345123291, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 21.16987419128418, |
|
"kl": 0.0, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logps/chosen": -226.58358764648438, |
|
"logps/rejected": -334.48138427734375, |
|
"loss": 0.1437, |
|
"rewards/chosen": 1.8157398700714111, |
|
"rewards/margins": 10.399962425231934, |
|
"rewards/rejected": -8.584221839904785, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 18.404508590698242, |
|
"kl": 0.0, |
|
"learning_rate": 3.8691322901849213e-07, |
|
"logps/chosen": -263.36395263671875, |
|
"logps/rejected": -337.2552795410156, |
|
"loss": 0.1078, |
|
"rewards/chosen": 2.1132290363311768, |
|
"rewards/margins": 11.090356826782227, |
|
"rewards/rejected": -8.977127075195312, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 18.355857849121094, |
|
"kl": 0.0, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logps/chosen": -256.04412841796875, |
|
"logps/rejected": -356.82073974609375, |
|
"loss": 0.1145, |
|
"rewards/chosen": 1.9654195308685303, |
|
"rewards/margins": 11.159137725830078, |
|
"rewards/rejected": -9.193717956542969, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 16.297401428222656, |
|
"kl": 0.0, |
|
"learning_rate": 3.833570412517781e-07, |
|
"logps/chosen": -330.457275390625, |
|
"logps/rejected": -367.7776794433594, |
|
"loss": 0.113, |
|
"rewards/chosen": 1.8794314861297607, |
|
"rewards/margins": 11.851155281066895, |
|
"rewards/rejected": -9.971722602844238, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 22.856266021728516, |
|
"kl": 0.0, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logps/chosen": -254.3659210205078, |
|
"logps/rejected": -343.2192687988281, |
|
"loss": 0.1138, |
|
"rewards/chosen": 2.034006118774414, |
|
"rewards/margins": 11.903759002685547, |
|
"rewards/rejected": -9.869752883911133, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 21.443723678588867, |
|
"kl": 0.0, |
|
"learning_rate": 3.7980085348506403e-07, |
|
"logps/chosen": -278.9993591308594, |
|
"logps/rejected": -333.3291320800781, |
|
"loss": 0.1287, |
|
"rewards/chosen": 1.906224012374878, |
|
"rewards/margins": 10.766765594482422, |
|
"rewards/rejected": -8.860541343688965, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 24.97509002685547, |
|
"kl": 0.0, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logps/chosen": -238.44290161132812, |
|
"logps/rejected": -338.4499206542969, |
|
"loss": 0.1142, |
|
"rewards/chosen": 2.103523015975952, |
|
"rewards/margins": 11.162189483642578, |
|
"rewards/rejected": -9.058666229248047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 22.563114166259766, |
|
"kl": 0.0, |
|
"learning_rate": 3.7624466571834987e-07, |
|
"logps/chosen": -238.7085723876953, |
|
"logps/rejected": -352.23663330078125, |
|
"loss": 0.1396, |
|
"rewards/chosen": 1.8689464330673218, |
|
"rewards/margins": 10.945247650146484, |
|
"rewards/rejected": -9.076301574707031, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 17.017030715942383, |
|
"kl": 0.0, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logps/chosen": -241.0718231201172, |
|
"logps/rejected": -340.98968505859375, |
|
"loss": 0.1197, |
|
"rewards/chosen": 1.9388542175292969, |
|
"rewards/margins": 11.241914749145508, |
|
"rewards/rejected": -9.303060531616211, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 16.066030502319336, |
|
"kl": 0.0, |
|
"learning_rate": 3.726884779516358e-07, |
|
"logps/chosen": -224.85086059570312, |
|
"logps/rejected": -349.51800537109375, |
|
"loss": 0.1094, |
|
"rewards/chosen": 2.1027920246124268, |
|
"rewards/margins": 11.039840698242188, |
|
"rewards/rejected": -8.937047958374023, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 17.983535766601562, |
|
"kl": 0.0, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logps/chosen": -269.41693115234375, |
|
"logps/rejected": -347.45477294921875, |
|
"loss": 0.1234, |
|
"rewards/chosen": 2.2909629344940186, |
|
"rewards/margins": 11.061802864074707, |
|
"rewards/rejected": -8.770838737487793, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 32.42300796508789, |
|
"kl": 0.0, |
|
"learning_rate": 3.6913229018492176e-07, |
|
"logps/chosen": -274.3470764160156, |
|
"logps/rejected": -335.10345458984375, |
|
"loss": 0.1186, |
|
"rewards/chosen": 2.1239845752716064, |
|
"rewards/margins": 11.3440523147583, |
|
"rewards/rejected": -9.220067977905273, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 16.601402282714844, |
|
"kl": 0.0, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logps/chosen": -232.43392944335938, |
|
"logps/rejected": -354.2206726074219, |
|
"loss": 0.1218, |
|
"rewards/chosen": 2.117833137512207, |
|
"rewards/margins": 11.192573547363281, |
|
"rewards/rejected": -9.07474136352539, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 24.04266357421875, |
|
"kl": 0.0, |
|
"learning_rate": 3.655761024182077e-07, |
|
"logps/chosen": -277.41241455078125, |
|
"logps/rejected": -334.04815673828125, |
|
"loss": 0.1025, |
|
"rewards/chosen": 2.3604273796081543, |
|
"rewards/margins": 11.987722396850586, |
|
"rewards/rejected": -9.627294540405273, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 15.607158660888672, |
|
"kl": 0.0, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logps/chosen": -286.7027282714844, |
|
"logps/rejected": -319.68896484375, |
|
"loss": 0.1124, |
|
"rewards/chosen": 2.4556756019592285, |
|
"rewards/margins": 11.582249641418457, |
|
"rewards/rejected": -9.126574516296387, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 22.26024055480957, |
|
"kl": 0.0, |
|
"learning_rate": 3.6201991465149355e-07, |
|
"logps/chosen": -239.4503173828125, |
|
"logps/rejected": -345.2344970703125, |
|
"loss": 0.112, |
|
"rewards/chosen": 1.9915698766708374, |
|
"rewards/margins": 11.853796005249023, |
|
"rewards/rejected": -9.862226486206055, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 29.981088638305664, |
|
"kl": 0.0, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logps/chosen": -241.8583984375, |
|
"logps/rejected": -365.6163024902344, |
|
"loss": 0.1207, |
|
"rewards/chosen": 1.9029747247695923, |
|
"rewards/margins": 11.538789749145508, |
|
"rewards/rejected": -9.635814666748047, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 17.248044967651367, |
|
"kl": 0.0, |
|
"learning_rate": 3.584637268847795e-07, |
|
"logps/chosen": -255.35635375976562, |
|
"logps/rejected": -348.6636962890625, |
|
"loss": 0.1267, |
|
"rewards/chosen": 2.0199761390686035, |
|
"rewards/margins": 11.458740234375, |
|
"rewards/rejected": -9.438763618469238, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 20.46364974975586, |
|
"kl": 0.0, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logps/chosen": -271.98846435546875, |
|
"logps/rejected": -319.4902038574219, |
|
"loss": 0.1209, |
|
"rewards/chosen": 2.140854597091675, |
|
"rewards/margins": 11.506881713867188, |
|
"rewards/rejected": -9.36602783203125, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 13.751622200012207, |
|
"kl": 0.0, |
|
"learning_rate": 3.5490753911806545e-07, |
|
"logps/chosen": -249.31838989257812, |
|
"logps/rejected": -363.65313720703125, |
|
"loss": 0.1066, |
|
"rewards/chosen": 2.302562713623047, |
|
"rewards/margins": 12.324169158935547, |
|
"rewards/rejected": -10.0216064453125, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 23.35749053955078, |
|
"kl": 0.0, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logps/chosen": -242.62533569335938, |
|
"logps/rejected": -326.2989807128906, |
|
"loss": 0.1213, |
|
"rewards/chosen": 2.164386749267578, |
|
"rewards/margins": 11.125585556030273, |
|
"rewards/rejected": -8.961198806762695, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 22.92085075378418, |
|
"kl": 0.0, |
|
"learning_rate": 3.5135135135135134e-07, |
|
"logps/chosen": -215.1080322265625, |
|
"logps/rejected": -343.92315673828125, |
|
"loss": 0.1096, |
|
"rewards/chosen": 2.084261417388916, |
|
"rewards/margins": 12.25261402130127, |
|
"rewards/rejected": -10.168352127075195, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 19.450267791748047, |
|
"kl": 0.0, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logps/chosen": -217.7822723388672, |
|
"logps/rejected": -329.91375732421875, |
|
"loss": 0.1294, |
|
"rewards/chosen": 2.3254265785217285, |
|
"rewards/margins": 11.483414649963379, |
|
"rewards/rejected": -9.157987594604492, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 18.078920364379883, |
|
"kl": 0.0, |
|
"learning_rate": 3.4779516358463724e-07, |
|
"logps/chosen": -253.4072723388672, |
|
"logps/rejected": -344.491455078125, |
|
"loss": 0.0927, |
|
"rewards/chosen": 2.3925743103027344, |
|
"rewards/margins": 12.241676330566406, |
|
"rewards/rejected": -9.849101066589355, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 17.867704391479492, |
|
"kl": 0.0, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logps/chosen": -253.86178588867188, |
|
"logps/rejected": -367.8619689941406, |
|
"loss": 0.1152, |
|
"rewards/chosen": 2.281512975692749, |
|
"rewards/margins": 12.078498840332031, |
|
"rewards/rejected": -9.79698657989502, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 19.863170623779297, |
|
"kl": 0.0, |
|
"learning_rate": 3.442389758179232e-07, |
|
"logps/chosen": -289.83990478515625, |
|
"logps/rejected": -346.80987548828125, |
|
"loss": 0.1098, |
|
"rewards/chosen": 2.1612110137939453, |
|
"rewards/margins": 12.505953788757324, |
|
"rewards/rejected": -10.344742774963379, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 14.594950675964355, |
|
"kl": 0.0, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logps/chosen": -199.16708374023438, |
|
"logps/rejected": -352.14447021484375, |
|
"loss": 0.0929, |
|
"rewards/chosen": 1.9940685033798218, |
|
"rewards/margins": 12.916508674621582, |
|
"rewards/rejected": -10.922439575195312, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 20.205915451049805, |
|
"kl": 0.0, |
|
"learning_rate": 3.406827880512091e-07, |
|
"logps/chosen": -251.85546875, |
|
"logps/rejected": -347.35113525390625, |
|
"loss": 0.1137, |
|
"rewards/chosen": 2.3019704818725586, |
|
"rewards/margins": 12.349814414978027, |
|
"rewards/rejected": -10.047843933105469, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 21.62274742126465, |
|
"kl": 0.0, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logps/chosen": -241.4209442138672, |
|
"logps/rejected": -366.311767578125, |
|
"loss": 0.104, |
|
"rewards/chosen": 2.3162879943847656, |
|
"rewards/margins": 11.884176254272461, |
|
"rewards/rejected": -9.567889213562012, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 18.622377395629883, |
|
"kl": 0.0, |
|
"learning_rate": 3.37126600284495e-07, |
|
"logps/chosen": -270.2699279785156, |
|
"logps/rejected": -338.7982482910156, |
|
"loss": 0.1127, |
|
"rewards/chosen": 2.2165682315826416, |
|
"rewards/margins": 11.826861381530762, |
|
"rewards/rejected": -9.6102933883667, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 19.930374145507812, |
|
"kl": 0.0, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logps/chosen": -265.74432373046875, |
|
"logps/rejected": -366.4137268066406, |
|
"loss": 0.1235, |
|
"rewards/chosen": 2.140533924102783, |
|
"rewards/margins": 12.077839851379395, |
|
"rewards/rejected": -9.937305450439453, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 20.78235626220703, |
|
"kl": 0.0, |
|
"learning_rate": 3.335704125177809e-07, |
|
"logps/chosen": -251.783935546875, |
|
"logps/rejected": -321.7081298828125, |
|
"loss": 0.1251, |
|
"rewards/chosen": 2.4168410301208496, |
|
"rewards/margins": 11.567142486572266, |
|
"rewards/rejected": -9.150300979614258, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 18.487886428833008, |
|
"kl": 0.0, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logps/chosen": -244.60250854492188, |
|
"logps/rejected": -386.168212890625, |
|
"loss": 0.0922, |
|
"rewards/chosen": 2.371992588043213, |
|
"rewards/margins": 13.357122421264648, |
|
"rewards/rejected": -10.985128402709961, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 16.354122161865234, |
|
"kl": 0.0, |
|
"learning_rate": 3.300142247510668e-07, |
|
"logps/chosen": -252.8701171875, |
|
"logps/rejected": -334.31744384765625, |
|
"loss": 0.1003, |
|
"rewards/chosen": 2.335937023162842, |
|
"rewards/margins": 12.5576753616333, |
|
"rewards/rejected": -10.2217378616333, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 16.126724243164062, |
|
"kl": 0.0, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logps/chosen": -240.2421417236328, |
|
"logps/rejected": -346.31414794921875, |
|
"loss": 0.1069, |
|
"rewards/chosen": 2.2422432899475098, |
|
"rewards/margins": 11.99687671661377, |
|
"rewards/rejected": -9.754633903503418, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 21.066267013549805, |
|
"kl": 0.0, |
|
"learning_rate": 3.2645803698435276e-07, |
|
"logps/chosen": -264.81964111328125, |
|
"logps/rejected": -338.95379638671875, |
|
"loss": 0.1126, |
|
"rewards/chosen": 2.3289685249328613, |
|
"rewards/margins": 11.437704086303711, |
|
"rewards/rejected": -9.108736038208008, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 14.892672538757324, |
|
"kl": 0.0, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logps/chosen": -240.17236328125, |
|
"logps/rejected": -355.543212890625, |
|
"loss": 0.0884, |
|
"rewards/chosen": 2.1047472953796387, |
|
"rewards/margins": 12.319875717163086, |
|
"rewards/rejected": -10.215127944946289, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 22.722187042236328, |
|
"kl": 0.0, |
|
"learning_rate": 3.229018492176387e-07, |
|
"logps/chosen": -235.84341430664062, |
|
"logps/rejected": -363.74041748046875, |
|
"loss": 0.0958, |
|
"rewards/chosen": 2.262721300125122, |
|
"rewards/margins": 12.947868347167969, |
|
"rewards/rejected": -10.685147285461426, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 16.061914443969727, |
|
"kl": 0.0, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logps/chosen": -232.480224609375, |
|
"logps/rejected": -363.93792724609375, |
|
"loss": 0.0947, |
|
"rewards/chosen": 2.245720863342285, |
|
"rewards/margins": 13.019895553588867, |
|
"rewards/rejected": -10.774174690246582, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 17.95708656311035, |
|
"kl": 0.0, |
|
"learning_rate": 3.193456614509246e-07, |
|
"logps/chosen": -266.2756652832031, |
|
"logps/rejected": -349.5681457519531, |
|
"loss": 0.0981, |
|
"rewards/chosen": 2.0552451610565186, |
|
"rewards/margins": 12.47476863861084, |
|
"rewards/rejected": -10.419523239135742, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 22.32909393310547, |
|
"kl": 0.0, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logps/chosen": -262.5782165527344, |
|
"logps/rejected": -350.5921936035156, |
|
"loss": 0.1155, |
|
"rewards/chosen": 2.324476718902588, |
|
"rewards/margins": 12.327436447143555, |
|
"rewards/rejected": -10.002958297729492, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 14.884044647216797, |
|
"kl": 0.0, |
|
"learning_rate": 3.157894736842105e-07, |
|
"logps/chosen": -257.66094970703125, |
|
"logps/rejected": -344.09869384765625, |
|
"loss": 0.096, |
|
"rewards/chosen": 2.2759206295013428, |
|
"rewards/margins": 13.264117240905762, |
|
"rewards/rejected": -10.988197326660156, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 18.041053771972656, |
|
"kl": 0.0, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logps/chosen": -270.93646240234375, |
|
"logps/rejected": -360.5249938964844, |
|
"loss": 0.0958, |
|
"rewards/chosen": 2.366556167602539, |
|
"rewards/margins": 13.1105318069458, |
|
"rewards/rejected": -10.743974685668945, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 17.0706729888916, |
|
"kl": 0.0, |
|
"learning_rate": 3.1223328591749644e-07, |
|
"logps/chosen": -214.73690795898438, |
|
"logps/rejected": -342.7330017089844, |
|
"loss": 0.1107, |
|
"rewards/chosen": 2.152832508087158, |
|
"rewards/margins": 12.126806259155273, |
|
"rewards/rejected": -9.973973274230957, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 25.26626968383789, |
|
"kl": 0.0, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logps/chosen": -293.3183898925781, |
|
"logps/rejected": -356.9164123535156, |
|
"loss": 0.0973, |
|
"rewards/chosen": 2.3354926109313965, |
|
"rewards/margins": 13.199078559875488, |
|
"rewards/rejected": -10.863585472106934, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 28.42453384399414, |
|
"kl": 0.0, |
|
"learning_rate": 3.0867709815078234e-07, |
|
"logps/chosen": -285.2272644042969, |
|
"logps/rejected": -367.3414001464844, |
|
"loss": 0.1091, |
|
"rewards/chosen": 2.12477707862854, |
|
"rewards/margins": 13.724832534790039, |
|
"rewards/rejected": -11.600054740905762, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 17.899433135986328, |
|
"kl": 0.0, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logps/chosen": -262.5505065917969, |
|
"logps/rejected": -371.45025634765625, |
|
"loss": 0.1083, |
|
"rewards/chosen": 2.060147762298584, |
|
"rewards/margins": 12.760570526123047, |
|
"rewards/rejected": -10.700422286987305, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 19.646583557128906, |
|
"kl": 0.0, |
|
"learning_rate": 3.051209103840683e-07, |
|
"logps/chosen": -274.9856262207031, |
|
"logps/rejected": -360.3489685058594, |
|
"loss": 0.1105, |
|
"rewards/chosen": 2.4847919940948486, |
|
"rewards/margins": 13.168169975280762, |
|
"rewards/rejected": -10.683378219604492, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 14.362700462341309, |
|
"kl": 0.0, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logps/chosen": -253.5523681640625, |
|
"logps/rejected": -336.803466796875, |
|
"loss": 0.1024, |
|
"rewards/chosen": 2.3810598850250244, |
|
"rewards/margins": 12.508206367492676, |
|
"rewards/rejected": -10.127145767211914, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 22.93763542175293, |
|
"kl": 0.0, |
|
"learning_rate": 3.015647226173542e-07, |
|
"logps/chosen": -229.1265869140625, |
|
"logps/rejected": -354.0566101074219, |
|
"loss": 0.1135, |
|
"rewards/chosen": 2.1574866771698, |
|
"rewards/margins": 12.193730354309082, |
|
"rewards/rejected": -10.036243438720703, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 22.314695358276367, |
|
"kl": 0.0, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logps/chosen": -274.2774353027344, |
|
"logps/rejected": -405.2040100097656, |
|
"loss": 0.0953, |
|
"rewards/chosen": 2.191157817840576, |
|
"rewards/margins": 13.628583908081055, |
|
"rewards/rejected": -11.43742561340332, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 20.03668975830078, |
|
"kl": 0.0, |
|
"learning_rate": 2.9800853485064007e-07, |
|
"logps/chosen": -250.0018768310547, |
|
"logps/rejected": -340.919677734375, |
|
"loss": 0.1022, |
|
"rewards/chosen": 2.4375367164611816, |
|
"rewards/margins": 12.8548583984375, |
|
"rewards/rejected": -10.417322158813477, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 20.21332359313965, |
|
"kl": 0.0, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logps/chosen": -262.5707092285156, |
|
"logps/rejected": -341.51556396484375, |
|
"loss": 0.113, |
|
"rewards/chosen": 2.2380969524383545, |
|
"rewards/margins": 12.677546501159668, |
|
"rewards/rejected": -10.439449310302734, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 18.686241149902344, |
|
"kl": 0.0, |
|
"learning_rate": 2.94452347083926e-07, |
|
"logps/chosen": -259.5292053222656, |
|
"logps/rejected": -340.3607482910156, |
|
"loss": 0.1188, |
|
"rewards/chosen": 2.1912286281585693, |
|
"rewards/margins": 12.15953540802002, |
|
"rewards/rejected": -9.968307495117188, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 19.728899002075195, |
|
"kl": 0.0, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logps/chosen": -249.8451385498047, |
|
"logps/rejected": -381.3360290527344, |
|
"loss": 0.0967, |
|
"rewards/chosen": 2.2508890628814697, |
|
"rewards/margins": 13.840009689331055, |
|
"rewards/rejected": -11.589120864868164, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 17.338115692138672, |
|
"kl": 0.0, |
|
"learning_rate": 2.9089615931721197e-07, |
|
"logps/chosen": -268.9515380859375, |
|
"logps/rejected": -368.5078125, |
|
"loss": 0.1015, |
|
"rewards/chosen": 2.28879976272583, |
|
"rewards/margins": 13.396432876586914, |
|
"rewards/rejected": -11.107633590698242, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.87563133239746, |
|
"kl": 0.0, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logps/chosen": -293.7141418457031, |
|
"logps/rejected": -355.55535888671875, |
|
"loss": 0.1071, |
|
"rewards/chosen": 2.4517719745635986, |
|
"rewards/margins": 13.449313163757324, |
|
"rewards/rejected": -10.997541427612305, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 19.807565689086914, |
|
"kl": 0.0, |
|
"learning_rate": 2.873399715504978e-07, |
|
"logps/chosen": -248.6322784423828, |
|
"logps/rejected": -353.8228759765625, |
|
"loss": 0.1088, |
|
"rewards/chosen": 2.2985501289367676, |
|
"rewards/margins": 12.722993850708008, |
|
"rewards/rejected": -10.424444198608398, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 14.098715782165527, |
|
"kl": 0.0, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logps/chosen": -240.1453857421875, |
|
"logps/rejected": -330.6163635253906, |
|
"loss": 0.0891, |
|
"rewards/chosen": 2.550762414932251, |
|
"rewards/margins": 12.813929557800293, |
|
"rewards/rejected": -10.263166427612305, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 16.266538619995117, |
|
"kl": 0.0, |
|
"learning_rate": 2.8378378378378376e-07, |
|
"logps/chosen": -236.1558380126953, |
|
"logps/rejected": -360.686767578125, |
|
"loss": 0.0973, |
|
"rewards/chosen": 2.2286760807037354, |
|
"rewards/margins": 13.117490768432617, |
|
"rewards/rejected": -10.888814926147461, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 16.414714813232422, |
|
"kl": 0.0, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logps/chosen": -283.36944580078125, |
|
"logps/rejected": -368.7194519042969, |
|
"loss": 0.0967, |
|
"rewards/chosen": 2.4076812267303467, |
|
"rewards/margins": 12.956690788269043, |
|
"rewards/rejected": -10.549009323120117, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 18.088735580444336, |
|
"kl": 0.0, |
|
"learning_rate": 2.802275960170697e-07, |
|
"logps/chosen": -302.5070495605469, |
|
"logps/rejected": -348.53289794921875, |
|
"loss": 0.1047, |
|
"rewards/chosen": 2.4432990550994873, |
|
"rewards/margins": 12.935566902160645, |
|
"rewards/rejected": -10.492268562316895, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 16.778833389282227, |
|
"kl": 0.0, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logps/chosen": -243.6858367919922, |
|
"logps/rejected": -366.0279235839844, |
|
"loss": 0.0852, |
|
"rewards/chosen": 2.5511088371276855, |
|
"rewards/margins": 13.161443710327148, |
|
"rewards/rejected": -10.610334396362305, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 17.379335403442383, |
|
"kl": 0.0, |
|
"learning_rate": 2.766714082503556e-07, |
|
"logps/chosen": -279.8948669433594, |
|
"logps/rejected": -352.0287170410156, |
|
"loss": 0.0957, |
|
"rewards/chosen": 2.464888334274292, |
|
"rewards/margins": 13.103485107421875, |
|
"rewards/rejected": -10.638595581054688, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 21.737380981445312, |
|
"kl": 0.0, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logps/chosen": -258.93865966796875, |
|
"logps/rejected": -345.5055236816406, |
|
"loss": 0.1177, |
|
"rewards/chosen": 2.079983711242676, |
|
"rewards/margins": 12.90088176727295, |
|
"rewards/rejected": -10.820898056030273, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 15.227828025817871, |
|
"kl": 0.0, |
|
"learning_rate": 2.7311522048364154e-07, |
|
"logps/chosen": -233.96194458007812, |
|
"logps/rejected": -370.0096130371094, |
|
"loss": 0.1054, |
|
"rewards/chosen": 2.696063280105591, |
|
"rewards/margins": 13.426877975463867, |
|
"rewards/rejected": -10.730813980102539, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 13.938480377197266, |
|
"kl": 0.0, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logps/chosen": -243.730224609375, |
|
"logps/rejected": -350.44183349609375, |
|
"loss": 0.0976, |
|
"rewards/chosen": 2.3282103538513184, |
|
"rewards/margins": 13.082319259643555, |
|
"rewards/rejected": -10.754108428955078, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 29.015178680419922, |
|
"kl": 0.0, |
|
"learning_rate": 2.6955903271692744e-07, |
|
"logps/chosen": -216.66488647460938, |
|
"logps/rejected": -368.9279479980469, |
|
"loss": 0.1045, |
|
"rewards/chosen": 2.2219786643981934, |
|
"rewards/margins": 13.255975723266602, |
|
"rewards/rejected": -11.033994674682617, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 12.759780883789062, |
|
"kl": 0.0, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logps/chosen": -232.0436248779297, |
|
"logps/rejected": -358.2416076660156, |
|
"loss": 0.0943, |
|
"rewards/chosen": 2.235215425491333, |
|
"rewards/margins": 12.901985168457031, |
|
"rewards/rejected": -10.666769027709961, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 19.0715389251709, |
|
"kl": 0.0, |
|
"learning_rate": 2.6600284495021333e-07, |
|
"logps/chosen": -245.0729217529297, |
|
"logps/rejected": -379.9324645996094, |
|
"loss": 0.1173, |
|
"rewards/chosen": 2.423140287399292, |
|
"rewards/margins": 13.03075885772705, |
|
"rewards/rejected": -10.60761833190918, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 15.53042984008789, |
|
"kl": 0.0, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logps/chosen": -261.9339294433594, |
|
"logps/rejected": -327.780517578125, |
|
"loss": 0.0903, |
|
"rewards/chosen": 2.4564433097839355, |
|
"rewards/margins": 13.401847839355469, |
|
"rewards/rejected": -10.945404052734375, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 26.67053985595703, |
|
"kl": 0.0, |
|
"learning_rate": 2.624466571834993e-07, |
|
"logps/chosen": -248.34487915039062, |
|
"logps/rejected": -345.79083251953125, |
|
"loss": 0.1071, |
|
"rewards/chosen": 2.359476089477539, |
|
"rewards/margins": 13.113537788391113, |
|
"rewards/rejected": -10.754061698913574, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 22.24759292602539, |
|
"kl": 0.0, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logps/chosen": -262.4079895019531, |
|
"logps/rejected": -364.2686462402344, |
|
"loss": 0.1076, |
|
"rewards/chosen": 2.37235689163208, |
|
"rewards/margins": 13.127764701843262, |
|
"rewards/rejected": -10.75540828704834, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 17.638994216918945, |
|
"kl": 0.0, |
|
"learning_rate": 2.5889046941678523e-07, |
|
"logps/chosen": -273.74432373046875, |
|
"logps/rejected": -383.0593566894531, |
|
"loss": 0.0984, |
|
"rewards/chosen": 2.275177478790283, |
|
"rewards/margins": 14.033803939819336, |
|
"rewards/rejected": -11.758626937866211, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 15.510226249694824, |
|
"kl": 0.0, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logps/chosen": -268.3088073730469, |
|
"logps/rejected": -372.16876220703125, |
|
"loss": 0.075, |
|
"rewards/chosen": 2.7448737621307373, |
|
"rewards/margins": 14.005487442016602, |
|
"rewards/rejected": -11.260614395141602, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 18.002506256103516, |
|
"kl": 0.0, |
|
"learning_rate": 2.5533428165007107e-07, |
|
"logps/chosen": -214.6236572265625, |
|
"logps/rejected": -379.697265625, |
|
"loss": 0.1156, |
|
"rewards/chosen": 2.090531349182129, |
|
"rewards/margins": 14.037800788879395, |
|
"rewards/rejected": -11.947270393371582, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 18.8648681640625, |
|
"kl": 0.0, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logps/chosen": -230.46810913085938, |
|
"logps/rejected": -345.8827209472656, |
|
"loss": 0.1028, |
|
"rewards/chosen": 2.3623595237731934, |
|
"rewards/margins": 13.34724235534668, |
|
"rewards/rejected": -10.984883308410645, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 15.514137268066406, |
|
"kl": 0.0, |
|
"learning_rate": 2.51778093883357e-07, |
|
"logps/chosen": -224.1614227294922, |
|
"logps/rejected": -365.08477783203125, |
|
"loss": 0.0985, |
|
"rewards/chosen": 2.2946102619171143, |
|
"rewards/margins": 14.019018173217773, |
|
"rewards/rejected": -11.724408149719238, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 17.679288864135742, |
|
"kl": 0.0, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -231.95565795898438, |
|
"logps/rejected": -349.056640625, |
|
"loss": 0.0966, |
|
"rewards/chosen": 2.3342556953430176, |
|
"rewards/margins": 13.233372688293457, |
|
"rewards/rejected": -10.899115562438965, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 19.04916000366211, |
|
"kl": 0.0, |
|
"learning_rate": 2.4822190611664296e-07, |
|
"logps/chosen": -233.5281982421875, |
|
"logps/rejected": -374.4831848144531, |
|
"loss": 0.0922, |
|
"rewards/chosen": 2.58280873298645, |
|
"rewards/margins": 13.661170959472656, |
|
"rewards/rejected": -11.078360557556152, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 16.834497451782227, |
|
"kl": 0.0, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logps/chosen": -236.75106811523438, |
|
"logps/rejected": -353.3365478515625, |
|
"loss": 0.096, |
|
"rewards/chosen": 2.7401375770568848, |
|
"rewards/margins": 13.776086807250977, |
|
"rewards/rejected": -11.035948753356934, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 24.789772033691406, |
|
"kl": 0.0, |
|
"learning_rate": 2.4466571834992886e-07, |
|
"logps/chosen": -267.3653869628906, |
|
"logps/rejected": -370.1556091308594, |
|
"loss": 0.0949, |
|
"rewards/chosen": 2.2669379711151123, |
|
"rewards/margins": 13.626495361328125, |
|
"rewards/rejected": -11.35955810546875, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 12.911144256591797, |
|
"kl": 0.0, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logps/chosen": -250.23556518554688, |
|
"logps/rejected": -363.0490417480469, |
|
"loss": 0.0717, |
|
"rewards/chosen": 2.6297707557678223, |
|
"rewards/margins": 15.319549560546875, |
|
"rewards/rejected": -12.689778327941895, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 18.159881591796875, |
|
"kl": 0.0, |
|
"learning_rate": 2.411095305832148e-07, |
|
"logps/chosen": -276.918701171875, |
|
"logps/rejected": -351.54132080078125, |
|
"loss": 0.1062, |
|
"rewards/chosen": 2.5637714862823486, |
|
"rewards/margins": 13.2720365524292, |
|
"rewards/rejected": -10.708267211914062, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 16.24278450012207, |
|
"kl": 0.0, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logps/chosen": -264.356689453125, |
|
"logps/rejected": -345.72064208984375, |
|
"loss": 0.1061, |
|
"rewards/chosen": 2.57415509223938, |
|
"rewards/margins": 13.096124649047852, |
|
"rewards/rejected": -10.521968841552734, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 15.353395462036133, |
|
"kl": 0.0, |
|
"learning_rate": 2.375533428165007e-07, |
|
"logps/chosen": -245.558349609375, |
|
"logps/rejected": -346.11151123046875, |
|
"loss": 0.1098, |
|
"rewards/chosen": 2.0979220867156982, |
|
"rewards/margins": 13.061151504516602, |
|
"rewards/rejected": -10.963228225708008, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 15.291166305541992, |
|
"kl": 0.0, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logps/chosen": -268.0149841308594, |
|
"logps/rejected": -359.99676513671875, |
|
"loss": 0.0935, |
|
"rewards/chosen": 2.660017251968384, |
|
"rewards/margins": 13.785112380981445, |
|
"rewards/rejected": -11.125094413757324, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 27.338308334350586, |
|
"kl": 0.0, |
|
"learning_rate": 2.3399715504978662e-07, |
|
"logps/chosen": -223.98062133789062, |
|
"logps/rejected": -382.53924560546875, |
|
"loss": 0.0733, |
|
"rewards/chosen": 2.573464870452881, |
|
"rewards/margins": 14.070528030395508, |
|
"rewards/rejected": -11.497062683105469, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 28.126638412475586, |
|
"kl": 0.0, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logps/chosen": -225.90432739257812, |
|
"logps/rejected": -369.765380859375, |
|
"loss": 0.1068, |
|
"rewards/chosen": 2.4907584190368652, |
|
"rewards/margins": 13.410183906555176, |
|
"rewards/rejected": -10.919425964355469, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 23.488555908203125, |
|
"kl": 0.0, |
|
"learning_rate": 2.304409672830725e-07, |
|
"logps/chosen": -264.053466796875, |
|
"logps/rejected": -347.4461364746094, |
|
"loss": 0.0978, |
|
"rewards/chosen": 2.3625271320343018, |
|
"rewards/margins": 13.296697616577148, |
|
"rewards/rejected": -10.934170722961426, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 16.538496017456055, |
|
"kl": 0.0, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logps/chosen": -305.2149353027344, |
|
"logps/rejected": -387.3183898925781, |
|
"loss": 0.0895, |
|
"rewards/chosen": 2.51884126663208, |
|
"rewards/margins": 14.519546508789062, |
|
"rewards/rejected": -12.000704765319824, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 14.570239067077637, |
|
"kl": 0.0, |
|
"learning_rate": 2.2688477951635846e-07, |
|
"logps/chosen": -288.55859375, |
|
"logps/rejected": -360.0328063964844, |
|
"loss": 0.0891, |
|
"rewards/chosen": 2.6060962677001953, |
|
"rewards/margins": 13.63697338104248, |
|
"rewards/rejected": -11.030878067016602, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.252922058105469, |
|
"kl": 0.0, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logps/chosen": -237.12985229492188, |
|
"logps/rejected": -375.7847900390625, |
|
"loss": 0.0968, |
|
"rewards/chosen": 2.277179718017578, |
|
"rewards/margins": 13.618906021118164, |
|
"rewards/rejected": -11.341727256774902, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 14.224340438842773, |
|
"kl": 0.0, |
|
"learning_rate": 2.2332859174964438e-07, |
|
"logps/chosen": -247.994384765625, |
|
"logps/rejected": -384.1309509277344, |
|
"loss": 0.0921, |
|
"rewards/chosen": 2.5065815448760986, |
|
"rewards/margins": 14.67822551727295, |
|
"rewards/rejected": -12.17164421081543, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 15.533319473266602, |
|
"kl": 0.0, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logps/chosen": -217.72982788085938, |
|
"logps/rejected": -364.0068054199219, |
|
"loss": 0.1044, |
|
"rewards/chosen": 2.4342358112335205, |
|
"rewards/margins": 13.825854301452637, |
|
"rewards/rejected": -11.391618728637695, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 12.951379776000977, |
|
"kl": 0.0, |
|
"learning_rate": 2.1977240398293027e-07, |
|
"logps/chosen": -216.9298858642578, |
|
"logps/rejected": -391.88916015625, |
|
"loss": 0.0959, |
|
"rewards/chosen": 2.2248167991638184, |
|
"rewards/margins": 14.502099990844727, |
|
"rewards/rejected": -12.27728271484375, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 12.726017951965332, |
|
"kl": 0.0, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logps/chosen": -250.4014129638672, |
|
"logps/rejected": -367.2666931152344, |
|
"loss": 0.099, |
|
"rewards/chosen": 2.6326889991760254, |
|
"rewards/margins": 14.203544616699219, |
|
"rewards/rejected": -11.570856094360352, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 24.116321563720703, |
|
"kl": 0.0, |
|
"learning_rate": 2.1621621621621622e-07, |
|
"logps/chosen": -256.0219421386719, |
|
"logps/rejected": -359.89410400390625, |
|
"loss": 0.1114, |
|
"rewards/chosen": 2.291504383087158, |
|
"rewards/margins": 13.204824447631836, |
|
"rewards/rejected": -10.913320541381836, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 21.19695472717285, |
|
"kl": 0.0, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logps/chosen": -253.9010009765625, |
|
"logps/rejected": -348.592041015625, |
|
"loss": 0.0887, |
|
"rewards/chosen": 2.41105580329895, |
|
"rewards/margins": 13.58563232421875, |
|
"rewards/rejected": -11.174577713012695, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 19.317626953125, |
|
"kl": 0.0, |
|
"learning_rate": 2.1266002844950212e-07, |
|
"logps/chosen": -245.3134765625, |
|
"logps/rejected": -380.17327880859375, |
|
"loss": 0.0998, |
|
"rewards/chosen": 2.4610061645507812, |
|
"rewards/margins": 13.953335762023926, |
|
"rewards/rejected": -11.492330551147461, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 22.688295364379883, |
|
"kl": 0.0, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logps/chosen": -278.2052917480469, |
|
"logps/rejected": -357.1881408691406, |
|
"loss": 0.1063, |
|
"rewards/chosen": 2.404897928237915, |
|
"rewards/margins": 13.51880931854248, |
|
"rewards/rejected": -11.113912582397461, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 14.956331253051758, |
|
"kl": 0.0, |
|
"learning_rate": 2.0910384068278806e-07, |
|
"logps/chosen": -274.7865295410156, |
|
"logps/rejected": -348.69024658203125, |
|
"loss": 0.1178, |
|
"rewards/chosen": 2.560181140899658, |
|
"rewards/margins": 13.09942626953125, |
|
"rewards/rejected": -10.539244651794434, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 14.130335807800293, |
|
"kl": 0.0, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logps/chosen": -241.42257690429688, |
|
"logps/rejected": -374.1187438964844, |
|
"loss": 0.0728, |
|
"rewards/chosen": 2.970536470413208, |
|
"rewards/margins": 14.709304809570312, |
|
"rewards/rejected": -11.738768577575684, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 20.794334411621094, |
|
"kl": 0.0, |
|
"learning_rate": 2.0554765291607396e-07, |
|
"logps/chosen": -230.2430419921875, |
|
"logps/rejected": -380.54608154296875, |
|
"loss": 0.09, |
|
"rewards/chosen": 2.673881769180298, |
|
"rewards/margins": 14.4403076171875, |
|
"rewards/rejected": -11.766425132751465, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 16.297340393066406, |
|
"kl": 0.0, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logps/chosen": -264.10137939453125, |
|
"logps/rejected": -372.11346435546875, |
|
"loss": 0.0896, |
|
"rewards/chosen": 2.7595813274383545, |
|
"rewards/margins": 13.790555953979492, |
|
"rewards/rejected": -11.030974388122559, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 20.304162979125977, |
|
"kl": 0.0, |
|
"learning_rate": 2.0199146514935988e-07, |
|
"logps/chosen": -243.2149200439453, |
|
"logps/rejected": -342.080322265625, |
|
"loss": 0.1091, |
|
"rewards/chosen": 2.3006443977355957, |
|
"rewards/margins": 13.069559097290039, |
|
"rewards/rejected": -10.768914222717285, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 19.80646514892578, |
|
"kl": 0.0, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logps/chosen": -251.78524780273438, |
|
"logps/rejected": -360.4002380371094, |
|
"loss": 0.088, |
|
"rewards/chosen": 2.7110652923583984, |
|
"rewards/margins": 13.96354866027832, |
|
"rewards/rejected": -11.252483367919922, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 19.04472541809082, |
|
"kl": 0.0, |
|
"learning_rate": 1.984352773826458e-07, |
|
"logps/chosen": -227.35873413085938, |
|
"logps/rejected": -363.24713134765625, |
|
"loss": 0.0952, |
|
"rewards/chosen": 2.7294869422912598, |
|
"rewards/margins": 14.287317276000977, |
|
"rewards/rejected": -11.557830810546875, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 14.0511474609375, |
|
"kl": 0.0, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logps/chosen": -248.2740478515625, |
|
"logps/rejected": -359.29534912109375, |
|
"loss": 0.0783, |
|
"rewards/chosen": 2.7295162677764893, |
|
"rewards/margins": 14.418438911437988, |
|
"rewards/rejected": -11.688921928405762, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 21.88315773010254, |
|
"kl": 0.0, |
|
"learning_rate": 1.9487908961593172e-07, |
|
"logps/chosen": -228.99447631835938, |
|
"logps/rejected": -387.5201721191406, |
|
"loss": 0.0771, |
|
"rewards/chosen": 2.3790504932403564, |
|
"rewards/margins": 13.84051513671875, |
|
"rewards/rejected": -11.461464881896973, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 15.932575225830078, |
|
"kl": 0.0, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logps/chosen": -278.6766357421875, |
|
"logps/rejected": -372.94793701171875, |
|
"loss": 0.0826, |
|
"rewards/chosen": 2.363306760787964, |
|
"rewards/margins": 14.281936645507812, |
|
"rewards/rejected": -11.918628692626953, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 11.954800605773926, |
|
"kl": 0.0, |
|
"learning_rate": 1.9132290184921761e-07, |
|
"logps/chosen": -223.6442108154297, |
|
"logps/rejected": -371.5616149902344, |
|
"loss": 0.0961, |
|
"rewards/chosen": 2.332761526107788, |
|
"rewards/margins": 13.71965217590332, |
|
"rewards/rejected": -11.386890411376953, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 14.154836654663086, |
|
"kl": 0.0, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logps/chosen": -223.6517791748047, |
|
"logps/rejected": -386.1749267578125, |
|
"loss": 0.0908, |
|
"rewards/chosen": 2.7226128578186035, |
|
"rewards/margins": 14.438260078430176, |
|
"rewards/rejected": -11.71564769744873, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 18.134370803833008, |
|
"kl": 0.0, |
|
"learning_rate": 1.8776671408250356e-07, |
|
"logps/chosen": -260.0550842285156, |
|
"logps/rejected": -346.24127197265625, |
|
"loss": 0.1007, |
|
"rewards/chosen": 2.5339343547821045, |
|
"rewards/margins": 13.723528861999512, |
|
"rewards/rejected": -11.189595222473145, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 15.416353225708008, |
|
"kl": 0.0, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logps/chosen": -247.7535400390625, |
|
"logps/rejected": -371.42034912109375, |
|
"loss": 0.0912, |
|
"rewards/chosen": 2.3352103233337402, |
|
"rewards/margins": 13.56823444366455, |
|
"rewards/rejected": -11.233022689819336, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 18.23054313659668, |
|
"kl": 0.0, |
|
"learning_rate": 1.8421052631578946e-07, |
|
"logps/chosen": -213.8344268798828, |
|
"logps/rejected": -369.88702392578125, |
|
"loss": 0.0758, |
|
"rewards/chosen": 2.6615304946899414, |
|
"rewards/margins": 14.028742790222168, |
|
"rewards/rejected": -11.367212295532227, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 18.42432975769043, |
|
"kl": 0.0, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logps/chosen": -238.5988006591797, |
|
"logps/rejected": -392.305419921875, |
|
"loss": 0.0965, |
|
"rewards/chosen": 2.5309016704559326, |
|
"rewards/margins": 14.56842041015625, |
|
"rewards/rejected": -12.037518501281738, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 15.395295143127441, |
|
"kl": 0.0, |
|
"learning_rate": 1.8065433854907538e-07, |
|
"logps/chosen": -242.9210968017578, |
|
"logps/rejected": -375.423828125, |
|
"loss": 0.0985, |
|
"rewards/chosen": 2.644166946411133, |
|
"rewards/margins": 13.905471801757812, |
|
"rewards/rejected": -11.26130485534668, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 16.343006134033203, |
|
"kl": 0.0, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logps/chosen": -198.0528106689453, |
|
"logps/rejected": -354.1863098144531, |
|
"loss": 0.092, |
|
"rewards/chosen": 2.4538371562957764, |
|
"rewards/margins": 12.678072929382324, |
|
"rewards/rejected": -10.224235534667969, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 19.673669815063477, |
|
"kl": 0.0, |
|
"learning_rate": 1.770981507823613e-07, |
|
"logps/chosen": -241.1757049560547, |
|
"logps/rejected": -339.1280822753906, |
|
"loss": 0.088, |
|
"rewards/chosen": 2.550891876220703, |
|
"rewards/margins": 13.831698417663574, |
|
"rewards/rejected": -11.280807495117188, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 15.624777793884277, |
|
"kl": 0.0, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logps/chosen": -247.8914031982422, |
|
"logps/rejected": -364.5996398925781, |
|
"loss": 0.1038, |
|
"rewards/chosen": 2.5451152324676514, |
|
"rewards/margins": 13.162847518920898, |
|
"rewards/rejected": -10.617732048034668, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 16.0270938873291, |
|
"kl": 0.0, |
|
"learning_rate": 1.7354196301564722e-07, |
|
"logps/chosen": -271.2415466308594, |
|
"logps/rejected": -348.6362609863281, |
|
"loss": 0.0832, |
|
"rewards/chosen": 2.6901183128356934, |
|
"rewards/margins": 13.692281723022461, |
|
"rewards/rejected": -11.002164840698242, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 16.198070526123047, |
|
"kl": 0.0, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logps/chosen": -228.4770965576172, |
|
"logps/rejected": -371.384521484375, |
|
"loss": 0.0879, |
|
"rewards/chosen": 2.317864179611206, |
|
"rewards/margins": 14.147501945495605, |
|
"rewards/rejected": -11.82963752746582, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 24.463367462158203, |
|
"kl": 0.0, |
|
"learning_rate": 1.6998577524893314e-07, |
|
"logps/chosen": -271.0051574707031, |
|
"logps/rejected": -403.8445129394531, |
|
"loss": 0.0817, |
|
"rewards/chosen": 2.82243275642395, |
|
"rewards/margins": 14.68195629119873, |
|
"rewards/rejected": -11.859524726867676, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 21.81173324584961, |
|
"kl": 0.0, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logps/chosen": -209.64572143554688, |
|
"logps/rejected": -360.5612487792969, |
|
"loss": 0.0849, |
|
"rewards/chosen": 2.4915404319763184, |
|
"rewards/margins": 13.439886093139648, |
|
"rewards/rejected": -10.948348045349121, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 20.325061798095703, |
|
"kl": 0.0, |
|
"learning_rate": 1.6642958748221906e-07, |
|
"logps/chosen": -244.2580108642578, |
|
"logps/rejected": -348.1650390625, |
|
"loss": 0.0804, |
|
"rewards/chosen": 2.744297742843628, |
|
"rewards/margins": 13.572134017944336, |
|
"rewards/rejected": -10.827836990356445, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 24.380229949951172, |
|
"kl": 0.0, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logps/chosen": -253.1858673095703, |
|
"logps/rejected": -348.04144287109375, |
|
"loss": 0.0846, |
|
"rewards/chosen": 2.656752824783325, |
|
"rewards/margins": 13.392297744750977, |
|
"rewards/rejected": -10.735544204711914, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 12.965998649597168, |
|
"kl": 0.0, |
|
"learning_rate": 1.6287339971550498e-07, |
|
"logps/chosen": -262.8606262207031, |
|
"logps/rejected": -356.92242431640625, |
|
"loss": 0.0783, |
|
"rewards/chosen": 2.8926401138305664, |
|
"rewards/margins": 14.1354341506958, |
|
"rewards/rejected": -11.242793083190918, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 16.383338928222656, |
|
"kl": 0.0, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logps/chosen": -300.94384765625, |
|
"logps/rejected": -358.71588134765625, |
|
"loss": 0.0951, |
|
"rewards/chosen": 2.539355993270874, |
|
"rewards/margins": 13.336338996887207, |
|
"rewards/rejected": -10.79698371887207, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 15.756513595581055, |
|
"kl": 0.0, |
|
"learning_rate": 1.5931721194879087e-07, |
|
"logps/chosen": -261.84637451171875, |
|
"logps/rejected": -374.3198547363281, |
|
"loss": 0.0791, |
|
"rewards/chosen": 2.755915880203247, |
|
"rewards/margins": 14.211552619934082, |
|
"rewards/rejected": -11.455635070800781, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 18.087120056152344, |
|
"kl": 0.0, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logps/chosen": -274.73370361328125, |
|
"logps/rejected": -326.76123046875, |
|
"loss": 0.0935, |
|
"rewards/chosen": 2.786874294281006, |
|
"rewards/margins": 12.87488079071045, |
|
"rewards/rejected": -10.088006973266602, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 15.65069580078125, |
|
"kl": 0.0, |
|
"learning_rate": 1.5576102418207682e-07, |
|
"logps/chosen": -235.80062866210938, |
|
"logps/rejected": -340.6771545410156, |
|
"loss": 0.1005, |
|
"rewards/chosen": 2.691729784011841, |
|
"rewards/margins": 13.200857162475586, |
|
"rewards/rejected": -10.509127616882324, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 25.325843811035156, |
|
"kl": 0.0, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logps/chosen": -244.3973388671875, |
|
"logps/rejected": -343.04412841796875, |
|
"loss": 0.0883, |
|
"rewards/chosen": 2.485719680786133, |
|
"rewards/margins": 13.161664962768555, |
|
"rewards/rejected": -10.675946235656738, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 17.044322967529297, |
|
"kl": 0.0, |
|
"learning_rate": 1.5220483641536272e-07, |
|
"logps/chosen": -244.70559692382812, |
|
"logps/rejected": -358.86920166015625, |
|
"loss": 0.0987, |
|
"rewards/chosen": 2.5949511528015137, |
|
"rewards/margins": 13.485272407531738, |
|
"rewards/rejected": -10.890320777893066, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 27.13024139404297, |
|
"kl": 0.0, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logps/chosen": -247.91641235351562, |
|
"logps/rejected": -343.4930419921875, |
|
"loss": 0.0899, |
|
"rewards/chosen": 2.4175989627838135, |
|
"rewards/margins": 13.832735061645508, |
|
"rewards/rejected": -11.415135383605957, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 14.320551872253418, |
|
"kl": 0.0, |
|
"learning_rate": 1.4864864864864866e-07, |
|
"logps/chosen": -246.96670532226562, |
|
"logps/rejected": -370.0877990722656, |
|
"loss": 0.0758, |
|
"rewards/chosen": 2.691598653793335, |
|
"rewards/margins": 13.812780380249023, |
|
"rewards/rejected": -11.121182441711426, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 16.582712173461914, |
|
"kl": 0.0, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logps/chosen": -224.07785034179688, |
|
"logps/rejected": -349.31927490234375, |
|
"loss": 0.082, |
|
"rewards/chosen": 2.6934258937835693, |
|
"rewards/margins": 13.236696243286133, |
|
"rewards/rejected": -10.543269157409668, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 17.989919662475586, |
|
"kl": 0.0, |
|
"learning_rate": 1.4509246088193456e-07, |
|
"logps/chosen": -243.8923797607422, |
|
"logps/rejected": -367.2593688964844, |
|
"loss": 0.0846, |
|
"rewards/chosen": 2.7338271141052246, |
|
"rewards/margins": 14.203822135925293, |
|
"rewards/rejected": -11.469995498657227, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 17.582975387573242, |
|
"kl": 0.0, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logps/chosen": -227.1583709716797, |
|
"logps/rejected": -378.5196228027344, |
|
"loss": 0.1011, |
|
"rewards/chosen": 2.531038284301758, |
|
"rewards/margins": 13.578967094421387, |
|
"rewards/rejected": -11.047929763793945, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.821680068969727, |
|
"kl": 0.0, |
|
"learning_rate": 1.4153627311522048e-07, |
|
"logps/chosen": -277.82684326171875, |
|
"logps/rejected": -364.6512145996094, |
|
"loss": 0.0742, |
|
"rewards/chosen": 2.9555764198303223, |
|
"rewards/margins": 14.14686393737793, |
|
"rewards/rejected": -11.19128704071045, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 15.817394256591797, |
|
"kl": 0.0, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logps/chosen": -271.718017578125, |
|
"logps/rejected": -352.4165954589844, |
|
"loss": 0.0848, |
|
"rewards/chosen": 2.5184245109558105, |
|
"rewards/margins": 13.12096881866455, |
|
"rewards/rejected": -10.602544784545898, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 18.2441463470459, |
|
"kl": 0.0, |
|
"learning_rate": 1.379800853485064e-07, |
|
"logps/chosen": -232.19467163085938, |
|
"logps/rejected": -379.3485412597656, |
|
"loss": 0.0909, |
|
"rewards/chosen": 2.5335030555725098, |
|
"rewards/margins": 14.489652633666992, |
|
"rewards/rejected": -11.956149101257324, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 30.594867706298828, |
|
"kl": 0.0, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logps/chosen": -265.6886291503906, |
|
"logps/rejected": -361.285400390625, |
|
"loss": 0.0964, |
|
"rewards/chosen": 2.450854778289795, |
|
"rewards/margins": 13.442533493041992, |
|
"rewards/rejected": -10.991681098937988, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 12.528332710266113, |
|
"kl": 0.0, |
|
"learning_rate": 1.3442389758179232e-07, |
|
"logps/chosen": -288.8167724609375, |
|
"logps/rejected": -352.4346618652344, |
|
"loss": 0.0741, |
|
"rewards/chosen": 2.7889742851257324, |
|
"rewards/margins": 14.034700393676758, |
|
"rewards/rejected": -11.245725631713867, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 13.565973281860352, |
|
"kl": 0.0, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logps/chosen": -263.8364562988281, |
|
"logps/rejected": -346.26019287109375, |
|
"loss": 0.0843, |
|
"rewards/chosen": 2.638777256011963, |
|
"rewards/margins": 13.351663589477539, |
|
"rewards/rejected": -10.712886810302734, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 21.124759674072266, |
|
"kl": 0.0, |
|
"learning_rate": 1.3086770981507821e-07, |
|
"logps/chosen": -245.15576171875, |
|
"logps/rejected": -372.0450134277344, |
|
"loss": 0.0898, |
|
"rewards/chosen": 2.657459259033203, |
|
"rewards/margins": 14.276753425598145, |
|
"rewards/rejected": -11.619293212890625, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 15.396940231323242, |
|
"kl": 0.0, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logps/chosen": -247.9556121826172, |
|
"logps/rejected": -371.3470764160156, |
|
"loss": 0.0828, |
|
"rewards/chosen": 2.4249672889709473, |
|
"rewards/margins": 14.25316333770752, |
|
"rewards/rejected": -11.828195571899414, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 14.409557342529297, |
|
"kl": 0.0, |
|
"learning_rate": 1.2731152204836416e-07, |
|
"logps/chosen": -252.07241821289062, |
|
"logps/rejected": -361.1800842285156, |
|
"loss": 0.085, |
|
"rewards/chosen": 2.680985689163208, |
|
"rewards/margins": 14.30175495147705, |
|
"rewards/rejected": -11.620769500732422, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 13.202485084533691, |
|
"kl": 0.0, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logps/chosen": -237.88565063476562, |
|
"logps/rejected": -350.7021179199219, |
|
"loss": 0.0874, |
|
"rewards/chosen": 2.6545567512512207, |
|
"rewards/margins": 13.95142936706543, |
|
"rewards/rejected": -11.29687213897705, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 28.25654411315918, |
|
"kl": 0.0, |
|
"learning_rate": 1.2375533428165005e-07, |
|
"logps/chosen": -282.26031494140625, |
|
"logps/rejected": -377.5342712402344, |
|
"loss": 0.0771, |
|
"rewards/chosen": 2.6503818035125732, |
|
"rewards/margins": 14.911274909973145, |
|
"rewards/rejected": -12.260892868041992, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 20.115049362182617, |
|
"kl": 0.0, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logps/chosen": -234.886962890625, |
|
"logps/rejected": -347.6160583496094, |
|
"loss": 0.0879, |
|
"rewards/chosen": 2.644916296005249, |
|
"rewards/margins": 14.264989852905273, |
|
"rewards/rejected": -11.620074272155762, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 17.797800064086914, |
|
"kl": 0.0, |
|
"learning_rate": 1.2019914651493598e-07, |
|
"logps/chosen": -246.7455291748047, |
|
"logps/rejected": -392.910400390625, |
|
"loss": 0.0791, |
|
"rewards/chosen": 2.706545352935791, |
|
"rewards/margins": 14.843210220336914, |
|
"rewards/rejected": -12.136663436889648, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 12.662943840026855, |
|
"kl": 0.0, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logps/chosen": -225.7235870361328, |
|
"logps/rejected": -402.0716247558594, |
|
"loss": 0.0925, |
|
"rewards/chosen": 2.4518020153045654, |
|
"rewards/margins": 14.207118034362793, |
|
"rewards/rejected": -11.755315780639648, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 16.056779861450195, |
|
"kl": 0.0, |
|
"learning_rate": 1.166429587482219e-07, |
|
"logps/chosen": -215.66708374023438, |
|
"logps/rejected": -375.41717529296875, |
|
"loss": 0.0844, |
|
"rewards/chosen": 2.730950355529785, |
|
"rewards/margins": 14.23332691192627, |
|
"rewards/rejected": -11.502375602722168, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 16.644628524780273, |
|
"kl": 0.0, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logps/chosen": -266.43878173828125, |
|
"logps/rejected": -358.8226318359375, |
|
"loss": 0.0835, |
|
"rewards/chosen": 2.870779514312744, |
|
"rewards/margins": 14.435707092285156, |
|
"rewards/rejected": -11.56492805480957, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.560157775878906, |
|
"kl": 0.0, |
|
"learning_rate": 1.1308677098150782e-07, |
|
"logps/chosen": -238.55319213867188, |
|
"logps/rejected": -390.98358154296875, |
|
"loss": 0.0804, |
|
"rewards/chosen": 2.759446620941162, |
|
"rewards/margins": 14.843562126159668, |
|
"rewards/rejected": -12.084115028381348, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 16.91299819946289, |
|
"kl": 0.0, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logps/chosen": -234.5216064453125, |
|
"logps/rejected": -338.272705078125, |
|
"loss": 0.093, |
|
"rewards/chosen": 2.591742992401123, |
|
"rewards/margins": 14.04884147644043, |
|
"rewards/rejected": -11.457098007202148, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 13.1578369140625, |
|
"kl": 0.0, |
|
"learning_rate": 1.0953058321479374e-07, |
|
"logps/chosen": -275.4759826660156, |
|
"logps/rejected": -379.9882507324219, |
|
"loss": 0.0692, |
|
"rewards/chosen": 2.984293222427368, |
|
"rewards/margins": 15.574743270874023, |
|
"rewards/rejected": -12.59045124053955, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 17.815027236938477, |
|
"kl": 0.0, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logps/chosen": -267.3926696777344, |
|
"logps/rejected": -353.8580017089844, |
|
"loss": 0.0725, |
|
"rewards/chosen": 2.5867929458618164, |
|
"rewards/margins": 14.201835632324219, |
|
"rewards/rejected": -11.615041732788086, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.758225440979004, |
|
"kl": 0.0, |
|
"learning_rate": 1.0597439544807964e-07, |
|
"logps/chosen": -249.6155242919922, |
|
"logps/rejected": -364.9371337890625, |
|
"loss": 0.0769, |
|
"rewards/chosen": 2.7065796852111816, |
|
"rewards/margins": 14.05157470703125, |
|
"rewards/rejected": -11.344995498657227, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 16.8466739654541, |
|
"kl": 0.0, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logps/chosen": -238.05673217773438, |
|
"logps/rejected": -373.03546142578125, |
|
"loss": 0.0748, |
|
"rewards/chosen": 2.9012789726257324, |
|
"rewards/margins": 14.46104621887207, |
|
"rewards/rejected": -11.55976676940918, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 14.22836685180664, |
|
"kl": 0.0, |
|
"learning_rate": 1.0241820768136557e-07, |
|
"logps/chosen": -224.74887084960938, |
|
"logps/rejected": -332.44415283203125, |
|
"loss": 0.084, |
|
"rewards/chosen": 2.65814208984375, |
|
"rewards/margins": 13.293182373046875, |
|
"rewards/rejected": -10.635040283203125, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 11.591891288757324, |
|
"kl": 0.0, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logps/chosen": -280.44427490234375, |
|
"logps/rejected": -365.83392333984375, |
|
"loss": 0.0801, |
|
"rewards/chosen": 2.5924744606018066, |
|
"rewards/margins": 14.31025505065918, |
|
"rewards/rejected": -11.717779159545898, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 14.504942893981934, |
|
"kl": 0.0, |
|
"learning_rate": 9.886201991465149e-08, |
|
"logps/chosen": -277.6451416015625, |
|
"logps/rejected": -350.4718322753906, |
|
"loss": 0.0661, |
|
"rewards/chosen": 2.9364330768585205, |
|
"rewards/margins": 14.681096076965332, |
|
"rewards/rejected": -11.744662284851074, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 16.065649032592773, |
|
"kl": 0.0, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logps/chosen": -262.8613586425781, |
|
"logps/rejected": -391.71649169921875, |
|
"loss": 0.0723, |
|
"rewards/chosen": 2.7925260066986084, |
|
"rewards/margins": 15.433810234069824, |
|
"rewards/rejected": -12.641283988952637, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 21.038543701171875, |
|
"kl": 0.0, |
|
"learning_rate": 9.530583214793741e-08, |
|
"logps/chosen": -262.80584716796875, |
|
"logps/rejected": -380.09625244140625, |
|
"loss": 0.0801, |
|
"rewards/chosen": 2.761460781097412, |
|
"rewards/margins": 15.412538528442383, |
|
"rewards/rejected": -12.651077270507812, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 18.872102737426758, |
|
"kl": 0.0, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logps/chosen": -285.86712646484375, |
|
"logps/rejected": -342.8421325683594, |
|
"loss": 0.0819, |
|
"rewards/chosen": 2.6993701457977295, |
|
"rewards/margins": 14.52336311340332, |
|
"rewards/rejected": -11.823991775512695, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 15.029848098754883, |
|
"kl": 0.0, |
|
"learning_rate": 9.174964438122331e-08, |
|
"logps/chosen": -245.3412322998047, |
|
"logps/rejected": -356.58172607421875, |
|
"loss": 0.094, |
|
"rewards/chosen": 2.818472385406494, |
|
"rewards/margins": 14.51873779296875, |
|
"rewards/rejected": -11.700265884399414, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 17.082958221435547, |
|
"kl": 0.0, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logps/chosen": -286.61383056640625, |
|
"logps/rejected": -395.6304626464844, |
|
"loss": 0.0716, |
|
"rewards/chosen": 2.8008289337158203, |
|
"rewards/margins": 15.597787857055664, |
|
"rewards/rejected": -12.796960830688477, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 22.768911361694336, |
|
"kl": 0.0, |
|
"learning_rate": 8.819345661450925e-08, |
|
"logps/chosen": -250.6887664794922, |
|
"logps/rejected": -358.8474426269531, |
|
"loss": 0.0929, |
|
"rewards/chosen": 2.7452361583709717, |
|
"rewards/margins": 14.326945304870605, |
|
"rewards/rejected": -11.581708908081055, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 21.478580474853516, |
|
"kl": 0.0, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logps/chosen": -267.79974365234375, |
|
"logps/rejected": -369.04815673828125, |
|
"loss": 0.0838, |
|
"rewards/chosen": 2.744828224182129, |
|
"rewards/margins": 14.584070205688477, |
|
"rewards/rejected": -11.839241027832031, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 12.477672576904297, |
|
"kl": 0.0, |
|
"learning_rate": 8.463726884779517e-08, |
|
"logps/chosen": -239.75399780273438, |
|
"logps/rejected": -383.63812255859375, |
|
"loss": 0.0833, |
|
"rewards/chosen": 2.6762642860412598, |
|
"rewards/margins": 14.272786140441895, |
|
"rewards/rejected": -11.596521377563477, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 19.415245056152344, |
|
"kl": 0.0, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logps/chosen": -258.97406005859375, |
|
"logps/rejected": -372.96173095703125, |
|
"loss": 0.0874, |
|
"rewards/chosen": 2.7613649368286133, |
|
"rewards/margins": 14.08574390411377, |
|
"rewards/rejected": -11.324378967285156, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 13.463936805725098, |
|
"kl": 0.0, |
|
"learning_rate": 8.108108108108108e-08, |
|
"logps/chosen": -228.6225128173828, |
|
"logps/rejected": -344.24688720703125, |
|
"loss": 0.0777, |
|
"rewards/chosen": 2.942438840866089, |
|
"rewards/margins": 14.587237358093262, |
|
"rewards/rejected": -11.644798278808594, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 13.836956977844238, |
|
"kl": 0.0, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logps/chosen": -247.45742797851562, |
|
"logps/rejected": -371.2969055175781, |
|
"loss": 0.0721, |
|
"rewards/chosen": 3.017321825027466, |
|
"rewards/margins": 14.967196464538574, |
|
"rewards/rejected": -11.949874877929688, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 16.551321029663086, |
|
"kl": 0.0, |
|
"learning_rate": 7.7524893314367e-08, |
|
"logps/chosen": -230.99227905273438, |
|
"logps/rejected": -384.6443786621094, |
|
"loss": 0.0871, |
|
"rewards/chosen": 2.6670827865600586, |
|
"rewards/margins": 14.841870307922363, |
|
"rewards/rejected": -12.174787521362305, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 15.87748908996582, |
|
"kl": 0.0, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logps/chosen": -266.7972717285156, |
|
"logps/rejected": -335.5400390625, |
|
"loss": 0.0976, |
|
"rewards/chosen": 2.702658176422119, |
|
"rewards/margins": 13.561399459838867, |
|
"rewards/rejected": -10.858741760253906, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 17.662395477294922, |
|
"kl": 0.0, |
|
"learning_rate": 7.396870554765292e-08, |
|
"logps/chosen": -269.71026611328125, |
|
"logps/rejected": -383.00628662109375, |
|
"loss": 0.0921, |
|
"rewards/chosen": 2.5814826488494873, |
|
"rewards/margins": 14.34886646270752, |
|
"rewards/rejected": -11.76738452911377, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 16.09382438659668, |
|
"kl": 0.0, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logps/chosen": -269.3471984863281, |
|
"logps/rejected": -375.32208251953125, |
|
"loss": 0.0886, |
|
"rewards/chosen": 2.8077282905578613, |
|
"rewards/margins": 14.510396003723145, |
|
"rewards/rejected": -11.702667236328125, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 13.27104377746582, |
|
"kl": 0.0, |
|
"learning_rate": 7.041251778093883e-08, |
|
"logps/chosen": -273.42950439453125, |
|
"logps/rejected": -371.14837646484375, |
|
"loss": 0.0685, |
|
"rewards/chosen": 2.944091558456421, |
|
"rewards/margins": 15.448992729187012, |
|
"rewards/rejected": -12.504900932312012, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 12.18476390838623, |
|
"kl": 0.0, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logps/chosen": -256.47161865234375, |
|
"logps/rejected": -384.1866760253906, |
|
"loss": 0.0742, |
|
"rewards/chosen": 2.7861623764038086, |
|
"rewards/margins": 15.34550952911377, |
|
"rewards/rejected": -12.559347152709961, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 23.00941276550293, |
|
"kl": 0.0, |
|
"learning_rate": 6.685633001422475e-08, |
|
"logps/chosen": -242.3636474609375, |
|
"logps/rejected": -404.00848388671875, |
|
"loss": 0.0747, |
|
"rewards/chosen": 2.5074126720428467, |
|
"rewards/margins": 14.919418334960938, |
|
"rewards/rejected": -12.412006378173828, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 10.992572784423828, |
|
"kl": 0.0, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logps/chosen": -248.75967407226562, |
|
"logps/rejected": -368.59063720703125, |
|
"loss": 0.0867, |
|
"rewards/chosen": 2.401717185974121, |
|
"rewards/margins": 14.726252555847168, |
|
"rewards/rejected": -12.32453441619873, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 11.035951614379883, |
|
"kl": 0.0, |
|
"learning_rate": 6.330014224751067e-08, |
|
"logps/chosen": -240.3590850830078, |
|
"logps/rejected": -347.6131896972656, |
|
"loss": 0.0921, |
|
"rewards/chosen": 2.680974245071411, |
|
"rewards/margins": 14.051698684692383, |
|
"rewards/rejected": -11.370722770690918, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 21.619415283203125, |
|
"kl": 0.0, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logps/chosen": -240.7654266357422, |
|
"logps/rejected": -381.09307861328125, |
|
"loss": 0.087, |
|
"rewards/chosen": 2.646531105041504, |
|
"rewards/margins": 14.633687019348145, |
|
"rewards/rejected": -11.987154960632324, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 20.996952056884766, |
|
"kl": 0.0, |
|
"learning_rate": 5.974395448079659e-08, |
|
"logps/chosen": -228.12826538085938, |
|
"logps/rejected": -373.8050231933594, |
|
"loss": 0.0903, |
|
"rewards/chosen": 2.5932085514068604, |
|
"rewards/margins": 14.422918319702148, |
|
"rewards/rejected": -11.829710960388184, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 12.941699981689453, |
|
"kl": 0.0, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logps/chosen": -266.34759521484375, |
|
"logps/rejected": -377.67572021484375, |
|
"loss": 0.0733, |
|
"rewards/chosen": 2.6969380378723145, |
|
"rewards/margins": 14.922724723815918, |
|
"rewards/rejected": -12.225786209106445, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 18.563539505004883, |
|
"kl": 0.0, |
|
"learning_rate": 5.61877667140825e-08, |
|
"logps/chosen": -233.05337524414062, |
|
"logps/rejected": -351.65478515625, |
|
"loss": 0.0819, |
|
"rewards/chosen": 2.4380977153778076, |
|
"rewards/margins": 14.38970947265625, |
|
"rewards/rejected": -11.951611518859863, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 14.77774715423584, |
|
"kl": 0.0, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logps/chosen": -272.6517028808594, |
|
"logps/rejected": -359.89013671875, |
|
"loss": 0.0762, |
|
"rewards/chosen": 2.888676404953003, |
|
"rewards/margins": 14.460576057434082, |
|
"rewards/rejected": -11.571900367736816, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 15.590229988098145, |
|
"kl": 0.0, |
|
"learning_rate": 5.2631578947368416e-08, |
|
"logps/chosen": -256.3451843261719, |
|
"logps/rejected": -375.3144226074219, |
|
"loss": 0.0762, |
|
"rewards/chosen": 2.667102336883545, |
|
"rewards/margins": 15.10230827331543, |
|
"rewards/rejected": -12.435206413269043, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 10.8274507522583, |
|
"kl": 0.0, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logps/chosen": -268.83636474609375, |
|
"logps/rejected": -372.27838134765625, |
|
"loss": 0.0648, |
|
"rewards/chosen": 2.9176650047302246, |
|
"rewards/margins": 14.840046882629395, |
|
"rewards/rejected": -11.922381401062012, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 15.774340629577637, |
|
"kl": 0.0, |
|
"learning_rate": 4.9075391180654337e-08, |
|
"logps/chosen": -260.6099853515625, |
|
"logps/rejected": -358.04241943359375, |
|
"loss": 0.0585, |
|
"rewards/chosen": 2.9233345985412598, |
|
"rewards/margins": 14.653867721557617, |
|
"rewards/rejected": -11.730535507202148, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 15.554561614990234, |
|
"kl": 0.0, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logps/chosen": -238.23739624023438, |
|
"logps/rejected": -386.2373962402344, |
|
"loss": 0.0833, |
|
"rewards/chosen": 2.658268451690674, |
|
"rewards/margins": 14.15376091003418, |
|
"rewards/rejected": -11.49549388885498, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 17.29914093017578, |
|
"kl": 0.0, |
|
"learning_rate": 4.551920341394026e-08, |
|
"logps/chosen": -248.63204956054688, |
|
"logps/rejected": -371.215087890625, |
|
"loss": 0.0872, |
|
"rewards/chosen": 2.9267373085021973, |
|
"rewards/margins": 14.192481994628906, |
|
"rewards/rejected": -11.265745162963867, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 14.301885604858398, |
|
"kl": 0.0, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logps/chosen": -240.0107421875, |
|
"logps/rejected": -355.2042541503906, |
|
"loss": 0.0846, |
|
"rewards/chosen": 2.4140007495880127, |
|
"rewards/margins": 13.864062309265137, |
|
"rewards/rejected": -11.450060844421387, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 13.32541275024414, |
|
"kl": 0.0, |
|
"learning_rate": 4.196301564722617e-08, |
|
"logps/chosen": -247.22152709960938, |
|
"logps/rejected": -360.07110595703125, |
|
"loss": 0.0659, |
|
"rewards/chosen": 2.7749335765838623, |
|
"rewards/margins": 14.867749214172363, |
|
"rewards/rejected": -12.092815399169922, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 12.356550216674805, |
|
"kl": 0.0, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logps/chosen": -256.69683837890625, |
|
"logps/rejected": -363.08489990234375, |
|
"loss": 0.0799, |
|
"rewards/chosen": 2.8861594200134277, |
|
"rewards/margins": 14.505203247070312, |
|
"rewards/rejected": -11.619044303894043, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 14.969185829162598, |
|
"kl": 0.0, |
|
"learning_rate": 3.840682788051209e-08, |
|
"logps/chosen": -254.7665252685547, |
|
"logps/rejected": -354.30841064453125, |
|
"loss": 0.0757, |
|
"rewards/chosen": 2.8421378135681152, |
|
"rewards/margins": 14.373845100402832, |
|
"rewards/rejected": -11.531707763671875, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 17.030691146850586, |
|
"kl": 0.0, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logps/chosen": -263.8084411621094, |
|
"logps/rejected": -395.4825439453125, |
|
"loss": 0.0759, |
|
"rewards/chosen": 2.9121432304382324, |
|
"rewards/margins": 15.505640983581543, |
|
"rewards/rejected": -12.593496322631836, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 12.211559295654297, |
|
"kl": 0.0, |
|
"learning_rate": 3.4850640113798006e-08, |
|
"logps/chosen": -236.26431274414062, |
|
"logps/rejected": -357.1573791503906, |
|
"loss": 0.0755, |
|
"rewards/chosen": 2.843881130218506, |
|
"rewards/margins": 14.507527351379395, |
|
"rewards/rejected": -11.66364574432373, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 17.574806213378906, |
|
"kl": 0.0, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logps/chosen": -285.59942626953125, |
|
"logps/rejected": -368.08245849609375, |
|
"loss": 0.0676, |
|
"rewards/chosen": 2.877531051635742, |
|
"rewards/margins": 14.671765327453613, |
|
"rewards/rejected": -11.794233322143555, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 13.167614936828613, |
|
"kl": 0.0, |
|
"learning_rate": 3.129445234708392e-08, |
|
"logps/chosen": -240.7215576171875, |
|
"logps/rejected": -382.0995788574219, |
|
"loss": 0.0651, |
|
"rewards/chosen": 2.778978109359741, |
|
"rewards/margins": 14.833264350891113, |
|
"rewards/rejected": -12.054286003112793, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 19.462112426757812, |
|
"kl": 0.0, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logps/chosen": -243.0684356689453, |
|
"logps/rejected": -366.4331359863281, |
|
"loss": 0.0802, |
|
"rewards/chosen": 2.8256452083587646, |
|
"rewards/margins": 15.236946105957031, |
|
"rewards/rejected": -12.411300659179688, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 20.093833923339844, |
|
"kl": 0.0, |
|
"learning_rate": 2.7738264580369844e-08, |
|
"logps/chosen": -209.17770385742188, |
|
"logps/rejected": -359.9656677246094, |
|
"loss": 0.0839, |
|
"rewards/chosen": 2.901711940765381, |
|
"rewards/margins": 14.679159164428711, |
|
"rewards/rejected": -11.777449607849121, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 13.270203590393066, |
|
"kl": 0.0, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logps/chosen": -272.44610595703125, |
|
"logps/rejected": -357.05975341796875, |
|
"loss": 0.0868, |
|
"rewards/chosen": 2.6972391605377197, |
|
"rewards/margins": 15.144004821777344, |
|
"rewards/rejected": -12.446764945983887, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 14.922286987304688, |
|
"kl": 0.0, |
|
"learning_rate": 2.418207681365576e-08, |
|
"logps/chosen": -238.18191528320312, |
|
"logps/rejected": -367.4979553222656, |
|
"loss": 0.0824, |
|
"rewards/chosen": 2.5710983276367188, |
|
"rewards/margins": 14.130340576171875, |
|
"rewards/rejected": -11.559242248535156, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.186229705810547, |
|
"kl": 0.0, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logps/chosen": -240.6565399169922, |
|
"logps/rejected": -387.65936279296875, |
|
"loss": 0.0783, |
|
"rewards/chosen": 2.6365768909454346, |
|
"rewards/margins": 15.131543159484863, |
|
"rewards/rejected": -12.494966506958008, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 14.05397891998291, |
|
"kl": 0.0, |
|
"learning_rate": 2.0625889046941676e-08, |
|
"logps/chosen": -307.04583740234375, |
|
"logps/rejected": -361.1058654785156, |
|
"loss": 0.0903, |
|
"rewards/chosen": 2.804368257522583, |
|
"rewards/margins": 14.683168411254883, |
|
"rewards/rejected": -11.878799438476562, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 10.290091514587402, |
|
"kl": 0.0, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logps/chosen": -280.72406005859375, |
|
"logps/rejected": -393.7200622558594, |
|
"loss": 0.0643, |
|
"rewards/chosen": 2.76891827583313, |
|
"rewards/margins": 15.010998725891113, |
|
"rewards/rejected": -12.24207878112793, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 13.187542915344238, |
|
"kl": 0.0, |
|
"learning_rate": 1.7069701280227596e-08, |
|
"logps/chosen": -250.7726287841797, |
|
"logps/rejected": -378.99993896484375, |
|
"loss": 0.0697, |
|
"rewards/chosen": 2.8748562335968018, |
|
"rewards/margins": 15.209932327270508, |
|
"rewards/rejected": -12.335077285766602, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 13.64420223236084, |
|
"kl": 0.0, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logps/chosen": -253.7127227783203, |
|
"logps/rejected": -367.41558837890625, |
|
"loss": 0.0824, |
|
"rewards/chosen": 3.031802177429199, |
|
"rewards/margins": 14.762832641601562, |
|
"rewards/rejected": -11.73102855682373, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 10.866116523742676, |
|
"kl": 0.0, |
|
"learning_rate": 1.3513513513513514e-08, |
|
"logps/chosen": -250.94400024414062, |
|
"logps/rejected": -381.5174865722656, |
|
"loss": 0.0793, |
|
"rewards/chosen": 2.8337435722351074, |
|
"rewards/margins": 15.124834060668945, |
|
"rewards/rejected": -12.29109001159668, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 11.604872703552246, |
|
"kl": 0.0, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logps/chosen": -233.5682830810547, |
|
"logps/rejected": -377.261962890625, |
|
"loss": 0.0687, |
|
"rewards/chosen": 3.119915008544922, |
|
"rewards/margins": 15.134057998657227, |
|
"rewards/rejected": -12.014142990112305, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 20.006731033325195, |
|
"kl": 0.0, |
|
"learning_rate": 9.95732574679943e-09, |
|
"logps/chosen": -257.07525634765625, |
|
"logps/rejected": -346.5194396972656, |
|
"loss": 0.0907, |
|
"rewards/chosen": 2.676340103149414, |
|
"rewards/margins": 14.137725830078125, |
|
"rewards/rejected": -11.461384773254395, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 17.203874588012695, |
|
"kl": 0.0, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logps/chosen": -226.61843872070312, |
|
"logps/rejected": -381.11029052734375, |
|
"loss": 0.0683, |
|
"rewards/chosen": 2.940342903137207, |
|
"rewards/margins": 15.23707103729248, |
|
"rewards/rejected": -12.296728134155273, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 14.477093696594238, |
|
"kl": 0.0, |
|
"learning_rate": 6.401137980085348e-09, |
|
"logps/chosen": -240.14157104492188, |
|
"logps/rejected": -388.9137268066406, |
|
"loss": 0.0658, |
|
"rewards/chosen": 2.875831127166748, |
|
"rewards/margins": 15.294588088989258, |
|
"rewards/rejected": -12.418756484985352, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 10.444808959960938, |
|
"kl": 0.0, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logps/chosen": -273.6938781738281, |
|
"logps/rejected": -361.58782958984375, |
|
"loss": 0.0592, |
|
"rewards/chosen": 3.0903258323669434, |
|
"rewards/margins": 15.089597702026367, |
|
"rewards/rejected": -11.999273300170898, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 13.01125717163086, |
|
"kl": 0.0, |
|
"learning_rate": 2.844950213371266e-09, |
|
"logps/chosen": -263.2748107910156, |
|
"logps/rejected": -382.49481201171875, |
|
"loss": 0.0724, |
|
"rewards/chosen": 2.801457166671753, |
|
"rewards/margins": 14.89887809753418, |
|
"rewards/rejected": -12.097421646118164, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 13.397442817687988, |
|
"kl": 0.0, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logps/chosen": -225.07894897460938, |
|
"logps/rejected": -353.08856201171875, |
|
"loss": 0.0824, |
|
"rewards/chosen": 2.47590970993042, |
|
"rewards/margins": 14.8174409866333, |
|
"rewards/rejected": -12.341531753540039, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.11715243643320149, |
|
"train_runtime": 11089.6574, |
|
"train_samples_per_second": 9.017, |
|
"train_steps_per_second": 0.141 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|