|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9984168865435357,
  "eval_steps": 400,
  "global_step": 473,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0021108179419525065,
      "grad_norm": 3.792602400172418,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -0.723710298538208,
      "logits/rejected": -1.1678439378738403,
      "logps/chosen": -266.5860900878906,
      "logps/rejected": -246.2262420654297,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.010554089709762533,
      "grad_norm": 5.35027261694182,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -0.6524915099143982,
      "logits/rejected": -0.9277956485748291,
      "logps/chosen": -282.5875549316406,
      "logps/rejected": -269.2027893066406,
      "loss": 0.6933,
      "rewards/accuracies": 0.3828125,
      "rewards/chosen": 0.000355295545887202,
      "rewards/margins": -0.00032308147638104856,
      "rewards/rejected": 0.000678377109579742,
      "step": 5
    },
    {
      "epoch": 0.021108179419525065,
      "grad_norm": 5.266933872220353,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -0.6941147446632385,
      "logits/rejected": -1.03800368309021,
      "logps/chosen": -290.0839538574219,
      "logps/rejected": -274.08502197265625,
      "loss": 0.6931,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.0008805571123957634,
      "rewards/margins": -0.0002368297427892685,
      "rewards/rejected": 0.00111738673876971,
      "step": 10
    },
    {
      "epoch": 0.0316622691292876,
      "grad_norm": 4.4222736963146785,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -0.6915597319602966,
      "logits/rejected": -1.0270450115203857,
      "logps/chosen": -286.4000549316406,
      "logps/rejected": -268.19305419921875,
      "loss": 0.6931,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0015847303438931704,
      "rewards/margins": -0.00021869130432605743,
      "rewards/rejected": 0.0018034216482192278,
      "step": 15
    },
    {
      "epoch": 0.04221635883905013,
      "grad_norm": 4.370999160332841,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -0.6628856658935547,
      "logits/rejected": -1.0627143383026123,
      "logps/chosen": -281.633056640625,
      "logps/rejected": -258.80975341796875,
      "loss": 0.6928,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": 0.004043369088321924,
      "rewards/margins": 0.0007513560703955591,
      "rewards/rejected": 0.0032920129597187042,
      "step": 20
    },
    {
      "epoch": 0.052770448548812667,
      "grad_norm": 4.295540874340828,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": -0.6402955651283264,
      "logits/rejected": -0.9882392883300781,
      "logps/chosen": -303.6094055175781,
      "logps/rejected": -278.68792724609375,
      "loss": 0.6921,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.012096477672457695,
      "rewards/margins": 0.002340012462809682,
      "rewards/rejected": 0.009756465442478657,
      "step": 25
    },
    {
      "epoch": 0.0633245382585752,
      "grad_norm": 4.480110631795238,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -0.6986342668533325,
      "logits/rejected": -1.0124592781066895,
      "logps/chosen": -277.3695983886719,
      "logps/rejected": -256.33648681640625,
      "loss": 0.6908,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.019197864457964897,
      "rewards/margins": 0.006392383016645908,
      "rewards/rejected": 0.01280547957867384,
      "step": 30
    },
    {
      "epoch": 0.07387862796833773,
      "grad_norm": 4.572546926633594,
      "learning_rate": 3.645833333333333e-07,
      "logits/chosen": -0.7217592597007751,
      "logits/rejected": -0.9826194047927856,
      "logps/chosen": -276.353515625,
      "logps/rejected": -269.84747314453125,
      "loss": 0.6889,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.03310415893793106,
      "rewards/margins": 0.008944300934672356,
      "rewards/rejected": 0.024159858003258705,
      "step": 35
    },
    {
      "epoch": 0.08443271767810026,
      "grad_norm": 3.950940685241822,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.6703137755393982,
      "logits/rejected": -1.0556083917617798,
      "logps/chosen": -277.72515869140625,
      "logps/rejected": -255.3736572265625,
      "loss": 0.6856,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.044867198914289474,
      "rewards/margins": 0.01742670312523842,
      "rewards/rejected": 0.027440497651696205,
      "step": 40
    },
    {
      "epoch": 0.09498680738786279,
      "grad_norm": 4.408045626085674,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -0.7604807615280151,
      "logits/rejected": -1.0656068325042725,
      "logps/chosen": -283.796142578125,
      "logps/rejected": -269.21075439453125,
      "loss": 0.6824,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.055293601006269455,
      "rewards/margins": 0.017781417816877365,
      "rewards/rejected": 0.03751217946410179,
      "step": 45
    },
    {
      "epoch": 0.10554089709762533,
      "grad_norm": 4.594023555859445,
      "learning_rate": 4.999726797933858e-07,
      "logits/chosen": -0.7825593948364258,
      "logits/rejected": -1.0136535167694092,
      "logps/chosen": -268.57232666015625,
      "logps/rejected": -254.4635772705078,
      "loss": 0.6786,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.04131672903895378,
      "rewards/margins": 0.02473551593720913,
      "rewards/rejected": 0.016581213101744652,
      "step": 50
    },
    {
      "epoch": 0.11609498680738786,
      "grad_norm": 4.732128821227025,
      "learning_rate": 4.99665396039775e-07,
      "logits/chosen": -0.8582944869995117,
      "logits/rejected": -1.092308759689331,
      "logps/chosen": -272.50872802734375,
      "logps/rejected": -269.22015380859375,
      "loss": 0.6711,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.015134250745177269,
      "rewards/margins": 0.03893275931477547,
      "rewards/rejected": -0.02379850670695305,
      "step": 55
    },
    {
      "epoch": 0.1266490765171504,
      "grad_norm": 5.480552136086532,
      "learning_rate": 4.99017099386437e-07,
      "logits/chosen": -0.9315390586853027,
      "logits/rejected": -1.1771332025527954,
      "logps/chosen": -278.89837646484375,
      "logps/rejected": -268.14080810546875,
      "loss": 0.6679,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.014189760200679302,
      "rewards/margins": 0.06192191690206528,
      "rewards/rejected": -0.07611168175935745,
      "step": 60
    },
    {
      "epoch": 0.13720316622691292,
      "grad_norm": 5.176626164434011,
      "learning_rate": 4.980286753286194e-07,
      "logits/chosen": -0.8333457708358765,
      "logits/rejected": -1.3162130117416382,
      "logps/chosen": -288.89825439453125,
      "logps/rejected": -264.5441589355469,
      "loss": 0.6667,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.045755136758089066,
      "rewards/margins": 0.08817130327224731,
      "rewards/rejected": -0.13392645120620728,
      "step": 65
    },
    {
      "epoch": 0.14775725593667546,
      "grad_norm": 5.725175266189831,
      "learning_rate": 4.967014739346915e-07,
      "logits/chosen": -0.9382959604263306,
      "logits/rejected": -1.3034207820892334,
      "logps/chosen": -273.29193115234375,
      "logps/rejected": -274.21929931640625,
      "loss": 0.6606,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.05725777894258499,
      "rewards/margins": 0.08167224377393723,
      "rewards/rejected": -0.13892999291419983,
      "step": 70
    },
    {
      "epoch": 0.158311345646438,
      "grad_norm": 5.9050273856078395,
      "learning_rate": 4.950373080021136e-07,
      "logits/chosen": -1.0476350784301758,
      "logits/rejected": -1.337590217590332,
      "logps/chosen": -292.19378662109375,
      "logps/rejected": -282.83001708984375,
      "loss": 0.6585,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.07018107920885086,
      "rewards/margins": 0.08405766636133194,
      "rewards/rejected": -0.1542387306690216,
      "step": 75
    },
    {
      "epoch": 0.16886543535620052,
      "grad_norm": 5.714632118731764,
      "learning_rate": 4.930384505813737e-07,
      "logits/chosen": -0.9645854830741882,
      "logits/rejected": -1.3480749130249023,
      "logps/chosen": -290.5950012207031,
      "logps/rejected": -275.71417236328125,
      "loss": 0.6617,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.11630520969629288,
      "rewards/margins": 0.08103077113628387,
      "rewards/rejected": -0.19733598828315735,
      "step": 80
    },
    {
      "epoch": 0.17941952506596306,
      "grad_norm": 6.048274761863404,
      "learning_rate": 4.907076318712738e-07,
      "logits/chosen": -1.0770204067230225,
      "logits/rejected": -1.342997431755066,
      "logps/chosen": -301.7802734375,
      "logps/rejected": -287.3224792480469,
      "loss": 0.6561,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.13322284817695618,
      "rewards/margins": 0.07080608606338501,
      "rewards/rejected": -0.2040289342403412,
      "step": 85
    },
    {
      "epoch": 0.18997361477572558,
      "grad_norm": 5.616972735220456,
      "learning_rate": 4.88048035489807e-07,
      "logits/chosen": -1.0288609266281128,
      "logits/rejected": -1.537954568862915,
      "logps/chosen": -303.514892578125,
      "logps/rejected": -282.09832763671875,
      "loss": 0.6458,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.12775097787380219,
      "rewards/margins": 0.11901189386844635,
      "rewards/rejected": -0.24676287174224854,
      "step": 90
    },
    {
      "epoch": 0.20052770448548812,
      "grad_norm": 6.041190762428844,
      "learning_rate": 4.85063294125718e-07,
      "logits/chosen": -1.1466128826141357,
      "logits/rejected": -1.4186201095581055,
      "logps/chosen": -323.9360046386719,
      "logps/rejected": -326.41461181640625,
      "loss": 0.6493,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.18756112456321716,
      "rewards/margins": 0.12050308287143707,
      "rewards/rejected": -0.3080642521381378,
      "step": 95
    },
    {
      "epoch": 0.21108179419525067,
      "grad_norm": 7.792002911640772,
      "learning_rate": 4.817574845766874e-07,
      "logits/chosen": -1.1385769844055176,
      "logits/rejected": -1.4923776388168335,
      "logps/chosen": -314.1307373046875,
      "logps/rejected": -307.49102783203125,
      "loss": 0.6441,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.26007553935050964,
      "rewards/margins": 0.1371382772922516,
      "rewards/rejected": -0.397213876247406,
      "step": 100
    },
    {
      "epoch": 0.22163588390501318,
      "grad_norm": 6.885087311095594,
      "learning_rate": 4.781351221809166e-07,
      "logits/chosen": -1.1828514337539673,
      "logits/rejected": -1.624103307723999,
      "logps/chosen": -304.28204345703125,
      "logps/rejected": -294.31048583984375,
      "loss": 0.6373,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.30514588952064514,
      "rewards/margins": 0.1688612401485443,
      "rewards/rejected": -0.47400718927383423,
      "step": 105
    },
    {
      "epoch": 0.23218997361477572,
      "grad_norm": 8.481883842604432,
      "learning_rate": 4.742011546497182e-07,
      "logits/chosen": -1.212425947189331,
      "logits/rejected": -1.3756533861160278,
      "logps/chosen": -313.9586486816406,
      "logps/rejected": -320.29425048828125,
      "loss": 0.6538,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.30393490195274353,
      "rewards/margins": 0.1464935690164566,
      "rewards/rejected": -0.45042848587036133,
      "step": 110
    },
    {
      "epoch": 0.24274406332453827,
      "grad_norm": 7.149769163847217,
      "learning_rate": 4.6996095530953875e-07,
      "logits/chosen": -1.2339892387390137,
      "logits/rejected": -1.58319890499115,
      "logps/chosen": -315.6721496582031,
      "logps/rejected": -308.2062072753906,
      "loss": 0.6291,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.32919472455978394,
      "rewards/margins": 0.1386784464120865,
      "rewards/rejected": -0.4678731858730316,
      "step": 115
    },
    {
      "epoch": 0.2532981530343008,
      "grad_norm": 7.759815340386084,
      "learning_rate": 4.654203157626399e-07,
      "logits/chosen": -1.2471096515655518,
      "logits/rejected": -1.6236129999160767,
      "logps/chosen": -341.6539611816406,
      "logps/rejected": -330.80926513671875,
      "loss": 0.6335,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.4439183175563812,
      "rewards/margins": 0.12948934733867645,
      "rewards/rejected": -0.5734077095985413,
      "step": 120
    },
    {
      "epoch": 0.2638522427440633,
      "grad_norm": 8.303750659351337,
      "learning_rate": 4.605854379764673e-07,
      "logits/chosen": -1.2065553665161133,
      "logits/rejected": -1.5575497150421143,
      "logps/chosen": -347.19696044921875,
      "logps/rejected": -339.4477233886719,
      "loss": 0.63,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.4391602873802185,
      "rewards/margins": 0.14842209219932556,
      "rewards/rejected": -0.5875824093818665,
      "step": 125
    },
    {
      "epoch": 0.27440633245382584,
      "grad_norm": 7.626112760961139,
      "learning_rate": 4.5546292581250857e-07,
      "logits/chosen": -1.1812589168548584,
      "logits/rejected": -1.513511300086975,
      "logps/chosen": -325.56005859375,
      "logps/rejected": -315.3307800292969,
      "loss": 0.6305,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.5028723478317261,
      "rewards/margins": 0.12545283138751984,
      "rewards/rejected": -0.6283251643180847,
      "step": 130
    },
    {
      "epoch": 0.2849604221635884,
      "grad_norm": 8.681810962953072,
      "learning_rate": 4.5005977600621275e-07,
      "logits/chosen": -1.33579683303833,
      "logits/rejected": -1.586660623550415,
      "logps/chosen": -343.98089599609375,
      "logps/rejected": -351.74066162109375,
      "loss": 0.631,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.5469980835914612,
      "rewards/margins": 0.19922946393489838,
      "rewards/rejected": -0.7462274432182312,
      "step": 135
    },
    {
      "epoch": 0.2955145118733509,
      "grad_norm": 9.263751197369732,
      "learning_rate": 4.443833686102919e-07,
      "logits/chosen": -1.4017233848571777,
      "logits/rejected": -1.7090505361557007,
      "logps/chosen": -355.2716369628906,
      "logps/rejected": -371.23492431640625,
      "loss": 0.6335,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.667505145072937,
      "rewards/margins": 0.2195053994655609,
      "rewards/rejected": -0.8870105743408203,
      "step": 140
    },
    {
      "epoch": 0.30606860158311344,
      "grad_norm": 8.944976382840098,
      "learning_rate": 4.384414569144561e-07,
      "logits/chosen": -1.3571860790252686,
      "logits/rejected": -1.624506950378418,
      "logps/chosen": -356.50885009765625,
      "logps/rejected": -361.44512939453125,
      "loss": 0.6242,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.723587691783905,
      "rewards/margins": 0.22243139147758484,
      "rewards/rejected": -0.9460189938545227,
      "step": 145
    },
    {
      "epoch": 0.316622691292876,
      "grad_norm": 9.048728108809618,
      "learning_rate": 4.3224215685535287e-07,
      "logits/chosen": -1.2304835319519043,
      "logits/rejected": -1.607114553451538,
      "logps/chosen": -340.3996887207031,
      "logps/rejected": -343.8750915527344,
      "loss": 0.6193,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.5864183902740479,
      "rewards/margins": 0.2611897587776184,
      "rewards/rejected": -0.8476082682609558,
      "step": 150
    },
    {
      "epoch": 0.32717678100263853,
      "grad_norm": 10.012310357130646,
      "learning_rate": 4.2579393593117364e-07,
      "logits/chosen": -1.3340481519699097,
      "logits/rejected": -1.707767128944397,
      "logps/chosen": -366.13104248046875,
      "logps/rejected": -364.83026123046875,
      "loss": 0.6204,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.7475859522819519,
      "rewards/margins": 0.2101312130689621,
      "rewards/rejected": -0.9577171206474304,
      "step": 155
    },
    {
      "epoch": 0.33773087071240104,
      "grad_norm": 9.68044164663275,
      "learning_rate": 4.191056016360699e-07,
      "logits/chosen": -1.394718050956726,
      "logits/rejected": -1.6881500482559204,
      "logps/chosen": -368.72381591796875,
      "logps/rejected": -381.956298828125,
      "loss": 0.6135,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.8789850473403931,
      "rewards/margins": 0.3012150526046753,
      "rewards/rejected": -1.1802000999450684,
      "step": 160
    },
    {
      "epoch": 0.3482849604221636,
      "grad_norm": 10.276456210059177,
      "learning_rate": 4.121862894301754e-07,
      "logits/chosen": -1.3367292881011963,
      "logits/rejected": -1.7920604944229126,
      "logps/chosen": -379.0816650390625,
      "logps/rejected": -372.62432861328125,
      "loss": 0.6186,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.8941882252693176,
      "rewards/margins": 0.2552604675292969,
      "rewards/rejected": -1.1494486331939697,
      "step": 165
    },
    {
      "epoch": 0.35883905013192613,
      "grad_norm": 10.349641550261767,
      "learning_rate": 4.050454502616667e-07,
      "logits/chosen": -1.3888546228408813,
      "logits/rejected": -1.7364885807037354,
      "logps/chosen": -375.4383239746094,
      "logps/rejected": -369.5252685546875,
      "loss": 0.6183,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.8307794332504272,
      "rewards/margins": 0.19674496352672577,
      "rewards/rejected": -1.027524471282959,
      "step": 170
    },
    {
      "epoch": 0.36939313984168864,
      "grad_norm": 10.29658804390271,
      "learning_rate": 3.976928376579047e-07,
      "logits/chosen": -1.4784464836120605,
      "logits/rejected": -1.8144117593765259,
      "logps/chosen": -355.7376708984375,
      "logps/rejected": -354.1457824707031,
      "loss": 0.6153,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.8430948257446289,
      "rewards/margins": 0.21338331699371338,
      "rewards/rejected": -1.0564781427383423,
      "step": 175
    },
    {
      "epoch": 0.37994722955145116,
      "grad_norm": 20.628198563240826,
      "learning_rate": 3.9013849440328945e-07,
      "logits/chosen": -1.3779172897338867,
      "logits/rejected": -1.7602001428604126,
      "logps/chosen": -353.769287109375,
      "logps/rejected": -358.7577209472656,
      "loss": 0.6204,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.8876091837882996,
      "rewards/margins": 0.22880685329437256,
      "rewards/rejected": -1.1164162158966064,
      "step": 180
    },
    {
      "epoch": 0.39050131926121373,
      "grad_norm": 10.868907026626026,
      "learning_rate": 3.8239273882202473e-07,
      "logits/chosen": -1.439247488975525,
      "logits/rejected": -1.8125137090682983,
      "logps/chosen": -412.8868103027344,
      "logps/rejected": -431.59063720703125,
      "loss": 0.6016,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.1018221378326416,
      "rewards/margins": 0.4088074564933777,
      "rewards/rejected": -1.5106297731399536,
      "step": 185
    },
    {
      "epoch": 0.40105540897097625,
      "grad_norm": 10.784941413981636,
      "learning_rate": 3.7446615068452804e-07,
      "logits/chosen": -1.4441838264465332,
      "logits/rejected": -1.7783229351043701,
      "logps/chosen": -398.41009521484375,
      "logps/rejected": -396.8212890625,
      "loss": 0.594,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.115227222442627,
      "rewards/margins": 0.2704046070575714,
      "rewards/rejected": -1.3856319189071655,
      "step": 190
    },
    {
      "epoch": 0.41160949868073876,
      "grad_norm": 10.229960177651233,
      "learning_rate": 3.6636955675673743e-07,
      "logits/chosen": -1.5908405780792236,
      "logits/rejected": -1.9355300664901733,
      "logps/chosen": -426.3243713378906,
      "logps/rejected": -420.7511291503906,
      "loss": 0.5957,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.1847457885742188,
      "rewards/margins": 0.32065972685813904,
      "rewards/rejected": -1.5054056644439697,
      "step": 195
    },
    {
      "epoch": 0.42216358839050133,
      "grad_norm": 18.20685869729302,
      "learning_rate": 3.5811401601205093e-07,
      "logits/chosen": -1.6325582265853882,
      "logits/rejected": -1.8879244327545166,
      "logps/chosen": -426.10943603515625,
      "logps/rejected": -426.29376220703125,
      "loss": 0.6339,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.3495900630950928,
      "rewards/margins": 0.15765051543712616,
      "rewards/rejected": -1.507240653038025,
      "step": 200
    },
    {
      "epoch": 0.43271767810026385,
      "grad_norm": 10.716178488233457,
      "learning_rate": 3.497108045260995e-07,
      "logits/chosen": -1.6447012424468994,
      "logits/rejected": -1.9266440868377686,
      "logps/chosen": -422.4698181152344,
      "logps/rejected": -423.3296813964844,
      "loss": 0.6095,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -1.2483638525009155,
      "rewards/margins": 0.21740670502185822,
      "rewards/rejected": -1.4657707214355469,
      "step": 205
    },
    {
      "epoch": 0.44327176781002636,
      "grad_norm": 9.319577970375986,
      "learning_rate": 3.411714000749838e-07,
      "logits/chosen": -1.5758410692214966,
      "logits/rejected": -1.9720706939697266,
      "logps/chosen": -413.7496032714844,
      "logps/rejected": -432.4217834472656,
      "loss": 0.5971,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.2690123319625854,
      "rewards/margins": 0.31965065002441406,
      "rewards/rejected": -1.58866286277771,
      "step": 210
    },
    {
      "epoch": 0.45382585751978893,
      "grad_norm": 18.334377917058617,
      "learning_rate": 3.3250746645801287e-07,
      "logits/chosen": -1.6151403188705444,
      "logits/rejected": -1.9621028900146484,
      "logps/chosen": -431.717529296875,
      "logps/rejected": -438.23095703125,
      "loss": 0.5914,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.601030945777893,
      "rewards/margins": 0.29736214876174927,
      "rewards/rejected": -1.8983930349349976,
      "step": 215
    },
    {
      "epoch": 0.46437994722955145,
      "grad_norm": 13.987559233928428,
      "learning_rate": 3.237308375663571e-07,
      "logits/chosen": -1.5672855377197266,
      "logits/rejected": -1.8798201084136963,
      "logps/chosen": -465.22882080078125,
      "logps/rejected": -480.69036865234375,
      "loss": 0.5731,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.6366822719573975,
      "rewards/margins": 0.33864718675613403,
      "rewards/rejected": -1.9753293991088867,
      "step": 220
    },
    {
      "epoch": 0.47493403693931396,
      "grad_norm": 15.585874610978292,
      "learning_rate": 3.148535012193767e-07,
      "logits/chosen": -1.4787318706512451,
      "logits/rejected": -1.7937052249908447,
      "logps/chosen": -463.3704528808594,
      "logps/rejected": -513.5693359375,
      "loss": 0.5913,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.7906410694122314,
      "rewards/margins": 0.637313723564148,
      "rewards/rejected": -2.42795467376709,
      "step": 225
    },
    {
      "epoch": 0.48548812664907653,
      "grad_norm": 10.989676492328872,
      "learning_rate": 3.0588758279070183e-07,
      "logits/chosen": -1.4634826183319092,
      "logits/rejected": -1.688738226890564,
      "logps/chosen": -402.5445556640625,
      "logps/rejected": -404.0518493652344,
      "loss": 0.62,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.4222261905670166,
      "rewards/margins": 0.1772518903017044,
      "rewards/rejected": -1.599478006362915,
      "step": 230
    },
    {
      "epoch": 0.49604221635883905,
      "grad_norm": 10.557802697469821,
      "learning_rate": 2.968453286464312e-07,
      "logits/chosen": -1.386103868484497,
      "logits/rejected": -1.759375810623169,
      "logps/chosen": -398.8132629394531,
      "logps/rejected": -399.6328125,
      "loss": 0.5904,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.1334482431411743,
      "rewards/margins": 0.23164169490337372,
      "rewards/rejected": -1.365089774131775,
      "step": 235
    },
    {
      "epoch": 0.5065963060686016,
      "grad_norm": 13.209672009218341,
      "learning_rate": 2.8773908941806877e-07,
      "logits/chosen": -1.5705225467681885,
      "logits/rejected": -1.753831148147583,
      "logps/chosen": -442.28857421875,
      "logps/rejected": -449.0203552246094,
      "loss": 0.5998,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.6151583194732666,
      "rewards/margins": 0.23577281832695007,
      "rewards/rejected": -1.85093092918396,
      "step": 240
    },
    {
      "epoch": 0.5171503957783641,
      "grad_norm": 16.396333599315767,
      "learning_rate": 2.785813031330473e-07,
      "logits/chosen": -1.6287492513656616,
      "logits/rejected": -1.9647096395492554,
      "logps/chosen": -466.08599853515625,
      "logps/rejected": -482.62847900390625,
      "loss": 0.6041,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.8849313259124756,
      "rewards/margins": 0.3867245614528656,
      "rewards/rejected": -2.271656036376953,
      "step": 245
    },
    {
      "epoch": 0.5277044854881267,
      "grad_norm": 10.479150105315131,
      "learning_rate": 2.693844782258779e-07,
      "logits/chosen": -1.6182796955108643,
      "logits/rejected": -1.851154088973999,
      "logps/chosen": -442.0950622558594,
      "logps/rejected": -452.76416015625,
      "loss": 0.6023,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -1.5875468254089355,
      "rewards/margins": 0.27402180433273315,
      "rewards/rejected": -1.8615686893463135,
      "step": 250
    },
    {
      "epoch": 0.5382585751978892,
      "grad_norm": 11.245899562560366,
      "learning_rate": 2.601611764531342e-07,
      "logits/chosen": -1.5520964860916138,
      "logits/rejected": -1.8409061431884766,
      "logps/chosen": -385.7509765625,
      "logps/rejected": -413.82147216796875,
      "loss": 0.602,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.3112901449203491,
      "rewards/margins": 0.3254047930240631,
      "rewards/rejected": -1.6366949081420898,
      "step": 255
    },
    {
      "epoch": 0.5488126649076517,
      "grad_norm": 10.216434963455866,
      "learning_rate": 2.5092399573560323e-07,
      "logits/chosen": -1.552223563194275,
      "logits/rejected": -1.9581362009048462,
      "logps/chosen": -435.2206115722656,
      "logps/rejected": -440.0597229003906,
      "loss": 0.6024,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -1.5202934741973877,
      "rewards/margins": 0.2939620614051819,
      "rewards/rejected": -1.8142554759979248,
      "step": 260
    },
    {
      "epoch": 0.5593667546174143,
      "grad_norm": 15.557028702183048,
      "learning_rate": 2.4168555295104124e-07,
      "logits/chosen": -1.5453598499298096,
      "logits/rejected": -1.900339126586914,
      "logps/chosen": -430.10980224609375,
      "logps/rejected": -445.18658447265625,
      "loss": 0.5844,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.5744996070861816,
      "rewards/margins": 0.3325752317905426,
      "rewards/rejected": -1.9070749282836914,
      "step": 265
    },
    {
      "epoch": 0.5699208443271768,
      "grad_norm": 17.943254997397123,
      "learning_rate": 2.3245846670103626e-07,
      "logits/chosen": -1.604867935180664,
      "logits/rejected": -2.0065605640411377,
      "logps/chosen": -474.488037109375,
      "logps/rejected": -498.0807189941406,
      "loss": 0.5789,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.8745540380477905,
      "rewards/margins": 0.41972631216049194,
      "rewards/rejected": -2.294280529022217,
      "step": 270
    },
    {
      "epoch": 0.5804749340369393,
      "grad_norm": 24.025134545110568,
      "learning_rate": 2.232553400755159e-07,
      "logits/chosen": -1.5600621700286865,
      "logits/rejected": -1.9929841756820679,
      "logps/chosen": -506.9547424316406,
      "logps/rejected": -510.70306396484375,
      "loss": 0.6081,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -2.104123592376709,
      "rewards/margins": 0.3540397882461548,
      "rewards/rejected": -2.4581634998321533,
      "step": 275
    },
    {
      "epoch": 0.5910290237467019,
      "grad_norm": 12.929099239614445,
      "learning_rate": 2.1408874343844294e-07,
      "logits/chosen": -1.6627086400985718,
      "logits/rejected": -1.9773311614990234,
      "logps/chosen": -452.6092224121094,
      "logps/rejected": -466.3548889160156,
      "loss": 0.5697,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.777646780014038,
      "rewards/margins": 0.39899054169654846,
      "rewards/rejected": -2.1766371726989746,
      "step": 280
    },
    {
      "epoch": 0.6015831134564644,
      "grad_norm": 14.764167900995057,
      "learning_rate": 2.049711972582101e-07,
      "logits/chosen": -1.4953606128692627,
      "logits/rejected": -1.8248519897460938,
      "logps/chosen": -454.2190856933594,
      "logps/rejected": -484.0538635253906,
      "loss": 0.5691,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.6517919301986694,
      "rewards/margins": 0.40098685026168823,
      "rewards/rejected": -2.052778720855713,
      "step": 285
    },
    {
      "epoch": 0.6121372031662269,
      "grad_norm": 16.272348359396457,
      "learning_rate": 1.9591515500618588e-07,
      "logits/chosen": -1.5684363842010498,
      "logits/rejected": -1.8171417713165283,
      "logps/chosen": -463.537841796875,
      "logps/rejected": -480.9203186035156,
      "loss": 0.5867,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.7810500860214233,
      "rewards/margins": 0.29418668150901794,
      "rewards/rejected": -2.0752367973327637,
      "step": 290
    },
    {
      "epoch": 0.6226912928759895,
      "grad_norm": 14.742811810031489,
      "learning_rate": 1.8693298614677112e-07,
      "logits/chosen": -1.466384768486023,
      "logits/rejected": -1.8593746423721313,
      "logps/chosen": -479.5718688964844,
      "logps/rejected": -491.52154541015625,
      "loss": 0.5822,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.8734004497528076,
      "rewards/margins": 0.33124423027038574,
      "rewards/rejected": -2.2046444416046143,
      "step": 295
    },
    {
      "epoch": 0.633245382585752,
      "grad_norm": 17.118353279558573,
      "learning_rate": 1.7803695924219814e-07,
      "logits/chosen": -1.6126632690429688,
      "logits/rejected": -1.906806230545044,
      "logps/chosen": -501.42083740234375,
      "logps/rejected": -519.7081909179688,
      "loss": 0.5917,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -2.099165439605713,
      "rewards/margins": 0.307799756526947,
      "rewards/rejected": -2.4069650173187256,
      "step": 300
    },
    {
      "epoch": 0.6437994722955145,
      "grad_norm": 13.624538503432188,
      "learning_rate": 1.6923922519515067e-07,
      "logits/chosen": -1.6364351511001587,
      "logits/rejected": -1.9255473613739014,
      "logps/chosen": -485.3211975097656,
      "logps/rejected": -504.00701904296875,
      "loss": 0.5809,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.9193140268325806,
      "rewards/margins": 0.4129720628261566,
      "rewards/rejected": -2.3322861194610596,
      "step": 305
    },
    {
      "epoch": 0.6543535620052771,
      "grad_norm": 17.071661718014518,
      "learning_rate": 1.605518006520924e-07,
      "logits/chosen": -1.727064847946167,
      "logits/rejected": -2.0727763175964355,
      "logps/chosen": -501.14495849609375,
      "logps/rejected": -513.572509765625,
      "loss": 0.5871,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -2.1765451431274414,
      "rewards/margins": 0.34206461906433105,
      "rewards/rejected": -2.5186100006103516,
      "step": 310
    },
    {
      "epoch": 0.6649076517150396,
      "grad_norm": 13.617029224965975,
      "learning_rate": 1.519865515899731e-07,
      "logits/chosen": -1.722412109375,
      "logits/rejected": -2.04305362701416,
      "logps/chosen": -467.9588928222656,
      "logps/rejected": -480.5577087402344,
      "loss": 0.5821,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.8842096328735352,
      "rewards/margins": 0.34835028648376465,
      "rewards/rejected": -2.2325596809387207,
      "step": 315
    },
    {
      "epoch": 0.6754617414248021,
      "grad_norm": 13.33856540505469,
      "learning_rate": 1.4355517710873182e-07,
      "logits/chosen": -1.8616483211517334,
      "logits/rejected": -2.127676248550415,
      "logps/chosen": -491.52545166015625,
      "logps/rejected": -527.18212890625,
      "loss": 0.5874,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -2.0936801433563232,
      "rewards/margins": 0.45663338899612427,
      "rewards/rejected": -2.5503134727478027,
      "step": 320
    },
    {
      "epoch": 0.6860158311345647,
      "grad_norm": 17.145800349025656,
      "learning_rate": 1.3526919345173318e-07,
      "logits/chosen": -1.7799503803253174,
      "logits/rejected": -2.053417921066284,
      "logps/chosen": -521.0397338867188,
      "logps/rejected": -544.9762573242188,
      "loss": 0.5769,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.4065451622009277,
      "rewards/margins": 0.452395498752594,
      "rewards/rejected": -2.858940601348877,
      "step": 325
    },
    {
      "epoch": 0.6965699208443272,
      "grad_norm": 19.087646634462068,
      "learning_rate": 1.2713991827596443e-07,
      "logits/chosen": -1.8048852682113647,
      "logits/rejected": -2.0732533931732178,
      "logps/chosen": -538.1304931640625,
      "logps/rejected": -579.5018310546875,
      "loss": 0.5753,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -2.58000111579895,
      "rewards/margins": 0.5617579221725464,
      "rewards/rejected": -3.141758680343628,
      "step": 330
    },
    {
      "epoch": 0.7071240105540897,
      "grad_norm": 16.296965660815633,
      "learning_rate": 1.191784551934773e-07,
      "logits/chosen": -1.6937000751495361,
      "logits/rejected": -2.0096402168273926,
      "logps/chosen": -490.8270568847656,
      "logps/rejected": -560.6513671875,
      "loss": 0.5806,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.2391765117645264,
      "rewards/margins": 0.8371523022651672,
      "rewards/rejected": -3.076328992843628,
      "step": 335
    },
    {
      "epoch": 0.7176781002638523,
      "grad_norm": 13.84198150957549,
      "learning_rate": 1.1139567860518953e-07,
      "logits/chosen": -1.6130354404449463,
      "logits/rejected": -1.875739336013794,
      "logps/chosen": -477.005615234375,
      "logps/rejected": -505.4608459472656,
      "loss": 0.5914,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.9602851867675781,
      "rewards/margins": 0.4700210988521576,
      "rewards/rejected": -2.4303066730499268,
      "step": 340
    },
    {
      "epoch": 0.7282321899736148,
      "grad_norm": 15.316683752394184,
      "learning_rate": 1.0380221884776128e-07,
      "logits/chosen": -1.671500563621521,
      "logits/rejected": -1.958186149597168,
      "logps/chosen": -483.4461975097656,
      "logps/rejected": -497.53643798828125,
      "loss": 0.5842,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.9341436624526978,
      "rewards/margins": 0.3594801723957062,
      "rewards/rejected": -2.293623924255371,
      "step": 345
    },
    {
      "epoch": 0.7387862796833773,
      "grad_norm": 11.225540406360041,
      "learning_rate": 9.640844767383405e-08,
      "logits/chosen": -1.7304404973983765,
      "logits/rejected": -2.0152411460876465,
      "logps/chosen": -474.5326232910156,
      "logps/rejected": -519.5494384765625,
      "loss": 0.5663,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -1.8573243618011475,
      "rewards/margins": 0.5369530916213989,
      "rewards/rejected": -2.394277334213257,
      "step": 350
    },
    {
      "epoch": 0.7493403693931399,
      "grad_norm": 69.37431303110792,
      "learning_rate": 8.922446408546378e-08,
      "logits/chosen": -1.636301040649414,
      "logits/rejected": -1.9108378887176514,
      "logps/chosen": -474.32769775390625,
      "logps/rejected": -491.1766052246094,
      "loss": 0.5914,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.870996117591858,
      "rewards/margins": 0.4108423590660095,
      "rewards/rejected": -2.2818384170532227,
      "step": 355
    },
    {
      "epoch": 0.7598944591029023,
      "grad_norm": 20.752730975509387,
      "learning_rate": 8.22600805400994e-08,
      "logits/chosen": -1.597144603729248,
      "logits/rejected": -1.939162015914917,
      "logps/chosen": -516.8674926757812,
      "logps/rejected": -526.4575805664062,
      "loss": 0.5934,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -2.107037305831909,
      "rewards/margins": 0.36362889409065247,
      "rewards/rejected": -2.4706661701202393,
      "step": 360
    },
    {
      "epoch": 0.7704485488126649,
      "grad_norm": 17.42422968220554,
      "learning_rate": 7.552480954794558e-08,
      "logits/chosen": -1.664350152015686,
      "logits/rejected": -1.8763881921768188,
      "logps/chosen": -474.96917724609375,
      "logps/rejected": -517.1463623046875,
      "loss": 0.5755,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.939814567565918,
      "rewards/margins": 0.3745439350605011,
      "rewards/rejected": -2.3143584728240967,
      "step": 365
    },
    {
      "epoch": 0.7810026385224275,
      "grad_norm": 14.771602880443869,
      "learning_rate": 6.902785067901854e-08,
      "logits/chosen": -1.6192362308502197,
      "logits/rejected": -1.9148075580596924,
      "logps/chosen": -488.96221923828125,
      "logps/rejected": -493.0494689941406,
      "loss": 0.5705,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -1.987235426902771,
      "rewards/margins": 0.29930660128593445,
      "rewards/rejected": -2.2865424156188965,
      "step": 370
    },
    {
      "epoch": 0.7915567282321899,
      "grad_norm": 17.979535692288096,
      "learning_rate": 6.277807799763973e-08,
      "logits/chosen": -1.739436149597168,
      "logits/rejected": -1.9250596761703491,
      "logps/chosen": -524.38720703125,
      "logps/rejected": -558.7305908203125,
      "loss": 0.5799,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -2.345944881439209,
      "rewards/margins": 0.3936893045902252,
      "rewards/rejected": -2.7396342754364014,
      "step": 375
    },
    {
      "epoch": 0.8021108179419525,
      "grad_norm": 16.020544985708035,
      "learning_rate": 5.678402794153145e-08,
      "logits/chosen": -1.6335742473602295,
      "logits/rejected": -1.9916164875030518,
      "logps/chosen": -496.64111328125,
      "logps/rejected": -516.6607666015625,
      "loss": 0.5759,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -2.1185414791107178,
      "rewards/margins": 0.3739583492279053,
      "rewards/rejected": -2.492499828338623,
      "step": 380
    },
    {
      "epoch": 0.8126649076517151,
      "grad_norm": 15.483975057559833,
      "learning_rate": 5.105388766206969e-08,
      "logits/chosen": -1.7242807149887085,
      "logits/rejected": -1.9720449447631836,
      "logps/chosen": -476.0779724121094,
      "logps/rejected": -498.2892150878906,
      "loss": 0.5878,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.9155553579330444,
      "rewards/margins": 0.33329516649246216,
      "rewards/rejected": -2.2488505840301514,
      "step": 385
    },
    {
      "epoch": 0.8232189973614775,
      "grad_norm": 12.980915706351402,
      "learning_rate": 4.5595483841620484e-08,
      "logits/chosen": -1.685105562210083,
      "logits/rejected": -1.9450676441192627,
      "logps/chosen": -459.869384765625,
      "logps/rejected": -495.52069091796875,
      "loss": 0.5753,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.7653785943984985,
      "rewards/margins": 0.45078420639038086,
      "rewards/rejected": -2.216163158416748,
      "step": 390
    },
    {
      "epoch": 0.8337730870712401,
      "grad_norm": 12.943578700815056,
      "learning_rate": 4.0416272003232526e-08,
      "logits/chosen": -1.5918303728103638,
      "logits/rejected": -1.9432264566421509,
      "logps/chosen": -461.55078125,
      "logps/rejected": -483.1607971191406,
      "loss": 0.5828,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -1.7096798419952393,
      "rewards/margins": 0.43595314025878906,
      "rewards/rejected": -2.1456329822540283,
      "step": 395
    },
    {
      "epoch": 0.8443271767810027,
      "grad_norm": 13.529250322769109,
      "learning_rate": 3.552332632729041e-08,
      "logits/chosen": -1.676417350769043,
      "logits/rejected": -1.8683099746704102,
      "logps/chosen": -448.98809814453125,
      "logps/rejected": -474.80450439453125,
      "loss": 0.5696,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -1.735640525817871,
      "rewards/margins": 0.3609997630119324,
      "rewards/rejected": -2.096640110015869,
      "step": 400
    },
    {
      "epoch": 0.8443271767810027,
      "eval_logits/chosen": -1.8635751008987427,
      "eval_logits/rejected": -1.727868914604187,
      "eval_logps/chosen": -464.8841857910156,
      "eval_logps/rejected": -503.46514892578125,
      "eval_loss": 0.6257370710372925,
      "eval_rewards/accuracies": 0.6639676094055176,
      "eval_rewards/chosen": -1.8789465427398682,
      "eval_rewards/margins": 0.299042671918869,
      "eval_rewards/rejected": -2.1779892444610596,
      "eval_runtime": 316.7001,
      "eval_samples_per_second": 6.239,
      "eval_steps_per_second": 1.56,
      "step": 400
    },
    {
      "epoch": 0.8548812664907651,
      "grad_norm": 16.739492605341695,
      "learning_rate": 3.092332998903416e-08,
      "logits/chosen": -1.7163026332855225,
      "logits/rejected": -2.0801901817321777,
      "logps/chosen": -481.8212890625,
      "logps/rejected": -521.2871704101562,
      "loss": 0.5594,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.8860801458358765,
      "rewards/margins": 0.5326521992683411,
      "rewards/rejected": -2.418732166290283,
      "step": 405
    },
    {
      "epoch": 0.8654353562005277,
      "grad_norm": 18.511909575910575,
      "learning_rate": 2.6622566030146455e-08,
      "logits/chosen": -1.7279727458953857,
      "logits/rejected": -1.9562079906463623,
      "logps/chosen": -501.9583435058594,
      "logps/rejected": -521.0777587890625,
      "loss": 0.5736,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -2.089940309524536,
      "rewards/margins": 0.37453165650367737,
      "rewards/rejected": -2.4644720554351807,
      "step": 410
    },
    {
      "epoch": 0.8759894459102903,
      "grad_norm": 13.262757276399812,
      "learning_rate": 2.26269087768734e-08,
      "logits/chosen": -1.7813360691070557,
      "logits/rejected": -1.99080491065979,
      "logps/chosen": -470.19732666015625,
      "logps/rejected": -517.9837646484375,
      "loss": 0.5669,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -2.0514144897460938,
      "rewards/margins": 0.6286773681640625,
      "rewards/rejected": -2.680091619491577,
      "step": 415
    },
    {
      "epoch": 0.8865435356200527,
      "grad_norm": 16.729852500651287,
      "learning_rate": 1.894181581640106e-08,
      "logits/chosen": -1.7729663848876953,
      "logits/rejected": -2.0622265338897705,
      "logps/chosen": -503.3247985839844,
      "logps/rejected": -532.9273681640625,
      "loss": 0.5733,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -2.2469749450683594,
      "rewards/margins": 0.4464968144893646,
      "rewards/rejected": -2.6934714317321777,
      "step": 420
    },
    {
      "epoch": 0.8970976253298153,
      "grad_norm": 15.498959956089978,
      "learning_rate": 1.5572320542448143e-08,
      "logits/chosen": -1.8235836029052734,
      "logits/rejected": -2.0790963172912598,
      "logps/chosen": -518.3297119140625,
      "logps/rejected": -555.9387817382812,
      "loss": 0.5909,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -2.182375431060791,
      "rewards/margins": 0.5672179460525513,
      "rewards/rejected": -2.7495932579040527,
      "step": 425
    },
    {
      "epoch": 0.9076517150395779,
      "grad_norm": 13.029691392427118,
      "learning_rate": 1.2523025280255729e-08,
      "logits/chosen": -1.7515465021133423,
      "logits/rejected": -2.0758919715881348,
      "logps/chosen": -505.37646484375,
      "logps/rejected": -527.7960815429688,
      "loss": 0.5682,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -2.237623691558838,
      "rewards/margins": 0.4710654616355896,
      "rewards/rejected": -2.7086894512176514,
      "step": 430
    },
    {
      "epoch": 0.9182058047493403,
      "grad_norm": 16.269526596286124,
      "learning_rate": 9.798095000364214e-09,
      "logits/chosen": -1.7598968744277954,
      "logits/rejected": -1.9988504648208618,
      "logps/chosen": -508.0267028808594,
      "logps/rejected": -554.0763549804688,
      "loss": 0.5581,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -2.217205762863159,
      "rewards/margins": 0.5872582197189331,
      "rewards/rejected": -2.8044638633728027,
      "step": 435
    },
    {
      "epoch": 0.9287598944591029,
      "grad_norm": 13.648970556247901,
      "learning_rate": 7.401251629764876e-09,
      "logits/chosen": -1.830775499343872,
      "logits/rejected": -2.0407309532165527,
      "logps/chosen": -511.0887145996094,
      "logps/rejected": -543.5230712890625,
      "loss": 0.5799,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -2.284379482269287,
      "rewards/margins": 0.47375327348709106,
      "rewards/rejected": -2.7581324577331543,
      "step": 440
    },
    {
      "epoch": 0.9393139841688655,
      "grad_norm": 17.489158193863855,
      "learning_rate": 5.335768968195098e-09,
      "logits/chosen": -1.7661769390106201,
      "logits/rejected": -2.1901516914367676,
      "logps/chosen": -519.0462646484375,
      "logps/rejected": -544.9937133789062,
      "loss": 0.5703,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -2.308170795440674,
      "rewards/margins": 0.4751991331577301,
      "rewards/rejected": -2.783369779586792,
      "step": 445
    },
    {
      "epoch": 0.9498680738786279,
      "grad_norm": 18.472750585474607,
      "learning_rate": 3.604468216521883e-09,
      "logits/chosen": -1.8184922933578491,
      "logits/rejected": -2.069641590118408,
      "logps/chosen": -510.5535583496094,
      "logps/rejected": -536.5929565429688,
      "loss": 0.5651,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.3444266319274902,
      "rewards/margins": 0.45197463035583496,
      "rewards/rejected": -2.7964015007019043,
      "step": 450
    },
    {
      "epoch": 0.9604221635883905,
      "grad_norm": 19.193548961658735,
      "learning_rate": 2.2097141233206884e-09,
      "logits/chosen": -1.7842222452163696,
      "logits/rejected": -2.0406641960144043,
      "logps/chosen": -513.885986328125,
      "logps/rejected": -545.530029296875,
      "loss": 0.5708,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -2.247333526611328,
      "rewards/margins": 0.44551533460617065,
      "rewards/rejected": -2.6928489208221436,
      "step": 455
    },
    {
      "epoch": 0.9709762532981531,
      "grad_norm": 15.684871774317772,
      "learning_rate": 1.1534117549133472e-09,
      "logits/chosen": -1.8590974807739258,
      "logits/rejected": -2.08577036857605,
      "logps/chosen": -512.5687866210938,
      "logps/rejected": -551.6975708007812,
      "loss": 0.5662,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.2769620418548584,
      "rewards/margins": 0.5433439016342163,
      "rewards/rejected": -2.8203060626983643,
      "step": 460
    },
    {
      "epoch": 0.9815303430079155,
      "grad_norm": 16.324336075352026,
      "learning_rate": 4.3700389327672173e-10,
      "logits/chosen": -1.74801504611969,
      "logits/rejected": -2.0831220149993896,
      "logps/chosen": -508.1880798339844,
      "logps/rejected": -548.400390625,
      "loss": 0.578,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -2.2477779388427734,
      "rewards/margins": 0.5980393886566162,
      "rewards/rejected": -2.8458173274993896,
      "step": 465
    },
    {
      "epoch": 0.9920844327176781,
      "grad_norm": 18.434311800327553,
      "learning_rate": 6.146906537587982e-11,
      "logits/chosen": -1.7675012350082397,
      "logits/rejected": -2.0456321239471436,
      "logps/chosen": -524.4590454101562,
      "logps/rejected": -550.3624877929688,
      "loss": 0.5793,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -2.3114473819732666,
      "rewards/margins": 0.4332752823829651,
      "rewards/rejected": -2.744722366333008,
      "step": 470
    },
    {
      "epoch": 0.9984168865435357,
      "step": 473,
      "total_flos": 0.0,
      "train_loss": 0.6103140643736776,
      "train_runtime": 23898.8744,
      "train_samples_per_second": 2.537,
      "train_steps_per_second": 0.02
    }
  ],
  "logging_steps": 5,
  "max_steps": 473,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}
|
|