|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 5000, |
|
"global_step": 4168, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0002399232245681382, |
|
"grad_norm": 12.830117413274314, |
|
"learning_rate": 1.199040767386091e-09, |
|
"logits/chosen": -0.9333123564720154, |
|
"logits/rejected": -0.9608660936355591, |
|
"logps/chosen": -159.56137084960938, |
|
"logps/rejected": -163.75823974609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023992322456813818, |
|
"grad_norm": 14.474357775073676, |
|
"learning_rate": 1.199040767386091e-08, |
|
"logits/chosen": -0.7961726188659668, |
|
"logits/rejected": -0.9891128540039062, |
|
"logps/chosen": -372.8497314453125, |
|
"logps/rejected": -307.27880859375, |
|
"loss": 0.6936, |
|
"rewards/accuracies": 0.2222222238779068, |
|
"rewards/chosen": -0.0022438960149884224, |
|
"rewards/margins": -0.0032549728639423847, |
|
"rewards/rejected": 0.001011077081784606, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0047984644913627635, |
|
"grad_norm": 14.81799284714085, |
|
"learning_rate": 2.398081534772182e-08, |
|
"logits/chosen": -0.8415049314498901, |
|
"logits/rejected": -0.8875001072883606, |
|
"logps/chosen": -254.6843719482422, |
|
"logps/rejected": -224.0112762451172, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.000493885949254036, |
|
"rewards/margins": -0.0004514312313403934, |
|
"rewards/rejected": -4.245472882757895e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007197696737044146, |
|
"grad_norm": 13.794514003749402, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -0.9713956117630005, |
|
"logits/rejected": -1.0547858476638794, |
|
"logps/chosen": -246.8055419921875, |
|
"logps/rejected": -250.71664428710938, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.001063814153894782, |
|
"rewards/margins": -0.0020584219601005316, |
|
"rewards/rejected": 0.0009946079226210713, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.009596928982725527, |
|
"grad_norm": 13.247567844837228, |
|
"learning_rate": 4.796163069544364e-08, |
|
"logits/chosen": -0.9620189666748047, |
|
"logits/rejected": -1.0594929456710815, |
|
"logps/chosen": -245.97018432617188, |
|
"logps/rejected": -238.09866333007812, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0002268419339088723, |
|
"rewards/margins": -0.0007565030828118324, |
|
"rewards/rejected": 0.0009833450894802809, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01199616122840691, |
|
"grad_norm": 14.63019248204087, |
|
"learning_rate": 5.995203836930455e-08, |
|
"logits/chosen": -0.8769725561141968, |
|
"logits/rejected": -0.9388787150382996, |
|
"logps/chosen": -273.33740234375, |
|
"logps/rejected": -236.96035766601562, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00011439235095167533, |
|
"rewards/margins": -0.0013857032172381878, |
|
"rewards/rejected": 0.0012713107280433178, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.014395393474088292, |
|
"grad_norm": 14.759835440686068, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -1.0583831071853638, |
|
"logits/rejected": -0.98320472240448, |
|
"logps/chosen": -289.59100341796875, |
|
"logps/rejected": -263.46539306640625, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.001381714828312397, |
|
"rewards/margins": -0.001984237926080823, |
|
"rewards/rejected": 0.0006025228649377823, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.016794625719769675, |
|
"grad_norm": 13.293061279163139, |
|
"learning_rate": 8.393285371702638e-08, |
|
"logits/chosen": -0.7076249122619629, |
|
"logits/rejected": -0.7553724050521851, |
|
"logps/chosen": -280.7513122558594, |
|
"logps/rejected": -270.6877136230469, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0003030824300367385, |
|
"rewards/margins": 0.0011825991095975041, |
|
"rewards/rejected": -0.0014856813941150904, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.019193857965451054, |
|
"grad_norm": 13.898946391946994, |
|
"learning_rate": 9.592326139088728e-08, |
|
"logits/chosen": -1.043713927268982, |
|
"logits/rejected": -0.7779287099838257, |
|
"logps/chosen": -202.8668212890625, |
|
"logps/rejected": -240.8871612548828, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0010693834628909826, |
|
"rewards/margins": 0.0018330765888094902, |
|
"rewards/rejected": -0.000763692834880203, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.021593090211132437, |
|
"grad_norm": 13.852772166042456, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -1.053758978843689, |
|
"logits/rejected": -1.118728756904602, |
|
"logps/chosen": -346.8046875, |
|
"logps/rejected": -297.9544677734375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0008068332681432366, |
|
"rewards/margins": 0.0018463155720382929, |
|
"rewards/rejected": -0.0010394820710644126, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02399232245681382, |
|
"grad_norm": 14.519229316524262, |
|
"learning_rate": 1.199040767386091e-07, |
|
"logits/chosen": -0.7971643209457397, |
|
"logits/rejected": -0.6808469295501709, |
|
"logps/chosen": -260.2826843261719, |
|
"logps/rejected": -278.73406982421875, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.001732430187985301, |
|
"rewards/margins": 0.0005316648748703301, |
|
"rewards/rejected": -0.002264095237478614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026391554702495202, |
|
"grad_norm": 12.435724553655291, |
|
"learning_rate": 1.3189448441247004e-07, |
|
"logits/chosen": -0.9703518152236938, |
|
"logits/rejected": -1.0042107105255127, |
|
"logps/chosen": -230.6790771484375, |
|
"logps/rejected": -227.7737274169922, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002495633903890848, |
|
"rewards/margins": 0.00144762615673244, |
|
"rewards/rejected": -0.0039432598277926445, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.028790786948176585, |
|
"grad_norm": 14.017624859502787, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -0.8614851832389832, |
|
"logits/rejected": -0.9825533032417297, |
|
"logps/chosen": -302.4905090332031, |
|
"logps/rejected": -277.0740966796875, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.004078847821801901, |
|
"rewards/margins": 0.0013862637570127845, |
|
"rewards/rejected": -0.005465111695230007, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.031190019193857964, |
|
"grad_norm": 12.415568509175522, |
|
"learning_rate": 1.5587529976019183e-07, |
|
"logits/chosen": -1.0289809703826904, |
|
"logits/rejected": -0.9567875862121582, |
|
"logps/chosen": -224.55801391601562, |
|
"logps/rejected": -305.19329833984375, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0029300746973603964, |
|
"rewards/margins": 0.0053221117705106735, |
|
"rewards/rejected": -0.008252186700701714, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03358925143953935, |
|
"grad_norm": 13.71977139583337, |
|
"learning_rate": 1.6786570743405277e-07, |
|
"logits/chosen": -0.7473757266998291, |
|
"logits/rejected": -0.7813047170639038, |
|
"logps/chosen": -280.0737609863281, |
|
"logps/rejected": -270.67962646484375, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.007163494825363159, |
|
"rewards/margins": 0.0030975989066064358, |
|
"rewards/rejected": -0.010261094197630882, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03598848368522073, |
|
"grad_norm": 13.807869552669564, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -0.9591526985168457, |
|
"logits/rejected": -0.973153293132782, |
|
"logps/chosen": -230.39022827148438, |
|
"logps/rejected": -225.483642578125, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.00702179130166769, |
|
"rewards/margins": 0.008269025012850761, |
|
"rewards/rejected": -0.015290816314518452, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03838771593090211, |
|
"grad_norm": 14.320312006310687, |
|
"learning_rate": 1.9184652278177456e-07, |
|
"logits/chosen": -0.7980312705039978, |
|
"logits/rejected": -0.8937035799026489, |
|
"logps/chosen": -296.2853088378906, |
|
"logps/rejected": -229.8527374267578, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.012824411503970623, |
|
"rewards/margins": 0.0057816035114228725, |
|
"rewards/rejected": -0.018606014549732208, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.040786948176583494, |
|
"grad_norm": 13.000634918715434, |
|
"learning_rate": 2.038369304556355e-07, |
|
"logits/chosen": -0.770675778388977, |
|
"logits/rejected": -0.8101946711540222, |
|
"logps/chosen": -343.27947998046875, |
|
"logps/rejected": -335.1900329589844, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.01736806333065033, |
|
"rewards/margins": 0.010059249587357044, |
|
"rewards/rejected": -0.0274273119866848, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04318618042226487, |
|
"grad_norm": 14.311756905286574, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -1.056549072265625, |
|
"logits/rejected": -1.0499789714813232, |
|
"logps/chosen": -238.1182403564453, |
|
"logps/rejected": -229.7616729736328, |
|
"loss": 0.6891, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.02097422443330288, |
|
"rewards/margins": 0.00995118822902441, |
|
"rewards/rejected": -0.030925417318940163, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04558541266794626, |
|
"grad_norm": 15.983225243139483, |
|
"learning_rate": 2.278177458033573e-07, |
|
"logits/chosen": -0.8500925302505493, |
|
"logits/rejected": -0.9182466268539429, |
|
"logps/chosen": -307.84356689453125, |
|
"logps/rejected": -250.31552124023438, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.02229948900640011, |
|
"rewards/margins": 0.012221109122037888, |
|
"rewards/rejected": -0.034520603716373444, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04798464491362764, |
|
"grad_norm": 12.792863291405517, |
|
"learning_rate": 2.398081534772182e-07, |
|
"logits/chosen": -0.9231821894645691, |
|
"logits/rejected": -0.8621791005134583, |
|
"logps/chosen": -315.01055908203125, |
|
"logps/rejected": -300.3835754394531, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.028801122680306435, |
|
"rewards/margins": 0.020397091284394264, |
|
"rewards/rejected": -0.0491982102394104, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05038387715930902, |
|
"grad_norm": 13.564982268308716, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -0.8487990498542786, |
|
"logits/rejected": -0.8659976720809937, |
|
"logps/chosen": -232.47500610351562, |
|
"logps/rejected": -256.1665344238281, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.036128297448158264, |
|
"rewards/margins": 0.01755443774163723, |
|
"rewards/rejected": -0.053682733327150345, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.052783109404990404, |
|
"grad_norm": 13.6123418164062, |
|
"learning_rate": 2.637889688249401e-07, |
|
"logits/chosen": -0.8184248208999634, |
|
"logits/rejected": -0.8951767683029175, |
|
"logps/chosen": -316.2499084472656, |
|
"logps/rejected": -317.16680908203125, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.052874885499477386, |
|
"rewards/margins": 0.020790213719010353, |
|
"rewards/rejected": -0.07366509735584259, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05518234165067178, |
|
"grad_norm": 14.40416586096848, |
|
"learning_rate": 2.7577937649880093e-07, |
|
"logits/chosen": -0.8254715204238892, |
|
"logits/rejected": -0.7313406467437744, |
|
"logps/chosen": -243.4302520751953, |
|
"logps/rejected": -282.7564697265625, |
|
"loss": 0.6764, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05869588255882263, |
|
"rewards/margins": 0.04219502583146095, |
|
"rewards/rejected": -0.10089089721441269, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05758157389635317, |
|
"grad_norm": 15.966117267131516, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -0.9691603779792786, |
|
"logits/rejected": -1.0244967937469482, |
|
"logps/chosen": -307.95867919921875, |
|
"logps/rejected": -264.9517517089844, |
|
"loss": 0.6713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.061439257115125656, |
|
"rewards/margins": 0.04740726947784424, |
|
"rewards/rejected": -0.1088465228676796, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05998080614203455, |
|
"grad_norm": 15.540992469488703, |
|
"learning_rate": 2.997601918465228e-07, |
|
"logits/chosen": -0.9066941142082214, |
|
"logits/rejected": -0.9724334478378296, |
|
"logps/chosen": -245.83828735351562, |
|
"logps/rejected": -241.96401977539062, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.08735504001379013, |
|
"rewards/margins": 0.03149568662047386, |
|
"rewards/rejected": -0.1188507229089737, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06238003838771593, |
|
"grad_norm": 14.016457897110723, |
|
"learning_rate": 3.1175059952038366e-07, |
|
"logits/chosen": -0.9671137928962708, |
|
"logits/rejected": -0.8278489112854004, |
|
"logps/chosen": -271.09344482421875, |
|
"logps/rejected": -267.5201110839844, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09744887799024582, |
|
"rewards/margins": 0.07394719123840332, |
|
"rewards/rejected": -0.17139606177806854, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0647792706333973, |
|
"grad_norm": 15.591971270081572, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -0.8864096403121948, |
|
"logits/rejected": -1.0893969535827637, |
|
"logps/chosen": -297.97674560546875, |
|
"logps/rejected": -242.01272583007812, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10928692668676376, |
|
"rewards/margins": 0.024153277277946472, |
|
"rewards/rejected": -0.13344022631645203, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.0671785028790787, |
|
"grad_norm": 16.24546965787235, |
|
"learning_rate": 3.3573141486810554e-07, |
|
"logits/chosen": -1.0096687078475952, |
|
"logits/rejected": -0.9647713899612427, |
|
"logps/chosen": -311.0169677734375, |
|
"logps/rejected": -303.4920349121094, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.15291506052017212, |
|
"rewards/margins": 0.09652292728424072, |
|
"rewards/rejected": -0.24943797290325165, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06957773512476008, |
|
"grad_norm": 13.70885821013161, |
|
"learning_rate": 3.477218225419664e-07, |
|
"logits/chosen": -0.879925549030304, |
|
"logits/rejected": -0.7820504903793335, |
|
"logps/chosen": -306.32293701171875, |
|
"logps/rejected": -284.5321960449219, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19852982461452484, |
|
"rewards/margins": 0.10227125883102417, |
|
"rewards/rejected": -0.3008010983467102, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07197696737044146, |
|
"grad_norm": 16.420599197521923, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -0.9782785177230835, |
|
"logits/rejected": -1.02054762840271, |
|
"logps/chosen": -278.99755859375, |
|
"logps/rejected": -304.5132141113281, |
|
"loss": 0.648, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.25667688250541687, |
|
"rewards/margins": 0.10419619083404541, |
|
"rewards/rejected": -0.3608730733394623, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07437619961612284, |
|
"grad_norm": 15.857184742509135, |
|
"learning_rate": 3.7170263788968827e-07, |
|
"logits/chosen": -0.8711949586868286, |
|
"logits/rejected": -0.9557937383651733, |
|
"logps/chosen": -297.0644226074219, |
|
"logps/rejected": -263.8592529296875, |
|
"loss": 0.655, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.26040521264076233, |
|
"rewards/margins": 0.16062946617603302, |
|
"rewards/rejected": -0.42103463411331177, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07677543186180422, |
|
"grad_norm": 15.154553129414445, |
|
"learning_rate": 3.836930455635491e-07, |
|
"logits/chosen": -0.9826911687850952, |
|
"logits/rejected": -1.0516026020050049, |
|
"logps/chosen": -302.815673828125, |
|
"logps/rejected": -278.9208984375, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.28811559081077576, |
|
"rewards/margins": 0.10437599569559097, |
|
"rewards/rejected": -0.39249157905578613, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07917466410748561, |
|
"grad_norm": 14.842365249432783, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -0.8737947344779968, |
|
"logits/rejected": -0.765605092048645, |
|
"logps/chosen": -283.9456787109375, |
|
"logps/rejected": -342.69580078125, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.3966819941997528, |
|
"rewards/margins": 0.21009349822998047, |
|
"rewards/rejected": -0.6067754030227661, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08157389635316699, |
|
"grad_norm": 16.05033848642518, |
|
"learning_rate": 4.07673860911271e-07, |
|
"logits/chosen": -0.7628117799758911, |
|
"logits/rejected": -0.8412669897079468, |
|
"logps/chosen": -271.9056091308594, |
|
"logps/rejected": -311.70526123046875, |
|
"loss": 0.6336, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.318366676568985, |
|
"rewards/margins": 0.2277708798646927, |
|
"rewards/rejected": -0.5461375117301941, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.08397312859884837, |
|
"grad_norm": 17.720120609163384, |
|
"learning_rate": 4.1966426858513185e-07, |
|
"logits/chosen": -1.0602662563323975, |
|
"logits/rejected": -1.0608508586883545, |
|
"logps/chosen": -316.4015197753906, |
|
"logps/rejected": -330.9758605957031, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4658915102481842, |
|
"rewards/margins": 0.16727015376091003, |
|
"rewards/rejected": -0.6331616640090942, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08637236084452975, |
|
"grad_norm": 16.65175354746556, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -0.8833236694335938, |
|
"logits/rejected": -1.0470280647277832, |
|
"logps/chosen": -314.62298583984375, |
|
"logps/rejected": -272.09527587890625, |
|
"loss": 0.6334, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.4979444146156311, |
|
"rewards/margins": 0.1287182867527008, |
|
"rewards/rejected": -0.6266626715660095, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08877159309021113, |
|
"grad_norm": 18.783320614959067, |
|
"learning_rate": 4.436450839328537e-07, |
|
"logits/chosen": -0.8926633596420288, |
|
"logits/rejected": -0.820746898651123, |
|
"logps/chosen": -289.3975524902344, |
|
"logps/rejected": -326.2989196777344, |
|
"loss": 0.6239, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.5632190108299255, |
|
"rewards/margins": 0.2842358648777008, |
|
"rewards/rejected": -0.8474549055099487, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09117082533589252, |
|
"grad_norm": 15.455693570330665, |
|
"learning_rate": 4.556354916067146e-07, |
|
"logits/chosen": -1.033244013786316, |
|
"logits/rejected": -0.9363048672676086, |
|
"logps/chosen": -280.1861877441406, |
|
"logps/rejected": -303.18902587890625, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.43663984537124634, |
|
"rewards/margins": 0.23225346207618713, |
|
"rewards/rejected": -0.6688933372497559, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.0935700575815739, |
|
"grad_norm": 19.882511537780037, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -0.8549890518188477, |
|
"logits/rejected": -0.8999547958374023, |
|
"logps/chosen": -325.2740783691406, |
|
"logps/rejected": -315.0982666015625, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6211831569671631, |
|
"rewards/margins": 0.15810197591781616, |
|
"rewards/rejected": -0.779285192489624, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.09596928982725528, |
|
"grad_norm": 16.372176463297425, |
|
"learning_rate": 4.796163069544364e-07, |
|
"logits/chosen": -0.9059786796569824, |
|
"logits/rejected": -0.9771867990493774, |
|
"logps/chosen": -333.3121032714844, |
|
"logps/rejected": -343.83929443359375, |
|
"loss": 0.6163, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.710607647895813, |
|
"rewards/margins": 0.3263167440891266, |
|
"rewards/rejected": -1.0369244813919067, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09836852207293666, |
|
"grad_norm": 21.155324613616017, |
|
"learning_rate": 4.916067146282974e-07, |
|
"logits/chosen": -1.0448824167251587, |
|
"logits/rejected": -0.9939874410629272, |
|
"logps/chosen": -319.8747253417969, |
|
"logps/rejected": -378.6325378417969, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7368643879890442, |
|
"rewards/margins": 0.3942120373249054, |
|
"rewards/rejected": -1.131076455116272, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10076775431861804, |
|
"grad_norm": 23.801221104048313, |
|
"learning_rate": 4.999992108529978e-07, |
|
"logits/chosen": -0.9094634056091309, |
|
"logits/rejected": -0.9350014925003052, |
|
"logps/chosen": -435.54412841796875, |
|
"logps/rejected": -445.7367248535156, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0090878009796143, |
|
"rewards/margins": 0.46369805932044983, |
|
"rewards/rejected": -1.4727857112884521, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.10316698656429943, |
|
"grad_norm": 24.146443568702797, |
|
"learning_rate": 4.999851817115532e-07, |
|
"logits/chosen": -1.1204873323440552, |
|
"logits/rejected": -1.0295510292053223, |
|
"logps/chosen": -337.2879943847656, |
|
"logps/rejected": -372.93011474609375, |
|
"loss": 0.6154, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8201831579208374, |
|
"rewards/margins": 0.4531213343143463, |
|
"rewards/rejected": -1.2733044624328613, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.10556621880998081, |
|
"grad_norm": 17.50567821054797, |
|
"learning_rate": 4.999536171027889e-07, |
|
"logits/chosen": -0.8827461004257202, |
|
"logits/rejected": -0.9801127314567566, |
|
"logps/chosen": -339.089599609375, |
|
"logps/rejected": -357.59283447265625, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8114099502563477, |
|
"rewards/margins": 0.2592558264732361, |
|
"rewards/rejected": -1.0706658363342285, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.10796545105566219, |
|
"grad_norm": 20.292040142941033, |
|
"learning_rate": 4.999045192408369e-07, |
|
"logits/chosen": -0.9986470937728882, |
|
"logits/rejected": -1.0170477628707886, |
|
"logps/chosen": -317.05694580078125, |
|
"logps/rejected": -324.7137145996094, |
|
"loss": 0.6153, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7792980074882507, |
|
"rewards/margins": 0.23761887848377228, |
|
"rewards/rejected": -1.0169168710708618, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.11036468330134357, |
|
"grad_norm": 20.30371683148718, |
|
"learning_rate": 4.998378915697171e-07, |
|
"logits/chosen": -0.9217838048934937, |
|
"logits/rejected": -0.9802480936050415, |
|
"logps/chosen": -339.0627136230469, |
|
"logps/rejected": -374.9605407714844, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.5629919767379761, |
|
"rewards/margins": 0.43420299887657166, |
|
"rewards/rejected": -0.9971949458122253, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.11276391554702495, |
|
"grad_norm": 23.739625609026348, |
|
"learning_rate": 4.997537387630958e-07, |
|
"logits/chosen": -1.0254257917404175, |
|
"logits/rejected": -1.0769364833831787, |
|
"logps/chosen": -299.828125, |
|
"logps/rejected": -346.30621337890625, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.757472038269043, |
|
"rewards/margins": 0.4156116843223572, |
|
"rewards/rejected": -1.1730836629867554, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.11516314779270634, |
|
"grad_norm": 24.76367716580562, |
|
"learning_rate": 4.996520667239582e-07, |
|
"logits/chosen": -1.2554540634155273, |
|
"logits/rejected": -1.1178642511367798, |
|
"logps/chosen": -323.2625427246094, |
|
"logps/rejected": -438.39569091796875, |
|
"loss": 0.5663, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7605866193771362, |
|
"rewards/margins": 0.6550606489181519, |
|
"rewards/rejected": -1.415647268295288, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.11756238003838772, |
|
"grad_norm": 24.50216433662912, |
|
"learning_rate": 4.995328825841939e-07, |
|
"logits/chosen": -0.967789351940155, |
|
"logits/rejected": -0.9846280813217163, |
|
"logps/chosen": -327.4945373535156, |
|
"logps/rejected": -458.9354553222656, |
|
"loss": 0.5625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8878215551376343, |
|
"rewards/margins": 1.3659319877624512, |
|
"rewards/rejected": -2.253753662109375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1199616122840691, |
|
"grad_norm": 22.44855997761646, |
|
"learning_rate": 4.993961947040967e-07, |
|
"logits/chosen": -0.9332793354988098, |
|
"logits/rejected": -1.0110089778900146, |
|
"logps/chosen": -384.03509521484375, |
|
"logps/rejected": -390.9599914550781, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0161880254745483, |
|
"rewards/margins": 0.4222269654273987, |
|
"rewards/rejected": -1.438415288925171, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12236084452975048, |
|
"grad_norm": 20.668791420309383, |
|
"learning_rate": 4.992420126717784e-07, |
|
"logits/chosen": -1.0082364082336426, |
|
"logits/rejected": -0.9416404962539673, |
|
"logps/chosen": -328.42987060546875, |
|
"logps/rejected": -429.83099365234375, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6905103921890259, |
|
"rewards/margins": 1.0820300579071045, |
|
"rewards/rejected": -1.7725404500961304, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.12476007677543186, |
|
"grad_norm": 22.77455852864622, |
|
"learning_rate": 4.990703473024958e-07, |
|
"logits/chosen": -0.8772110939025879, |
|
"logits/rejected": -1.0350819826126099, |
|
"logps/chosen": -414.46978759765625, |
|
"logps/rejected": -478.1474609375, |
|
"loss": 0.5796, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.259319543838501, |
|
"rewards/margins": 0.6991303563117981, |
|
"rewards/rejected": -1.9584500789642334, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.12715930902111325, |
|
"grad_norm": 22.984070098434174, |
|
"learning_rate": 4.98881210637893e-07, |
|
"logits/chosen": -1.167495608329773, |
|
"logits/rejected": -1.10280442237854, |
|
"logps/chosen": -298.97845458984375, |
|
"logps/rejected": -406.2267761230469, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7922585606575012, |
|
"rewards/margins": 0.7441150546073914, |
|
"rewards/rejected": -1.5363736152648926, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1295585412667946, |
|
"grad_norm": 28.715620814370784, |
|
"learning_rate": 4.986746159451553e-07, |
|
"logits/chosen": -0.9988299608230591, |
|
"logits/rejected": -1.0019475221633911, |
|
"logps/chosen": -349.500732421875, |
|
"logps/rejected": -464.0498046875, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9567633867263794, |
|
"rewards/margins": 1.248631477355957, |
|
"rewards/rejected": -2.205394983291626, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.131957773512476, |
|
"grad_norm": 24.64040581669011, |
|
"learning_rate": 4.984505777160795e-07, |
|
"logits/chosen": -0.8519158363342285, |
|
"logits/rejected": -0.8761106729507446, |
|
"logps/chosen": -391.86907958984375, |
|
"logps/rejected": -490.4788513183594, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9429546594619751, |
|
"rewards/margins": 0.9859554171562195, |
|
"rewards/rejected": -1.9289100170135498, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1343570057581574, |
|
"grad_norm": 21.445602932905317, |
|
"learning_rate": 4.982091116660574e-07, |
|
"logits/chosen": -0.984406590461731, |
|
"logits/rejected": -1.1147202253341675, |
|
"logps/chosen": -273.537353515625, |
|
"logps/rejected": -286.6374816894531, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.7375530004501343, |
|
"rewards/margins": 0.3278002142906189, |
|
"rewards/rejected": -1.0653531551361084, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.13675623800383876, |
|
"grad_norm": 30.19526853243183, |
|
"learning_rate": 4.979502347329732e-07, |
|
"logits/chosen": -0.8063471913337708, |
|
"logits/rejected": -0.8000919222831726, |
|
"logps/chosen": -368.16864013671875, |
|
"logps/rejected": -476.47247314453125, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9274753332138062, |
|
"rewards/margins": 0.7706169486045837, |
|
"rewards/rejected": -1.6980924606323242, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.13915547024952016, |
|
"grad_norm": 43.10504058256567, |
|
"learning_rate": 4.976739650760151e-07, |
|
"logits/chosen": -1.0534042119979858, |
|
"logits/rejected": -1.0876705646514893, |
|
"logps/chosen": -374.096923828125, |
|
"logps/rejected": -453.3235778808594, |
|
"loss": 0.5548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.1569541692733765, |
|
"rewards/margins": 0.8895587921142578, |
|
"rewards/rejected": -2.046513080596924, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.14155470249520152, |
|
"grad_norm": 33.097831242106516, |
|
"learning_rate": 4.97380322074402e-07, |
|
"logits/chosen": -0.7486315369606018, |
|
"logits/rejected": -0.8265093564987183, |
|
"logps/chosen": -342.4309387207031, |
|
"logps/rejected": -411.20416259765625, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2151663303375244, |
|
"rewards/margins": 0.699668824672699, |
|
"rewards/rejected": -1.9148353338241577, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.14395393474088292, |
|
"grad_norm": 26.621827613808147, |
|
"learning_rate": 4.970693263260237e-07, |
|
"logits/chosen": -1.0230684280395508, |
|
"logits/rejected": -1.1072685718536377, |
|
"logps/chosen": -387.28839111328125, |
|
"logps/rejected": -406.2510986328125, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8648967742919922, |
|
"rewards/margins": 0.6640993356704712, |
|
"rewards/rejected": -1.528996229171753, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1463531669865643, |
|
"grad_norm": 36.11327139057057, |
|
"learning_rate": 4.967409996459966e-07, |
|
"logits/chosen": -0.9299138188362122, |
|
"logits/rejected": -0.949521541595459, |
|
"logps/chosen": -357.72381591796875, |
|
"logps/rejected": -395.5397644042969, |
|
"loss": 0.5318, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9965571165084839, |
|
"rewards/margins": 0.6282019019126892, |
|
"rewards/rejected": -1.6247589588165283, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.14875239923224567, |
|
"grad_norm": 110.15447659680028, |
|
"learning_rate": 4.963953650651326e-07, |
|
"logits/chosen": -0.8881407976150513, |
|
"logits/rejected": -0.9683340191841125, |
|
"logps/chosen": -517.2413330078125, |
|
"logps/rejected": -486.80865478515625, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.7012176513671875, |
|
"rewards/margins": 0.6705323457717896, |
|
"rewards/rejected": -2.3717498779296875, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.15115163147792707, |
|
"grad_norm": 24.7770238152267, |
|
"learning_rate": 4.960324468283248e-07, |
|
"logits/chosen": -1.0681040287017822, |
|
"logits/rejected": -1.102770209312439, |
|
"logps/chosen": -297.15594482421875, |
|
"logps/rejected": -379.40032958984375, |
|
"loss": 0.5235, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7767683267593384, |
|
"rewards/margins": 0.8115633726119995, |
|
"rewards/rejected": -1.5883318185806274, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.15355086372360843, |
|
"grad_norm": 39.277732598538044, |
|
"learning_rate": 4.956522703928451e-07, |
|
"logits/chosen": -1.011266827583313, |
|
"logits/rejected": -0.8994027376174927, |
|
"logps/chosen": -318.6116943359375, |
|
"logps/rejected": -397.0060119628906, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8186876177787781, |
|
"rewards/margins": 0.6957352757453918, |
|
"rewards/rejected": -1.5144227743148804, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.15595009596928983, |
|
"grad_norm": 37.25851736588769, |
|
"learning_rate": 4.952548624265606e-07, |
|
"logits/chosen": -0.9296770095825195, |
|
"logits/rejected": -0.9706400632858276, |
|
"logps/chosen": -394.94012451171875, |
|
"logps/rejected": -462.7085876464844, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3027846813201904, |
|
"rewards/margins": 0.7685932517051697, |
|
"rewards/rejected": -2.071377992630005, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15834932821497122, |
|
"grad_norm": 23.054150563951033, |
|
"learning_rate": 4.948402508060607e-07, |
|
"logits/chosen": -1.106856346130371, |
|
"logits/rejected": -1.1293423175811768, |
|
"logps/chosen": -319.3155822753906, |
|
"logps/rejected": -400.53863525390625, |
|
"loss": 0.5562, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8783910870552063, |
|
"rewards/margins": 0.9650925397872925, |
|
"rewards/rejected": -1.843483567237854, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.16074856046065258, |
|
"grad_norm": 42.278776304188725, |
|
"learning_rate": 4.944084646147038e-07, |
|
"logits/chosen": -0.9512613415718079, |
|
"logits/rejected": -1.0062638521194458, |
|
"logps/chosen": -390.70245361328125, |
|
"logps/rejected": -408.19207763671875, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8297697305679321, |
|
"rewards/margins": 0.40133753418922424, |
|
"rewards/rejected": -1.2311073541641235, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.16314779270633398, |
|
"grad_norm": 26.571180814300117, |
|
"learning_rate": 4.939595341405754e-07, |
|
"logits/chosen": -0.936383843421936, |
|
"logits/rejected": -0.9673361778259277, |
|
"logps/chosen": -333.87176513671875, |
|
"logps/rejected": -367.17034912109375, |
|
"loss": 0.5324, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8141332864761353, |
|
"rewards/margins": 0.5334564447402954, |
|
"rewards/rejected": -1.3475896120071411, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.16554702495201534, |
|
"grad_norm": 31.203377729047734, |
|
"learning_rate": 4.93493490874365e-07, |
|
"logits/chosen": -0.970528781414032, |
|
"logits/rejected": -0.9868891835212708, |
|
"logps/chosen": -359.50250244140625, |
|
"logps/rejected": -437.6192932128906, |
|
"loss": 0.5371, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.2206305265426636, |
|
"rewards/margins": 0.7284920811653137, |
|
"rewards/rejected": -1.949122667312622, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.16794625719769674, |
|
"grad_norm": 26.866432369585116, |
|
"learning_rate": 4.93010367507156e-07, |
|
"logits/chosen": -1.1611895561218262, |
|
"logits/rejected": -1.1491591930389404, |
|
"logps/chosen": -315.22088623046875, |
|
"logps/rejected": -436.4913635253906, |
|
"loss": 0.5069, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0012879371643066, |
|
"rewards/margins": 1.438588261604309, |
|
"rewards/rejected": -2.439876079559326, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.17034548944337813, |
|
"grad_norm": 39.78872918816068, |
|
"learning_rate": 4.925101979281332e-07, |
|
"logits/chosen": -1.0544211864471436, |
|
"logits/rejected": -1.2225271463394165, |
|
"logps/chosen": -457.6055603027344, |
|
"logps/rejected": -532.146240234375, |
|
"loss": 0.5203, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.50386381149292, |
|
"rewards/margins": 1.2967182397842407, |
|
"rewards/rejected": -2.800581693649292, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1727447216890595, |
|
"grad_norm": 51.98312964639157, |
|
"learning_rate": 4.919930172222054e-07, |
|
"logits/chosen": -1.0103614330291748, |
|
"logits/rejected": -1.0998207330703735, |
|
"logps/chosen": -389.05126953125, |
|
"logps/rejected": -467.95892333984375, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.418017029762268, |
|
"rewards/margins": 0.8280168771743774, |
|
"rewards/rejected": -2.2460339069366455, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.1751439539347409, |
|
"grad_norm": 63.53276334873331, |
|
"learning_rate": 4.914588616675445e-07, |
|
"logits/chosen": -1.100673794746399, |
|
"logits/rejected": -1.091517686843872, |
|
"logps/chosen": -379.39208984375, |
|
"logps/rejected": -458.4183044433594, |
|
"loss": 0.5784, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3578667640686035, |
|
"rewards/margins": 0.995037853717804, |
|
"rewards/rejected": -2.3529045581817627, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.17754318618042225, |
|
"grad_norm": 29.527270739257894, |
|
"learning_rate": 4.909077687330404e-07, |
|
"logits/chosen": -0.8857021331787109, |
|
"logits/rejected": -0.975568413734436, |
|
"logps/chosen": -357.48187255859375, |
|
"logps/rejected": -378.65234375, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9617660641670227, |
|
"rewards/margins": 0.4279107451438904, |
|
"rewards/rejected": -1.3896766901016235, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.17994241842610365, |
|
"grad_norm": 48.092463876152614, |
|
"learning_rate": 4.903397770756729e-07, |
|
"logits/chosen": -1.0979223251342773, |
|
"logits/rejected": -1.15996515750885, |
|
"logps/chosen": -342.7486267089844, |
|
"logps/rejected": -429.194580078125, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8847867846488953, |
|
"rewards/margins": 0.835138201713562, |
|
"rewards/rejected": -1.7199251651763916, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.18234165067178504, |
|
"grad_norm": 32.41150087194352, |
|
"learning_rate": 4.897549265378004e-07, |
|
"logits/chosen": -1.0312681198120117, |
|
"logits/rejected": -1.1079070568084717, |
|
"logps/chosen": -474.59259033203125, |
|
"logps/rejected": -628.4571533203125, |
|
"loss": 0.5173, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.605443000793457, |
|
"rewards/margins": 1.5567104816436768, |
|
"rewards/rejected": -3.162153720855713, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.1847408829174664, |
|
"grad_norm": 43.75189458459123, |
|
"learning_rate": 4.891532581443643e-07, |
|
"logits/chosen": -1.274938941001892, |
|
"logits/rejected": -1.308607816696167, |
|
"logps/chosen": -466.8857421875, |
|
"logps/rejected": -609.5013427734375, |
|
"loss": 0.5271, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6197988986968994, |
|
"rewards/margins": 1.4712640047073364, |
|
"rewards/rejected": -3.0910630226135254, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1871401151631478, |
|
"grad_norm": 39.5075439819695, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -1.109499216079712, |
|
"logits/rejected": -1.0709692239761353, |
|
"logps/chosen": -367.5265808105469, |
|
"logps/rejected": -455.7412109375, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2532503604888916, |
|
"rewards/margins": 0.814247727394104, |
|
"rewards/rejected": -2.067498207092285, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.18953934740882916, |
|
"grad_norm": 50.004130642833005, |
|
"learning_rate": 4.878996377861367e-07, |
|
"logits/chosen": -1.068798303604126, |
|
"logits/rejected": -1.1314045190811157, |
|
"logps/chosen": -326.4665222167969, |
|
"logps/rejected": -417.7521057128906, |
|
"loss": 0.5083, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0814282894134521, |
|
"rewards/margins": 0.8757278323173523, |
|
"rewards/rejected": -1.9571564197540283, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.19193857965451055, |
|
"grad_norm": 36.000949990598, |
|
"learning_rate": 4.872477737578327e-07, |
|
"logits/chosen": -1.091275930404663, |
|
"logits/rejected": -1.0248304605484009, |
|
"logps/chosen": -439.6976623535156, |
|
"logps/rejected": -656.8413696289062, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5745375156402588, |
|
"rewards/margins": 2.239027261734009, |
|
"rewards/rejected": -3.8135643005371094, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.19433781190019195, |
|
"grad_norm": 74.95862087165564, |
|
"learning_rate": 4.865792677407718e-07, |
|
"logits/chosen": -1.1572951078414917, |
|
"logits/rejected": -1.2205321788787842, |
|
"logps/chosen": -395.1188659667969, |
|
"logps/rejected": -492.11865234375, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.5695520639419556, |
|
"rewards/margins": 1.1664550304412842, |
|
"rewards/rejected": -2.73600697517395, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.1967370441458733, |
|
"grad_norm": 33.56632786243978, |
|
"learning_rate": 4.858941666279955e-07, |
|
"logits/chosen": -0.9763522148132324, |
|
"logits/rejected": -1.0531394481658936, |
|
"logps/chosen": -409.54302978515625, |
|
"logps/rejected": -430.5146484375, |
|
"loss": 0.584, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.4252262115478516, |
|
"rewards/margins": 0.3249640464782715, |
|
"rewards/rejected": -1.7501903772354126, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.1991362763915547, |
|
"grad_norm": 44.099904884961624, |
|
"learning_rate": 4.851925184766247e-07, |
|
"logits/chosen": -1.0988633632659912, |
|
"logits/rejected": -1.1514496803283691, |
|
"logps/chosen": -356.073974609375, |
|
"logps/rejected": -433.0359802246094, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0130106210708618, |
|
"rewards/margins": 0.9259645342826843, |
|
"rewards/rejected": -1.9389750957489014, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.20153550863723607, |
|
"grad_norm": 32.03347708687078, |
|
"learning_rate": 4.844743725044897e-07, |
|
"logits/chosen": -1.031770944595337, |
|
"logits/rejected": -1.2438600063323975, |
|
"logps/chosen": -349.9236755371094, |
|
"logps/rejected": -410.17193603515625, |
|
"loss": 0.5217, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9646291732788086, |
|
"rewards/margins": 0.8405798077583313, |
|
"rewards/rejected": -1.8052088022232056, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.20393474088291746, |
|
"grad_norm": 28.831301311281855, |
|
"learning_rate": 4.837397790866774e-07, |
|
"logits/chosen": -1.206099033355713, |
|
"logits/rejected": -1.1994575262069702, |
|
"logps/chosen": -389.10491943359375, |
|
"logps/rejected": -480.0270080566406, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9688132405281067, |
|
"rewards/margins": 1.1717948913574219, |
|
"rewards/rejected": -2.140608310699463, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.20633397312859886, |
|
"grad_norm": 32.87509191390056, |
|
"learning_rate": 4.829887897519974e-07, |
|
"logits/chosen": -1.2817895412445068, |
|
"logits/rejected": -1.247855305671692, |
|
"logps/chosen": -326.98419189453125, |
|
"logps/rejected": -420.003173828125, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9683993458747864, |
|
"rewards/margins": 0.7596687078475952, |
|
"rewards/rejected": -1.7280681133270264, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.20873320537428022, |
|
"grad_norm": 37.88063549323584, |
|
"learning_rate": 4.82221457179368e-07, |
|
"logits/chosen": -1.261070966720581, |
|
"logits/rejected": -1.2210159301757812, |
|
"logps/chosen": -387.666259765625, |
|
"logps/rejected": -531.7518310546875, |
|
"loss": 0.4613, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.138819932937622, |
|
"rewards/margins": 1.6149892807006836, |
|
"rewards/rejected": -2.7538094520568848, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.21113243761996162, |
|
"grad_norm": 58.84381198153733, |
|
"learning_rate": 4.814378351941206e-07, |
|
"logits/chosen": -1.1720526218414307, |
|
"logits/rejected": -1.23434579372406, |
|
"logps/chosen": -395.0361328125, |
|
"logps/rejected": -475.3619079589844, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.363128900527954, |
|
"rewards/margins": 0.9177983403205872, |
|
"rewards/rejected": -2.2809271812438965, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.21353166986564298, |
|
"grad_norm": 49.26204797533434, |
|
"learning_rate": 4.806379787642241e-07, |
|
"logits/chosen": -1.207521915435791, |
|
"logits/rejected": -1.2205421924591064, |
|
"logps/chosen": -425.2489318847656, |
|
"logps/rejected": -575.5451049804688, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7557293176651, |
|
"rewards/margins": 1.5624897480010986, |
|
"rewards/rejected": -3.318218946456909, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.21593090211132437, |
|
"grad_norm": 43.44042160829665, |
|
"learning_rate": 4.798219439964293e-07, |
|
"logits/chosen": -1.1142994165420532, |
|
"logits/rejected": -1.1826696395874023, |
|
"logps/chosen": -370.36236572265625, |
|
"logps/rejected": -413.3993225097656, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.159775733947754, |
|
"rewards/margins": 0.3043367862701416, |
|
"rewards/rejected": -1.4641125202178955, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.21833013435700577, |
|
"grad_norm": 40.8770183610601, |
|
"learning_rate": 4.78989788132333e-07, |
|
"logits/chosen": -1.0691736936569214, |
|
"logits/rejected": -1.0526028871536255, |
|
"logps/chosen": -325.0689392089844, |
|
"logps/rejected": -487.765869140625, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.01616632938385, |
|
"rewards/margins": 1.5912829637527466, |
|
"rewards/rejected": -2.6074492931365967, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.22072936660268713, |
|
"grad_norm": 42.43700473469738, |
|
"learning_rate": 4.781415695443631e-07, |
|
"logits/chosen": -1.1770126819610596, |
|
"logits/rejected": -1.2120893001556396, |
|
"logps/chosen": -507.10418701171875, |
|
"logps/rejected": -643.808349609375, |
|
"loss": 0.5198, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1411383152008057, |
|
"rewards/margins": 1.3329979181289673, |
|
"rewards/rejected": -3.4741358757019043, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.22312859884836853, |
|
"grad_norm": 35.4780805866807, |
|
"learning_rate": 4.772773477316836e-07, |
|
"logits/chosen": -1.09368896484375, |
|
"logits/rejected": -1.1431119441986084, |
|
"logps/chosen": -385.4435729980469, |
|
"logps/rejected": -476.1529846191406, |
|
"loss": 0.5098, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.2141118049621582, |
|
"rewards/margins": 0.879438579082489, |
|
"rewards/rejected": -2.093550205230713, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.2255278310940499, |
|
"grad_norm": 62.42297425070803, |
|
"learning_rate": 4.7639718331602117e-07, |
|
"logits/chosen": -1.0556354522705078, |
|
"logits/rejected": -1.0695927143096924, |
|
"logps/chosen": -428.8665466308594, |
|
"logps/rejected": -625.2503662109375, |
|
"loss": 0.5214, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4926120042800903, |
|
"rewards/margins": 2.130432367324829, |
|
"rewards/rejected": -3.62304425239563, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.22792706333973128, |
|
"grad_norm": 72.76307491975679, |
|
"learning_rate": 4.7550113803741275e-07, |
|
"logits/chosen": -1.135399580001831, |
|
"logits/rejected": -1.2878599166870117, |
|
"logps/chosen": -425.1543884277344, |
|
"logps/rejected": -450.4256286621094, |
|
"loss": 0.5152, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.3350064754486084, |
|
"rewards/margins": 0.9679735898971558, |
|
"rewards/rejected": -2.3029801845550537, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.23032629558541268, |
|
"grad_norm": 81.86639367536438, |
|
"learning_rate": 4.7458927474987454e-07, |
|
"logits/chosen": -1.1322991847991943, |
|
"logits/rejected": -1.2043081521987915, |
|
"logps/chosen": -430.93438720703125, |
|
"logps/rejected": -453.57568359375, |
|
"loss": 0.5059, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.235338568687439, |
|
"rewards/margins": 0.7968686819076538, |
|
"rewards/rejected": -2.0322070121765137, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.23272552783109404, |
|
"grad_norm": 63.23515877313946, |
|
"learning_rate": 4.7366165741699347e-07, |
|
"logits/chosen": -1.0109546184539795, |
|
"logits/rejected": -1.0750302076339722, |
|
"logps/chosen": -470.6412048339844, |
|
"logps/rejected": -530.3613891601562, |
|
"loss": 0.4727, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.46035635471344, |
|
"rewards/margins": 0.9168599247932434, |
|
"rewards/rejected": -2.377216100692749, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.23512476007677544, |
|
"grad_norm": 56.92032329813907, |
|
"learning_rate": 4.727183511074401e-07, |
|
"logits/chosen": -1.2379209995269775, |
|
"logits/rejected": -1.258826732635498, |
|
"logps/chosen": -412.4378356933594, |
|
"logps/rejected": -459.80169677734375, |
|
"loss": 0.505, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.271667718887329, |
|
"rewards/margins": 0.624854326248169, |
|
"rewards/rejected": -1.8965221643447876, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2375239923224568, |
|
"grad_norm": 50.38298248984227, |
|
"learning_rate": 4.717594219904043e-07, |
|
"logits/chosen": -1.0959298610687256, |
|
"logits/rejected": -1.2155076265335083, |
|
"logps/chosen": -446.4449157714844, |
|
"logps/rejected": -513.0243530273438, |
|
"loss": 0.5422, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8132413625717163, |
|
"rewards/margins": 1.0427824258804321, |
|
"rewards/rejected": -2.8560237884521484, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.2399232245681382, |
|
"grad_norm": 31.549570234936283, |
|
"learning_rate": 4.7078493733095393e-07, |
|
"logits/chosen": -1.0665780305862427, |
|
"logits/rejected": -1.1320844888687134, |
|
"logps/chosen": -422.85858154296875, |
|
"logps/rejected": -537.7945556640625, |
|
"loss": 0.4704, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6550779342651367, |
|
"rewards/margins": 1.1452900171279907, |
|
"rewards/rejected": -2.800367832183838, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2423224568138196, |
|
"grad_norm": 69.1151390549962, |
|
"learning_rate": 4.6979496548531614e-07, |
|
"logits/chosen": -1.2547630071640015, |
|
"logits/rejected": -1.210106372833252, |
|
"logps/chosen": -437.75146484375, |
|
"logps/rejected": -624.9695434570312, |
|
"loss": 0.5139, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.724177598953247, |
|
"rewards/margins": 1.4503930807113647, |
|
"rewards/rejected": -3.1745710372924805, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.24472168905950095, |
|
"grad_norm": 54.03780429318879, |
|
"learning_rate": 4.6878957589608293e-07, |
|
"logits/chosen": -1.163216233253479, |
|
"logits/rejected": -1.1093411445617676, |
|
"logps/chosen": -447.0042419433594, |
|
"logps/rejected": -650.7940063476562, |
|
"loss": 0.5585, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.826093077659607, |
|
"rewards/margins": 1.6275144815444946, |
|
"rewards/rejected": -3.4536075592041016, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.24712092130518235, |
|
"grad_norm": 32.185267978697475, |
|
"learning_rate": 4.6776883908733956e-07, |
|
"logits/chosen": -1.2985765933990479, |
|
"logits/rejected": -1.4077537059783936, |
|
"logps/chosen": -435.51824951171875, |
|
"logps/rejected": -492.0585021972656, |
|
"loss": 0.4836, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3929134607315063, |
|
"rewards/margins": 1.2694865465164185, |
|
"rewards/rejected": -2.6623997688293457, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2495201535508637, |
|
"grad_norm": 41.371752275653265, |
|
"learning_rate": 4.667328266597178e-07, |
|
"logits/chosen": -1.1596192121505737, |
|
"logits/rejected": -1.1949832439422607, |
|
"logps/chosen": -382.23492431640625, |
|
"logps/rejected": -510.849609375, |
|
"loss": 0.4711, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.294812798500061, |
|
"rewards/margins": 1.3286726474761963, |
|
"rewards/rejected": -2.623485565185547, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2519193857965451, |
|
"grad_norm": 35.25863038534864, |
|
"learning_rate": 4.6568161128537354e-07, |
|
"logits/chosen": -1.1067166328430176, |
|
"logits/rejected": -1.262487530708313, |
|
"logps/chosen": -414.877685546875, |
|
"logps/rejected": -446.10076904296875, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.5195152759552002, |
|
"rewards/margins": 0.8884671926498413, |
|
"rewards/rejected": -2.407982349395752, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2543186180422265, |
|
"grad_norm": 41.117661614634926, |
|
"learning_rate": 4.6461526670288877e-07, |
|
"logits/chosen": -1.1547003984451294, |
|
"logits/rejected": -1.1661105155944824, |
|
"logps/chosen": -398.5390319824219, |
|
"logps/rejected": -465.92010498046875, |
|
"loss": 0.5088, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.312683343887329, |
|
"rewards/margins": 0.9579456448554993, |
|
"rewards/rejected": -2.2706291675567627, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2567178502879079, |
|
"grad_norm": 43.48076574914832, |
|
"learning_rate": 4.635338677120994e-07, |
|
"logits/chosen": -1.366939902305603, |
|
"logits/rejected": -1.3449715375900269, |
|
"logps/chosen": -382.4663391113281, |
|
"logps/rejected": -528.1848754882812, |
|
"loss": 0.4686, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1687244176864624, |
|
"rewards/margins": 1.3069040775299072, |
|
"rewards/rejected": -2.475628614425659, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2591170825335892, |
|
"grad_norm": 44.614924118264355, |
|
"learning_rate": 4.6243749016884835e-07, |
|
"logits/chosen": -1.1942687034606934, |
|
"logits/rejected": -1.2592580318450928, |
|
"logps/chosen": -479.9891662597656, |
|
"logps/rejected": -708.2697143554688, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0574417114257812, |
|
"rewards/margins": 1.8553825616836548, |
|
"rewards/rejected": -3.9128241539001465, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2615163147792706, |
|
"grad_norm": 59.97327717096107, |
|
"learning_rate": 4.613262109796645e-07, |
|
"logits/chosen": -1.2804964780807495, |
|
"logits/rejected": -1.1930066347122192, |
|
"logps/chosen": -415.44415283203125, |
|
"logps/rejected": -597.2564697265625, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.665036916732788, |
|
"rewards/margins": 1.5294617414474487, |
|
"rewards/rejected": -3.1944985389709473, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.263915547024952, |
|
"grad_norm": 38.425411352382326, |
|
"learning_rate": 4.602001080963678e-07, |
|
"logits/chosen": -1.2301857471466064, |
|
"logits/rejected": -1.2801592350006104, |
|
"logps/chosen": -401.37860107421875, |
|
"logps/rejected": -571.9041748046875, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.3433271646499634, |
|
"rewards/margins": 1.8979564905166626, |
|
"rewards/rejected": -3.241283416748047, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2663147792706334, |
|
"grad_norm": 64.38781898233145, |
|
"learning_rate": 4.590592605106017e-07, |
|
"logits/chosen": -1.2344551086425781, |
|
"logits/rejected": -1.2683765888214111, |
|
"logps/chosen": -422.1192321777344, |
|
"logps/rejected": -536.6231079101562, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.380975604057312, |
|
"rewards/margins": 1.4407894611358643, |
|
"rewards/rejected": -2.821765184402466, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2687140115163148, |
|
"grad_norm": 37.84082368188261, |
|
"learning_rate": 4.5790374824829165e-07, |
|
"logits/chosen": -1.185731053352356, |
|
"logits/rejected": -1.245924949645996, |
|
"logps/chosen": -321.22283935546875, |
|
"logps/rejected": -476.427001953125, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.3014849424362183, |
|
"rewards/margins": 1.4552912712097168, |
|
"rewards/rejected": -2.7567763328552246, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.27111324376199614, |
|
"grad_norm": 69.30402137772205, |
|
"learning_rate": 4.5673365236403216e-07, |
|
"logits/chosen": -1.1933674812316895, |
|
"logits/rejected": -1.273466944694519, |
|
"logps/chosen": -366.64105224609375, |
|
"logps/rejected": -533.8867797851562, |
|
"loss": 0.4867, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6127936840057373, |
|
"rewards/margins": 1.428100347518921, |
|
"rewards/rejected": -3.040894031524658, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.27351247600767753, |
|
"grad_norm": 42.84417987935753, |
|
"learning_rate": 4.5554905493540075e-07, |
|
"logits/chosen": -1.4118454456329346, |
|
"logits/rejected": -1.4354169368743896, |
|
"logps/chosen": -371.85394287109375, |
|
"logps/rejected": -596.58349609375, |
|
"loss": 0.4489, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5539534091949463, |
|
"rewards/margins": 2.193103313446045, |
|
"rewards/rejected": -3.747056484222412, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.2759117082533589, |
|
"grad_norm": 67.4443408265607, |
|
"learning_rate": 4.5435003905720074e-07, |
|
"logits/chosen": -1.305117130279541, |
|
"logits/rejected": -1.3726755380630493, |
|
"logps/chosen": -571.3667602539062, |
|
"logps/rejected": -721.839111328125, |
|
"loss": 0.5057, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.947200059890747, |
|
"rewards/margins": 1.7557926177978516, |
|
"rewards/rejected": -4.7029924392700195, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2783109404990403, |
|
"grad_norm": 41.247470313425566, |
|
"learning_rate": 4.531366888356324e-07, |
|
"logits/chosen": -1.2661703824996948, |
|
"logits/rejected": -1.209538459777832, |
|
"logps/chosen": -392.5999450683594, |
|
"logps/rejected": -645.23779296875, |
|
"loss": 0.4602, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0089774131774902, |
|
"rewards/margins": 2.1448001861572266, |
|
"rewards/rejected": -4.153777122497559, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.2807101727447217, |
|
"grad_norm": 44.44476003130934, |
|
"learning_rate": 4.519090893823931e-07, |
|
"logits/chosen": -1.2478493452072144, |
|
"logits/rejected": -1.3221884965896606, |
|
"logps/chosen": -430.3058166503906, |
|
"logps/rejected": -560.8129272460938, |
|
"loss": 0.4847, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8127937316894531, |
|
"rewards/margins": 1.4228136539459229, |
|
"rewards/rejected": -3.235607624053955, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.28310940499040305, |
|
"grad_norm": 58.90294230523504, |
|
"learning_rate": 4.5066732680870734e-07, |
|
"logits/chosen": -1.2605488300323486, |
|
"logits/rejected": -1.371800422668457, |
|
"logps/chosen": -394.3585205078125, |
|
"logps/rejected": -516.7437744140625, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.4103697538375854, |
|
"rewards/margins": 1.6034168004989624, |
|
"rewards/rejected": -3.0137863159179688, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.28550863723608444, |
|
"grad_norm": 63.52046207494392, |
|
"learning_rate": 4.494114882192862e-07, |
|
"logits/chosen": -1.1947085857391357, |
|
"logits/rejected": -1.2123124599456787, |
|
"logps/chosen": -448.3394470214844, |
|
"logps/rejected": -599.2200927734375, |
|
"loss": 0.4769, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9440696239471436, |
|
"rewards/margins": 1.7676725387573242, |
|
"rewards/rejected": -3.711742877960205, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.28790786948176583, |
|
"grad_norm": 39.36667987271267, |
|
"learning_rate": 4.4814166170621735e-07, |
|
"logits/chosen": -1.3341089487075806, |
|
"logits/rejected": -1.3843356370925903, |
|
"logps/chosen": -467.9960021972656, |
|
"logps/rejected": -570.3829956054688, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.193697452545166, |
|
"rewards/margins": 1.253821611404419, |
|
"rewards/rejected": -3.447518825531006, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2903071017274472, |
|
"grad_norm": 87.46450239350362, |
|
"learning_rate": 4.468579363427858e-07, |
|
"logits/chosen": -1.2197396755218506, |
|
"logits/rejected": -1.2728745937347412, |
|
"logps/chosen": -416.8896484375, |
|
"logps/rejected": -539.3204345703125, |
|
"loss": 0.4654, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.5721436738967896, |
|
"rewards/margins": 1.510520339012146, |
|
"rewards/rejected": -3.0826640129089355, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2927063339731286, |
|
"grad_norm": 51.78500104445991, |
|
"learning_rate": 4.4556040217722555e-07, |
|
"logits/chosen": -1.3058878183364868, |
|
"logits/rejected": -1.25492262840271, |
|
"logps/chosen": -362.447509765625, |
|
"logps/rejected": -506.99200439453125, |
|
"loss": 0.4799, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2098300457000732, |
|
"rewards/margins": 1.2542223930358887, |
|
"rewards/rejected": -2.464052200317383, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.29510556621880996, |
|
"grad_norm": 56.76563538419931, |
|
"learning_rate": 4.442491502264033e-07, |
|
"logits/chosen": -1.2897526025772095, |
|
"logits/rejected": -1.2957372665405273, |
|
"logps/chosen": -358.2672424316406, |
|
"logps/rejected": -424.9501953125, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.3041239976882935, |
|
"rewards/margins": 0.8152852058410645, |
|
"rewards/rejected": -2.1194090843200684, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.29750479846449135, |
|
"grad_norm": 37.17813817407036, |
|
"learning_rate": 4.429242724694338e-07, |
|
"logits/chosen": -1.4048488140106201, |
|
"logits/rejected": -1.3725817203521729, |
|
"logps/chosen": -374.0482482910156, |
|
"logps/rejected": -543.1328125, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2972534894943237, |
|
"rewards/margins": 1.5613895654678345, |
|
"rewards/rejected": -2.858642816543579, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.29990403071017274, |
|
"grad_norm": 121.14546355660654, |
|
"learning_rate": 4.4158586184122817e-07, |
|
"logits/chosen": -1.3434243202209473, |
|
"logits/rejected": -1.4299046993255615, |
|
"logps/chosen": -455.94219970703125, |
|
"logps/rejected": -574.8892822265625, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.651075005531311, |
|
"rewards/margins": 1.5524400472640991, |
|
"rewards/rejected": -3.203514814376831, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.30230326295585414, |
|
"grad_norm": 58.51061462804094, |
|
"learning_rate": 4.4023401222597443e-07, |
|
"logits/chosen": -1.2570745944976807, |
|
"logits/rejected": -1.3877769708633423, |
|
"logps/chosen": -464.9574279785156, |
|
"logps/rejected": -576.7808837890625, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9455432891845703, |
|
"rewards/margins": 1.3386828899383545, |
|
"rewards/rejected": -3.284226179122925, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.30470249520153553, |
|
"grad_norm": 50.8382185022156, |
|
"learning_rate": 4.3886881845055235e-07, |
|
"logits/chosen": -1.3191502094268799, |
|
"logits/rejected": -1.4105967283248901, |
|
"logps/chosen": -372.0560302734375, |
|
"logps/rejected": -600.4634399414062, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2365124225616455, |
|
"rewards/margins": 2.38405179977417, |
|
"rewards/rejected": -3.6205639839172363, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.30710172744721687, |
|
"grad_norm": 38.93701739946284, |
|
"learning_rate": 4.374903762778814e-07, |
|
"logits/chosen": -1.4359400272369385, |
|
"logits/rejected": -1.4567973613739014, |
|
"logps/chosen": -475.7760314941406, |
|
"logps/rejected": -609.7572021484375, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.182716131210327, |
|
"rewards/margins": 1.6052064895629883, |
|
"rewards/rejected": -3.7879226207733154, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.30950095969289826, |
|
"grad_norm": 36.62047718195085, |
|
"learning_rate": 4.3609878240020356e-07, |
|
"logits/chosen": -1.2985656261444092, |
|
"logits/rejected": -1.4006609916687012, |
|
"logps/chosen": -532.8348999023438, |
|
"logps/rejected": -629.5772705078125, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3774900436401367, |
|
"rewards/margins": 1.5861461162567139, |
|
"rewards/rejected": -3.9636359214782715, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.31190019193857965, |
|
"grad_norm": 44.56202560175983, |
|
"learning_rate": 4.346941344323005e-07, |
|
"logits/chosen": -1.3752353191375732, |
|
"logits/rejected": -1.4714699983596802, |
|
"logps/chosen": -441.88787841796875, |
|
"logps/rejected": -458.97039794921875, |
|
"loss": 0.525, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.8622640371322632, |
|
"rewards/margins": 0.7467669248580933, |
|
"rewards/rejected": -2.6090309619903564, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.31429942418426104, |
|
"grad_norm": 55.01759944637494, |
|
"learning_rate": 4.332765309046467e-07, |
|
"logits/chosen": -1.4191035032272339, |
|
"logits/rejected": -1.42724609375, |
|
"logps/chosen": -423.49676513671875, |
|
"logps/rejected": -555.864013671875, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.615816354751587, |
|
"rewards/margins": 1.7208226919174194, |
|
"rewards/rejected": -3.336638927459717, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.31669865642994244, |
|
"grad_norm": 57.373160721067336, |
|
"learning_rate": 4.3184607125649754e-07, |
|
"logits/chosen": -1.3222754001617432, |
|
"logits/rejected": -1.3755613565444946, |
|
"logps/chosen": -435.70098876953125, |
|
"logps/rejected": -632.2313232421875, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4980878829956055, |
|
"rewards/margins": 1.8624553680419922, |
|
"rewards/rejected": -3.3605434894561768, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3190978886756238, |
|
"grad_norm": 41.42793793737372, |
|
"learning_rate": 4.304028558289141e-07, |
|
"logits/chosen": -1.4451624155044556, |
|
"logits/rejected": -1.4865562915802002, |
|
"logps/chosen": -459.1952209472656, |
|
"logps/rejected": -624.0546875, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.689760446548462, |
|
"rewards/margins": 1.864870309829712, |
|
"rewards/rejected": -3.5546302795410156, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.32149712092130517, |
|
"grad_norm": 73.49052120855735, |
|
"learning_rate": 4.28946985857725e-07, |
|
"logits/chosen": -1.528925895690918, |
|
"logits/rejected": -1.5248639583587646, |
|
"logps/chosen": -477.422119140625, |
|
"logps/rejected": -631.7359619140625, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0255789756774902, |
|
"rewards/margins": 1.6332505941390991, |
|
"rewards/rejected": -3.658829927444458, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.32389635316698656, |
|
"grad_norm": 40.76124275734906, |
|
"learning_rate": 4.2747856346642445e-07, |
|
"logits/chosen": -1.4490742683410645, |
|
"logits/rejected": -1.4240951538085938, |
|
"logps/chosen": -389.1294250488281, |
|
"logps/rejected": -492.74932861328125, |
|
"loss": 0.4236, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6151390075683594, |
|
"rewards/margins": 1.2310636043548584, |
|
"rewards/rejected": -2.8462026119232178, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.32629558541266795, |
|
"grad_norm": 56.780023366402574, |
|
"learning_rate": 4.2599769165900933e-07, |
|
"logits/chosen": -1.4480865001678467, |
|
"logits/rejected": -1.4879968166351318, |
|
"logps/chosen": -458.68988037109375, |
|
"logps/rejected": -565.7777099609375, |
|
"loss": 0.4956, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.167175054550171, |
|
"rewards/margins": 1.2777191400527954, |
|
"rewards/rejected": -3.4448940753936768, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.32869481765834935, |
|
"grad_norm": 36.1346588520156, |
|
"learning_rate": 4.245044743127535e-07, |
|
"logits/chosen": -1.5145995616912842, |
|
"logits/rejected": -1.4562435150146484, |
|
"logps/chosen": -433.6683044433594, |
|
"logps/rejected": -546.98193359375, |
|
"loss": 0.4736, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7565805912017822, |
|
"rewards/margins": 1.206705093383789, |
|
"rewards/rejected": -2.9632861614227295, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3310940499040307, |
|
"grad_norm": 34.079682299668946, |
|
"learning_rate": 4.229990161709214e-07, |
|
"logits/chosen": -1.3262040615081787, |
|
"logits/rejected": -1.188719630241394, |
|
"logps/chosen": -356.0837097167969, |
|
"logps/rejected": -541.5905151367188, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.2393875122070312, |
|
"rewards/margins": 1.6804568767547607, |
|
"rewards/rejected": -2.919844627380371, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3334932821497121, |
|
"grad_norm": 37.6846699410638, |
|
"learning_rate": 4.214814228354204e-07, |
|
"logits/chosen": -1.337536334991455, |
|
"logits/rejected": -1.3587877750396729, |
|
"logps/chosen": -431.2080078125, |
|
"logps/rejected": -660.2830810546875, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5639148950576782, |
|
"rewards/margins": 2.4401581287384033, |
|
"rewards/rejected": -4.004073143005371, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.33589251439539347, |
|
"grad_norm": 44.17494049898159, |
|
"learning_rate": 4.1995180075939375e-07, |
|
"logits/chosen": -1.51606023311615, |
|
"logits/rejected": -1.48744535446167, |
|
"logps/chosen": -444.0301208496094, |
|
"logps/rejected": -591.0535888671875, |
|
"loss": 0.449, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.744881272315979, |
|
"rewards/margins": 1.708705186843872, |
|
"rewards/rejected": -3.4535861015319824, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.33829174664107486, |
|
"grad_norm": 41.73324787992279, |
|
"learning_rate": 4.1841025723975297e-07, |
|
"logits/chosen": -1.3268964290618896, |
|
"logits/rejected": -1.3524177074432373, |
|
"logps/chosen": -445.13232421875, |
|
"logps/rejected": -646.4071044921875, |
|
"loss": 0.4661, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6669384241104126, |
|
"rewards/margins": 2.2194371223449707, |
|
"rewards/rejected": -3.8863754272460938, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.34069097888675626, |
|
"grad_norm": 80.91305832361327, |
|
"learning_rate": 4.168569004096516e-07, |
|
"logits/chosen": -1.341691017150879, |
|
"logits/rejected": -1.3161810636520386, |
|
"logps/chosen": -409.152099609375, |
|
"logps/rejected": -593.16845703125, |
|
"loss": 0.4624, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7553116083145142, |
|
"rewards/margins": 1.7082653045654297, |
|
"rewards/rejected": -3.4635767936706543, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.3430902111324376, |
|
"grad_norm": 50.725033498599586, |
|
"learning_rate": 4.152918392308997e-07, |
|
"logits/chosen": -1.546785831451416, |
|
"logits/rejected": -1.5451892614364624, |
|
"logps/chosen": -470.802734375, |
|
"logps/rejected": -639.1434326171875, |
|
"loss": 0.4797, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1750898361206055, |
|
"rewards/margins": 1.7993156909942627, |
|
"rewards/rejected": -3.974405288696289, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.345489443378119, |
|
"grad_norm": 85.40295220916695, |
|
"learning_rate": 4.137151834863213e-07, |
|
"logits/chosen": -1.418149709701538, |
|
"logits/rejected": -1.3302658796310425, |
|
"logps/chosen": -490.89776611328125, |
|
"logps/rejected": -763.4044799804688, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5681164264678955, |
|
"rewards/margins": 2.4175021648406982, |
|
"rewards/rejected": -4.985618591308594, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3478886756238004, |
|
"grad_norm": 49.948399938173715, |
|
"learning_rate": 4.121270437720526e-07, |
|
"logits/chosen": -1.1961567401885986, |
|
"logits/rejected": -1.1831985712051392, |
|
"logps/chosen": -395.45294189453125, |
|
"logps/rejected": -528.1370849609375, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9191783666610718, |
|
"rewards/margins": 0.9305855631828308, |
|
"rewards/rejected": -2.849764108657837, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3502879078694818, |
|
"grad_norm": 35.580564672474935, |
|
"learning_rate": 4.105275314897852e-07, |
|
"logits/chosen": -1.3679462671279907, |
|
"logits/rejected": -1.3246995210647583, |
|
"logps/chosen": -401.3337707519531, |
|
"logps/rejected": -686.9393920898438, |
|
"loss": 0.469, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7229747772216797, |
|
"rewards/margins": 2.585207462310791, |
|
"rewards/rejected": -4.308182239532471, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.35268714011516317, |
|
"grad_norm": 43.685991160210406, |
|
"learning_rate": 4.089167588389508e-07, |
|
"logits/chosen": -1.1158957481384277, |
|
"logits/rejected": -1.230369210243225, |
|
"logps/chosen": -547.0029907226562, |
|
"logps/rejected": -697.0067138671875, |
|
"loss": 0.4694, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.114428758621216, |
|
"rewards/margins": 1.9506731033325195, |
|
"rewards/rejected": -4.065101146697998, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3550863723608445, |
|
"grad_norm": 66.37356591019706, |
|
"learning_rate": 4.072948388088515e-07, |
|
"logits/chosen": -1.233522891998291, |
|
"logits/rejected": -1.276780366897583, |
|
"logps/chosen": -492.72845458984375, |
|
"logps/rejected": -658.1353149414062, |
|
"loss": 0.4829, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2197484970092773, |
|
"rewards/margins": 1.6216094493865967, |
|
"rewards/rejected": -3.841358184814453, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3574856046065259, |
|
"grad_norm": 69.97901799878963, |
|
"learning_rate": 4.056618851707334e-07, |
|
"logits/chosen": -1.2373313903808594, |
|
"logits/rejected": -1.3132033348083496, |
|
"logps/chosen": -457.95831298828125, |
|
"logps/rejected": -695.0521240234375, |
|
"loss": 0.4173, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.7973763942718506, |
|
"rewards/margins": 2.393643379211426, |
|
"rewards/rejected": -4.191019535064697, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3598848368522073, |
|
"grad_norm": 48.587428711970404, |
|
"learning_rate": 4.0401801246980675e-07, |
|
"logits/chosen": -1.3571937084197998, |
|
"logits/rejected": -1.4006597995758057, |
|
"logps/chosen": -463.6883239746094, |
|
"logps/rejected": -594.7393798828125, |
|
"loss": 0.4892, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4648189544677734, |
|
"rewards/margins": 1.542531132698059, |
|
"rewards/rejected": -4.007349491119385, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3622840690978887, |
|
"grad_norm": 38.57207095495995, |
|
"learning_rate": 4.0236333601721043e-07, |
|
"logits/chosen": -1.3310314416885376, |
|
"logits/rejected": -1.2778218984603882, |
|
"logps/chosen": -455.7976989746094, |
|
"logps/rejected": -558.8304443359375, |
|
"loss": 0.5111, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.769777536392212, |
|
"rewards/margins": 0.9363303184509277, |
|
"rewards/rejected": -2.7061076164245605, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3646833013435701, |
|
"grad_norm": 37.96256839231154, |
|
"learning_rate": 4.0069797188192364e-07, |
|
"logits/chosen": -1.203385591506958, |
|
"logits/rejected": -1.1847422122955322, |
|
"logps/chosen": -462.11865234375, |
|
"logps/rejected": -606.5435791015625, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.7279574871063232, |
|
"rewards/margins": 1.8359229564666748, |
|
"rewards/rejected": -3.563880443572998, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3670825335892514, |
|
"grad_norm": 44.348458391464845, |
|
"learning_rate": 3.9902203688262417e-07, |
|
"logits/chosen": -1.205517292022705, |
|
"logits/rejected": -1.2756140232086182, |
|
"logps/chosen": -385.13153076171875, |
|
"logps/rejected": -469.98858642578125, |
|
"loss": 0.4599, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.2476422786712646, |
|
"rewards/margins": 0.9903982877731323, |
|
"rewards/rejected": -2.2380406856536865, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.3694817658349328, |
|
"grad_norm": 69.63434467808052, |
|
"learning_rate": 3.9733564857949365e-07, |
|
"logits/chosen": -1.2490085363388062, |
|
"logits/rejected": -1.3441739082336426, |
|
"logps/chosen": -466.0321350097656, |
|
"logps/rejected": -585.1414794921875, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6616054773330688, |
|
"rewards/margins": 1.6084191799163818, |
|
"rewards/rejected": -3.270024538040161, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.3718809980806142, |
|
"grad_norm": 61.067944902480086, |
|
"learning_rate": 3.9563892526597177e-07, |
|
"logits/chosen": -1.4084941148757935, |
|
"logits/rejected": -1.3332915306091309, |
|
"logps/chosen": -368.38494873046875, |
|
"logps/rejected": -485.837890625, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.4880536794662476, |
|
"rewards/margins": 0.7631991505622864, |
|
"rewards/rejected": -2.2512528896331787, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3742802303262956, |
|
"grad_norm": 42.61893096692006, |
|
"learning_rate": 3.9393198596045795e-07, |
|
"logits/chosen": -1.30875563621521, |
|
"logits/rejected": -1.2471106052398682, |
|
"logps/chosen": -410.9712829589844, |
|
"logps/rejected": -568.45751953125, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7241986989974976, |
|
"rewards/margins": 1.4812783002853394, |
|
"rewards/rejected": -3.205477237701416, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.376679462571977, |
|
"grad_norm": 31.366095609512197, |
|
"learning_rate": 3.922149503979628e-07, |
|
"logits/chosen": -1.1689096689224243, |
|
"logits/rejected": -1.1878143548965454, |
|
"logps/chosen": -481.097900390625, |
|
"logps/rejected": -785.8397827148438, |
|
"loss": 0.4457, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0780715942382812, |
|
"rewards/margins": 3.0227134227752686, |
|
"rewards/rejected": -5.100784778594971, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.3790786948176583, |
|
"grad_norm": 46.590501351524594, |
|
"learning_rate": 3.904879390217095e-07, |
|
"logits/chosen": -1.2587759494781494, |
|
"logits/rejected": -1.3025065660476685, |
|
"logps/chosen": -419.4425354003906, |
|
"logps/rejected": -535.4026489257812, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6549510955810547, |
|
"rewards/margins": 1.389463186264038, |
|
"rewards/rejected": -3.0444140434265137, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3814779270633397, |
|
"grad_norm": 74.73243898734299, |
|
"learning_rate": 3.8875107297468463e-07, |
|
"logits/chosen": -1.2635529041290283, |
|
"logits/rejected": -1.2022044658660889, |
|
"logps/chosen": -391.54791259765625, |
|
"logps/rejected": -712.3580322265625, |
|
"loss": 0.503, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.5201681852340698, |
|
"rewards/margins": 2.7511065006256104, |
|
"rewards/rejected": -4.271274566650391, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3838771593090211, |
|
"grad_norm": 60.47812298444817, |
|
"learning_rate": 3.87004474091141e-07, |
|
"logits/chosen": -1.1937386989593506, |
|
"logits/rejected": -1.223789930343628, |
|
"logps/chosen": -374.3924255371094, |
|
"logps/rejected": -540.607421875, |
|
"loss": 0.4657, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5437977313995361, |
|
"rewards/margins": 1.513298749923706, |
|
"rewards/rejected": -3.0570967197418213, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3862763915547025, |
|
"grad_norm": 39.1187908744064, |
|
"learning_rate": 3.8524826488805114e-07, |
|
"logits/chosen": -1.4063940048217773, |
|
"logits/rejected": -1.3802636861801147, |
|
"logps/chosen": -486.917724609375, |
|
"logps/rejected": -567.9946899414062, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0151114463806152, |
|
"rewards/margins": 1.2625728845596313, |
|
"rewards/rejected": -3.2776846885681152, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3886756238003839, |
|
"grad_norm": 53.71098690399445, |
|
"learning_rate": 3.834825685565133e-07, |
|
"logits/chosen": -1.313389778137207, |
|
"logits/rejected": -1.4078295230865479, |
|
"logps/chosen": -371.45654296875, |
|
"logps/rejected": -425.3412170410156, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.3473589420318604, |
|
"rewards/margins": 0.8928249478340149, |
|
"rewards/rejected": -2.2401843070983887, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.39107485604606523, |
|
"grad_norm": 107.17695988171393, |
|
"learning_rate": 3.8170750895311007e-07, |
|
"logits/chosen": -1.2148559093475342, |
|
"logits/rejected": -1.1752598285675049, |
|
"logps/chosen": -438.32232666015625, |
|
"logps/rejected": -568.3878173828125, |
|
"loss": 0.4171, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.5638713836669922, |
|
"rewards/margins": 1.5277128219604492, |
|
"rewards/rejected": -3.0915839672088623, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3934740882917466, |
|
"grad_norm": 37.15180787425003, |
|
"learning_rate": 3.7992321059122045e-07, |
|
"logits/chosen": -1.1769952774047852, |
|
"logits/rejected": -1.300578236579895, |
|
"logps/chosen": -472.325439453125, |
|
"logps/rejected": -621.8825073242188, |
|
"loss": 0.4885, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2215113639831543, |
|
"rewards/margins": 1.6850610971450806, |
|
"rewards/rejected": -3.9065728187561035, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.395873320537428, |
|
"grad_norm": 43.17139372813891, |
|
"learning_rate": 3.7812979863228576e-07, |
|
"logits/chosen": -1.3489793539047241, |
|
"logits/rejected": -1.3900834321975708, |
|
"logps/chosen": -443.6715393066406, |
|
"logps/rejected": -586.6777954101562, |
|
"loss": 0.4476, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2688426971435547, |
|
"rewards/margins": 1.326430320739746, |
|
"rewards/rejected": -3.59527325630188, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3982725527831094, |
|
"grad_norm": 46.141900540489175, |
|
"learning_rate": 3.763273988770296e-07, |
|
"logits/chosen": -1.3089871406555176, |
|
"logits/rejected": -1.3825414180755615, |
|
"logps/chosen": -422.4200744628906, |
|
"logps/rejected": -581.9949340820312, |
|
"loss": 0.4636, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8401399850845337, |
|
"rewards/margins": 1.5839296579360962, |
|
"rewards/rejected": -3.42406964302063, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.4006717850287908, |
|
"grad_norm": 62.372203609183686, |
|
"learning_rate": 3.7451613775663405e-07, |
|
"logits/chosen": -1.2362799644470215, |
|
"logits/rejected": -1.1603913307189941, |
|
"logps/chosen": -412.76837158203125, |
|
"logps/rejected": -674.3446655273438, |
|
"loss": 0.4667, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.710687279701233, |
|
"rewards/margins": 2.6008946895599365, |
|
"rewards/rejected": -4.311581611633301, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.40307101727447214, |
|
"grad_norm": 64.9643167880585, |
|
"learning_rate": 3.726961423238706e-07, |
|
"logits/chosen": -1.3476378917694092, |
|
"logits/rejected": -1.3207610845565796, |
|
"logps/chosen": -394.1864013671875, |
|
"logps/rejected": -580.0813598632812, |
|
"loss": 0.4791, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.6723476648330688, |
|
"rewards/margins": 1.6961578130722046, |
|
"rewards/rejected": -3.3685054779052734, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.40547024952015354, |
|
"grad_norm": 49.88660013825801, |
|
"learning_rate": 3.708675402441882e-07, |
|
"logits/chosen": -1.1998610496520996, |
|
"logits/rejected": -1.35236394405365, |
|
"logps/chosen": -444.1595764160156, |
|
"logps/rejected": -540.0147705078125, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6736522912979126, |
|
"rewards/margins": 1.3222843408584595, |
|
"rewards/rejected": -2.995936632156372, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.40786948176583493, |
|
"grad_norm": 77.03784806851208, |
|
"learning_rate": 3.6903045978675775e-07, |
|
"logits/chosen": -1.2468807697296143, |
|
"logits/rejected": -1.2648643255233765, |
|
"logps/chosen": -403.4354553222656, |
|
"logps/rejected": -598.949462890625, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6437181234359741, |
|
"rewards/margins": 2.1794071197509766, |
|
"rewards/rejected": -3.823125123977661, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4102687140115163, |
|
"grad_norm": 53.95037172519313, |
|
"learning_rate": 3.6718502981547474e-07, |
|
"logits/chosen": -1.3636986017227173, |
|
"logits/rejected": -1.326259732246399, |
|
"logps/chosen": -409.52935791015625, |
|
"logps/rejected": -571.069091796875, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.626726508140564, |
|
"rewards/margins": 1.2190231084823608, |
|
"rewards/rejected": -2.845749616622925, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.4126679462571977, |
|
"grad_norm": 49.15860337517488, |
|
"learning_rate": 3.6533137977991986e-07, |
|
"logits/chosen": -1.2063934803009033, |
|
"logits/rejected": -1.215348243713379, |
|
"logps/chosen": -441.303466796875, |
|
"logps/rejected": -561.9154052734375, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.6133943796157837, |
|
"rewards/margins": 0.9018635749816895, |
|
"rewards/rejected": -2.5152580738067627, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.41506717850287905, |
|
"grad_norm": 67.07571213677744, |
|
"learning_rate": 3.6346963970627865e-07, |
|
"logits/chosen": -1.2759754657745361, |
|
"logits/rejected": -1.1578261852264404, |
|
"logps/chosen": -399.96307373046875, |
|
"logps/rejected": -558.6627197265625, |
|
"loss": 0.4549, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.611649513244629, |
|
"rewards/margins": 1.4893057346343994, |
|
"rewards/rejected": -3.100955009460449, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.41746641074856045, |
|
"grad_norm": 64.78695085834201, |
|
"learning_rate": 3.615999401882207e-07, |
|
"logits/chosen": -1.5288084745407104, |
|
"logits/rejected": -1.4955217838287354, |
|
"logps/chosen": -473.1421813964844, |
|
"logps/rejected": -715.1536254882812, |
|
"loss": 0.4492, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.391709804534912, |
|
"rewards/margins": 2.3399155139923096, |
|
"rewards/rejected": -4.731625556945801, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.41986564299424184, |
|
"grad_norm": 47.6256598631063, |
|
"learning_rate": 3.597224123777389e-07, |
|
"logits/chosen": -1.4542527198791504, |
|
"logits/rejected": -1.43537175655365, |
|
"logps/chosen": -557.6041870117188, |
|
"logps/rejected": -791.1467895507812, |
|
"loss": 0.4589, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -3.0537285804748535, |
|
"rewards/margins": 2.2443196773529053, |
|
"rewards/rejected": -5.298048973083496, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.42226487523992323, |
|
"grad_norm": 77.93257969251549, |
|
"learning_rate": 3.5783718797595e-07, |
|
"logits/chosen": -1.3806182146072388, |
|
"logits/rejected": -1.4632576704025269, |
|
"logps/chosen": -529.6144409179688, |
|
"logps/rejected": -654.4708862304688, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2454628944396973, |
|
"rewards/margins": 1.7240183353424072, |
|
"rewards/rejected": -3.9694809913635254, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4246641074856046, |
|
"grad_norm": 47.87060495829484, |
|
"learning_rate": 3.559443992238558e-07, |
|
"logits/chosen": -1.3499047756195068, |
|
"logits/rejected": -1.3741114139556885, |
|
"logps/chosen": -404.3805847167969, |
|
"logps/rejected": -626.6046142578125, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6187816858291626, |
|
"rewards/margins": 1.976681113243103, |
|
"rewards/rejected": -3.5954627990722656, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.42706333973128596, |
|
"grad_norm": 31.34540043802626, |
|
"learning_rate": 3.540441788930673e-07, |
|
"logits/chosen": -1.341737985610962, |
|
"logits/rejected": -1.3971807956695557, |
|
"logps/chosen": -490.4947814941406, |
|
"logps/rejected": -688.3030395507812, |
|
"loss": 0.4217, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.927219033241272, |
|
"rewards/margins": 2.3758645057678223, |
|
"rewards/rejected": -4.303083419799805, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.42946257197696736, |
|
"grad_norm": 52.184614479528065, |
|
"learning_rate": 3.5213666027649123e-07, |
|
"logits/chosen": -1.334465503692627, |
|
"logits/rejected": -1.4451353549957275, |
|
"logps/chosen": -483.6954040527344, |
|
"logps/rejected": -572.4603271484375, |
|
"loss": 0.4556, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.0889644622802734, |
|
"rewards/margins": 1.2906540632247925, |
|
"rewards/rejected": -3.3796188831329346, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.43186180422264875, |
|
"grad_norm": 62.61854115073099, |
|
"learning_rate": 3.5022197717898017e-07, |
|
"logits/chosen": -1.2022984027862549, |
|
"logits/rejected": -1.3184864521026611, |
|
"logps/chosen": -415.38134765625, |
|
"logps/rejected": -565.1746826171875, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.9461984634399414, |
|
"rewards/margins": 1.8992345333099365, |
|
"rewards/rejected": -3.845432996749878, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43426103646833014, |
|
"grad_norm": 61.20243852328461, |
|
"learning_rate": 3.4830026390794633e-07, |
|
"logits/chosen": -1.3132349252700806, |
|
"logits/rejected": -1.3491175174713135, |
|
"logps/chosen": -539.82666015625, |
|
"logps/rejected": -713.0654907226562, |
|
"loss": 0.4442, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.517554521560669, |
|
"rewards/margins": 2.220135450363159, |
|
"rewards/rejected": -4.737689018249512, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.43666026871401153, |
|
"grad_norm": 37.022962730693074, |
|
"learning_rate": 3.4637165526394104e-07, |
|
"logits/chosen": -1.3220465183258057, |
|
"logits/rejected": -1.3310649394989014, |
|
"logps/chosen": -396.91748046875, |
|
"logps/rejected": -562.5906982421875, |
|
"loss": 0.4541, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.6625458002090454, |
|
"rewards/margins": 1.5693401098251343, |
|
"rewards/rejected": -3.2318859100341797, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.43905950095969287, |
|
"grad_norm": 31.533784042262116, |
|
"learning_rate": 3.4443628653119814e-07, |
|
"logits/chosen": -1.2581579685211182, |
|
"logits/rejected": -1.2321877479553223, |
|
"logps/chosen": -457.6893615722656, |
|
"logps/rejected": -755.5025634765625, |
|
"loss": 0.4958, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8591516017913818, |
|
"rewards/margins": 2.5752387046813965, |
|
"rewards/rejected": -4.434390068054199, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.44145873320537427, |
|
"grad_norm": 74.3602562066919, |
|
"learning_rate": 3.424942934681453e-07, |
|
"logits/chosen": -1.3297505378723145, |
|
"logits/rejected": -1.4661356210708618, |
|
"logps/chosen": -383.6442565917969, |
|
"logps/rejected": -581.6712036132812, |
|
"loss": 0.4591, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4883172512054443, |
|
"rewards/margins": 2.007455587387085, |
|
"rewards/rejected": -3.4957728385925293, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.44385796545105566, |
|
"grad_norm": 75.37888411441581, |
|
"learning_rate": 3.405458122978804e-07, |
|
"logits/chosen": -1.3553504943847656, |
|
"logits/rejected": -1.3567984104156494, |
|
"logps/chosen": -431.16143798828125, |
|
"logps/rejected": -534.2850341796875, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.5984207391738892, |
|
"rewards/margins": 1.381314754486084, |
|
"rewards/rejected": -2.9797348976135254, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.44625719769673705, |
|
"grad_norm": 72.84055950942061, |
|
"learning_rate": 3.3859097969861633e-07, |
|
"logits/chosen": -1.2684893608093262, |
|
"logits/rejected": -1.268117070198059, |
|
"logps/chosen": -473.4444274902344, |
|
"logps/rejected": -596.4513549804688, |
|
"loss": 0.4823, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8286316394805908, |
|
"rewards/margins": 1.582054853439331, |
|
"rewards/rejected": -3.410686492919922, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.44865642994241844, |
|
"grad_norm": 50.17662967544575, |
|
"learning_rate": 3.366299327940936e-07, |
|
"logits/chosen": -1.3475382328033447, |
|
"logits/rejected": -1.255838394165039, |
|
"logps/chosen": -461.9693298339844, |
|
"logps/rejected": -676.9766845703125, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7682750225067139, |
|
"rewards/margins": 1.9757261276245117, |
|
"rewards/rejected": -3.7440009117126465, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.4510556621880998, |
|
"grad_norm": 41.91903545671121, |
|
"learning_rate": 3.3466280914396117e-07, |
|
"logits/chosen": -1.2820563316345215, |
|
"logits/rejected": -1.2576204538345337, |
|
"logps/chosen": -396.26971435546875, |
|
"logps/rejected": -576.42822265625, |
|
"loss": 0.4575, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6182832717895508, |
|
"rewards/margins": 1.6133058071136475, |
|
"rewards/rejected": -3.2315890789031982, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.4534548944337812, |
|
"grad_norm": 68.00245043430192, |
|
"learning_rate": 3.326897467341281e-07, |
|
"logits/chosen": -1.286210536956787, |
|
"logits/rejected": -1.3600260019302368, |
|
"logps/chosen": -429.225341796875, |
|
"logps/rejected": -622.280029296875, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2270400524139404, |
|
"rewards/margins": 1.820407509803772, |
|
"rewards/rejected": -4.047447681427002, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.45585412667946257, |
|
"grad_norm": 59.680334488165904, |
|
"learning_rate": 3.3071088396708335e-07, |
|
"logits/chosen": -1.330963373184204, |
|
"logits/rejected": -1.283299207687378, |
|
"logps/chosen": -389.97808837890625, |
|
"logps/rejected": -664.4937744140625, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8736871480941772, |
|
"rewards/margins": 2.509003162384033, |
|
"rewards/rejected": -4.3826904296875, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.45825335892514396, |
|
"grad_norm": 55.13504798919061, |
|
"learning_rate": 3.2872635965218824e-07, |
|
"logits/chosen": -1.2652981281280518, |
|
"logits/rejected": -1.2861101627349854, |
|
"logps/chosen": -509.47467041015625, |
|
"logps/rejected": -697.8663330078125, |
|
"loss": 0.5082, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.491791009902954, |
|
"rewards/margins": 1.7968419790267944, |
|
"rewards/rejected": -4.288632869720459, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.46065259117082535, |
|
"grad_norm": 52.124591511811545, |
|
"learning_rate": 3.2673631299593905e-07, |
|
"logits/chosen": -1.2162278890609741, |
|
"logits/rejected": -1.3705085515975952, |
|
"logps/chosen": -480.2521057128906, |
|
"logps/rejected": -630.8917846679688, |
|
"loss": 0.4528, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0875420570373535, |
|
"rewards/margins": 1.5945124626159668, |
|
"rewards/rejected": -3.6820545196533203, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4630518234165067, |
|
"grad_norm": 62.707006736242334, |
|
"learning_rate": 3.247408835922024e-07, |
|
"logits/chosen": -1.3850958347320557, |
|
"logits/rejected": -1.3544895648956299, |
|
"logps/chosen": -555.5938720703125, |
|
"logps/rejected": -746.1630859375, |
|
"loss": 0.4605, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.5104541778564453, |
|
"rewards/margins": 1.8147118091583252, |
|
"rewards/rejected": -4.32516622543335, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4654510556621881, |
|
"grad_norm": 46.08250458603549, |
|
"learning_rate": 3.2274021141242306e-07, |
|
"logits/chosen": -1.296891212463379, |
|
"logits/rejected": -1.3404098749160767, |
|
"logps/chosen": -482.0558166503906, |
|
"logps/rejected": -644.9270629882812, |
|
"loss": 0.4805, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2868685722351074, |
|
"rewards/margins": 1.5186541080474854, |
|
"rewards/rejected": -3.805522918701172, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4678502879078695, |
|
"grad_norm": 69.44491393075363, |
|
"learning_rate": 3.2073443679580613e-07, |
|
"logits/chosen": -1.1887061595916748, |
|
"logits/rejected": -1.2752236127853394, |
|
"logps/chosen": -440.823974609375, |
|
"logps/rejected": -540.7047119140625, |
|
"loss": 0.4653, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7992362976074219, |
|
"rewards/margins": 1.0187088251113892, |
|
"rewards/rejected": -2.8179447650909424, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.47024952015355087, |
|
"grad_norm": 57.153969593846476, |
|
"learning_rate": 3.1872370043947194e-07, |
|
"logits/chosen": -1.402218222618103, |
|
"logits/rejected": -1.4274516105651855, |
|
"logps/chosen": -428.96343994140625, |
|
"logps/rejected": -651.5018310546875, |
|
"loss": 0.3983, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6342216730117798, |
|
"rewards/margins": 2.3417115211486816, |
|
"rewards/rejected": -3.9759325981140137, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.47264875239923226, |
|
"grad_norm": 64.27887622667613, |
|
"learning_rate": 3.167081433885874e-07, |
|
"logits/chosen": -1.082318902015686, |
|
"logits/rejected": -1.1133878231048584, |
|
"logps/chosen": -543.10498046875, |
|
"logps/rejected": -741.9865112304688, |
|
"loss": 0.4258, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.397563934326172, |
|
"rewards/margins": 1.6185156106948853, |
|
"rewards/rejected": -4.016079902648926, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.4750479846449136, |
|
"grad_norm": 57.701882074535135, |
|
"learning_rate": 3.14687907026472e-07, |
|
"logits/chosen": -1.1284734010696411, |
|
"logits/rejected": -1.2509753704071045, |
|
"logps/chosen": -404.6647033691406, |
|
"logps/rejected": -615.397705078125, |
|
"loss": 0.43, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8688653707504272, |
|
"rewards/margins": 1.8782680034637451, |
|
"rewards/rejected": -3.747133731842041, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.477447216890595, |
|
"grad_norm": 51.61036006005554, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -1.193585991859436, |
|
"logits/rejected": -1.2708711624145508, |
|
"logps/chosen": -519.4849853515625, |
|
"logps/rejected": -704.0787963867188, |
|
"loss": 0.4634, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.2691690921783447, |
|
"rewards/margins": 1.7898368835449219, |
|
"rewards/rejected": -4.059006214141846, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4798464491362764, |
|
"grad_norm": 41.46670714894445, |
|
"learning_rate": 3.1063396353306097e-07, |
|
"logits/chosen": -1.2346079349517822, |
|
"logits/rejected": -1.3723104000091553, |
|
"logps/chosen": -419.3143615722656, |
|
"logps/rejected": -545.783935546875, |
|
"loss": 0.4396, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.60104501247406, |
|
"rewards/margins": 1.641794204711914, |
|
"rewards/rejected": -3.2428393363952637, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4822456813819578, |
|
"grad_norm": 60.7041482335727, |
|
"learning_rate": 3.0860054076979535e-07, |
|
"logits/chosen": -1.3001885414123535, |
|
"logits/rejected": -1.2961161136627197, |
|
"logps/chosen": -490.758544921875, |
|
"logps/rejected": -629.7095947265625, |
|
"loss": 0.4257, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.158540964126587, |
|
"rewards/margins": 1.7136547565460205, |
|
"rewards/rejected": -3.8721961975097656, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4846449136276392, |
|
"grad_norm": 51.131960110389144, |
|
"learning_rate": 3.065630074114115e-07, |
|
"logits/chosen": -1.3280750513076782, |
|
"logits/rejected": -1.402672290802002, |
|
"logps/chosen": -487.7989196777344, |
|
"logps/rejected": -672.5093994140625, |
|
"loss": 0.4767, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.0603041648864746, |
|
"rewards/margins": 2.2750654220581055, |
|
"rewards/rejected": -4.33536958694458, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4870441458733205, |
|
"grad_norm": 39.68429607245266, |
|
"learning_rate": 3.0452150638277947e-07, |
|
"logits/chosen": -1.1976875066757202, |
|
"logits/rejected": -1.1391303539276123, |
|
"logps/chosen": -432.50439453125, |
|
"logps/rejected": -571.4925537109375, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.130190134048462, |
|
"rewards/margins": 1.2737354040145874, |
|
"rewards/rejected": -3.4039254188537598, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4894433781190019, |
|
"grad_norm": 32.52999927801439, |
|
"learning_rate": 3.024761808870856e-07, |
|
"logits/chosen": -1.3780122995376587, |
|
"logits/rejected": -1.3512189388275146, |
|
"logps/chosen": -390.6479797363281, |
|
"logps/rejected": -635.6519165039062, |
|
"loss": 0.4069, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4688472747802734, |
|
"rewards/margins": 2.515817403793335, |
|
"rewards/rejected": -3.9846644401550293, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4918426103646833, |
|
"grad_norm": 95.60445146391889, |
|
"learning_rate": 3.004271743957875e-07, |
|
"logits/chosen": -1.1661708354949951, |
|
"logits/rejected": -1.1566636562347412, |
|
"logps/chosen": -498.49444580078125, |
|
"logps/rejected": -651.3515625, |
|
"loss": 0.4674, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.443251848220825, |
|
"rewards/margins": 1.215421438217163, |
|
"rewards/rejected": -3.6586735248565674, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4942418426103647, |
|
"grad_norm": 55.47701742913237, |
|
"learning_rate": 2.983746306385499e-07, |
|
"logits/chosen": -1.396315574645996, |
|
"logits/rejected": -1.3816092014312744, |
|
"logps/chosen": -464.2110900878906, |
|
"logps/rejected": -665.644775390625, |
|
"loss": 0.431, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.19048810005188, |
|
"rewards/margins": 1.8881957530975342, |
|
"rewards/rejected": -4.078683853149414, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4966410748560461, |
|
"grad_norm": 59.85231462052711, |
|
"learning_rate": 2.963186935931628e-07, |
|
"logits/chosen": -1.3054215908050537, |
|
"logits/rejected": -1.2787823677062988, |
|
"logps/chosen": -412.70562744140625, |
|
"logps/rejected": -570.3099365234375, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5982519388198853, |
|
"rewards/margins": 1.624089241027832, |
|
"rewards/rejected": -3.2223410606384277, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4990403071017274, |
|
"grad_norm": 54.133794185753565, |
|
"learning_rate": 2.9425950747544176e-07, |
|
"logits/chosen": -1.2475080490112305, |
|
"logits/rejected": -1.3604786396026611, |
|
"logps/chosen": -532.1666259765625, |
|
"logps/rejected": -762.163818359375, |
|
"loss": 0.4371, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4116852283477783, |
|
"rewards/margins": 2.56913685798645, |
|
"rewards/rejected": -4.9808220863342285, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5014395393474088, |
|
"grad_norm": 83.6927398487227, |
|
"learning_rate": 2.921972167291119e-07, |
|
"logits/chosen": -1.212083101272583, |
|
"logits/rejected": -1.2536664009094238, |
|
"logps/chosen": -482.11505126953125, |
|
"logps/rejected": -664.6883544921875, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0365757942199707, |
|
"rewards/margins": 1.8161745071411133, |
|
"rewards/rejected": -3.852750301361084, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5038387715930902, |
|
"grad_norm": 50.31802122151575, |
|
"learning_rate": 2.9013196601567567e-07, |
|
"logits/chosen": -1.1923916339874268, |
|
"logits/rejected": -1.1941927671432495, |
|
"logps/chosen": -405.5143127441406, |
|
"logps/rejected": -548.7236328125, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.5634148120880127, |
|
"rewards/margins": 1.2738431692123413, |
|
"rewards/rejected": -2.8372581005096436, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5062380038387716, |
|
"grad_norm": 56.698876160844584, |
|
"learning_rate": 2.8806390020426555e-07, |
|
"logits/chosen": -1.164459228515625, |
|
"logits/rejected": -1.1488382816314697, |
|
"logps/chosen": -419.48785400390625, |
|
"logps/rejected": -572.8009033203125, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.433246374130249, |
|
"rewards/margins": 1.561497449874878, |
|
"rewards/rejected": -2.994743824005127, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.508637236084453, |
|
"grad_norm": 62.14972092087307, |
|
"learning_rate": 2.8599316436148187e-07, |
|
"logits/chosen": -1.3236867189407349, |
|
"logits/rejected": -1.351775884628296, |
|
"logps/chosen": -434.72540283203125, |
|
"logps/rejected": -557.2949829101562, |
|
"loss": 0.4449, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8828712701797485, |
|
"rewards/margins": 1.2600587606430054, |
|
"rewards/rejected": -3.142929792404175, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5110364683301344, |
|
"grad_norm": 48.103685381780075, |
|
"learning_rate": 2.8391990374121723e-07, |
|
"logits/chosen": -1.3166625499725342, |
|
"logits/rejected": -1.3257856369018555, |
|
"logps/chosen": -475.3426818847656, |
|
"logps/rejected": -778.1724243164062, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.332332134246826, |
|
"rewards/margins": 2.738534450531006, |
|
"rewards/rejected": -5.070866584777832, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5134357005758158, |
|
"grad_norm": 47.162564476256435, |
|
"learning_rate": 2.818442637744669e-07, |
|
"logits/chosen": -1.3116474151611328, |
|
"logits/rejected": -1.3327019214630127, |
|
"logps/chosen": -460.369140625, |
|
"logps/rejected": -660.98388671875, |
|
"loss": 0.4342, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.1553499698638916, |
|
"rewards/margins": 1.9608310461044312, |
|
"rewards/rejected": -4.116180896759033, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5158349328214972, |
|
"grad_norm": 60.64034230881214, |
|
"learning_rate": 2.797663900591284e-07, |
|
"logits/chosen": -1.3147813081741333, |
|
"logits/rejected": -1.386897325515747, |
|
"logps/chosen": -468.63470458984375, |
|
"logps/rejected": -615.6099243164062, |
|
"loss": 0.3904, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.097090244293213, |
|
"rewards/margins": 1.7188762426376343, |
|
"rewards/rejected": -3.8159663677215576, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5182341650671785, |
|
"grad_norm": 66.07767465138748, |
|
"learning_rate": 2.776864283497874e-07, |
|
"logits/chosen": -1.3374742269515991, |
|
"logits/rejected": -1.4205501079559326, |
|
"logps/chosen": -465.52313232421875, |
|
"logps/rejected": -728.1679077148438, |
|
"loss": 0.4107, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.3437561988830566, |
|
"rewards/margins": 2.708853006362915, |
|
"rewards/rejected": -5.052609920501709, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5206333973128598, |
|
"grad_norm": 62.29334430926315, |
|
"learning_rate": 2.756045245474943e-07, |
|
"logits/chosen": -1.261883020401001, |
|
"logits/rejected": -1.2139081954956055, |
|
"logps/chosen": -485.0502014160156, |
|
"logps/rejected": -680.46875, |
|
"loss": 0.4545, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.26891827583313, |
|
"rewards/margins": 1.8356815576553345, |
|
"rewards/rejected": -4.104599475860596, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5230326295585412, |
|
"grad_norm": 41.365181681908616, |
|
"learning_rate": 2.7352082468952977e-07, |
|
"logits/chosen": -1.2522320747375488, |
|
"logits/rejected": -1.301537275314331, |
|
"logps/chosen": -509.38494873046875, |
|
"logps/rejected": -802.3201293945312, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.6157939434051514, |
|
"rewards/margins": 2.8169338703155518, |
|
"rewards/rejected": -5.432727336883545, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5254318618042226, |
|
"grad_norm": 61.71676072813974, |
|
"learning_rate": 2.7143547493916e-07, |
|
"logits/chosen": -1.3673561811447144, |
|
"logits/rejected": -1.3156074285507202, |
|
"logps/chosen": -452.85595703125, |
|
"logps/rejected": -724.8404541015625, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.957660436630249, |
|
"rewards/margins": 2.71724271774292, |
|
"rewards/rejected": -4.674903392791748, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.527831094049904, |
|
"grad_norm": 64.09199641901836, |
|
"learning_rate": 2.693486215753853e-07, |
|
"logits/chosen": -1.3113658428192139, |
|
"logits/rejected": -1.3134915828704834, |
|
"logps/chosen": -487.13507080078125, |
|
"logps/rejected": -752.1974487304688, |
|
"loss": 0.436, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3614611625671387, |
|
"rewards/margins": 2.879239559173584, |
|
"rewards/rejected": -5.240700721740723, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5302303262955854, |
|
"grad_norm": 62.382486148855605, |
|
"learning_rate": 2.6726041098267805e-07, |
|
"logits/chosen": -1.2454713582992554, |
|
"logits/rejected": -1.2938363552093506, |
|
"logps/chosen": -489.5714416503906, |
|
"logps/rejected": -586.869140625, |
|
"loss": 0.4975, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.014014720916748, |
|
"rewards/margins": 1.3383963108062744, |
|
"rewards/rejected": -3.3524117469787598, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5326295585412668, |
|
"grad_norm": 77.82285089480301, |
|
"learning_rate": 2.6517098964071507e-07, |
|
"logits/chosen": -1.3246665000915527, |
|
"logits/rejected": -1.3666447401046753, |
|
"logps/chosen": -430.4461975097656, |
|
"logps/rejected": -537.7572021484375, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8055528402328491, |
|
"rewards/margins": 0.8791055679321289, |
|
"rewards/rejected": -2.6846585273742676, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5350287907869482, |
|
"grad_norm": 68.93201264173544, |
|
"learning_rate": 2.630805041141023e-07, |
|
"logits/chosen": -1.4450138807296753, |
|
"logits/rejected": -1.4398632049560547, |
|
"logps/chosen": -390.3777770996094, |
|
"logps/rejected": -676.9429931640625, |
|
"loss": 0.4491, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.6461594104766846, |
|
"rewards/margins": 2.6611924171447754, |
|
"rewards/rejected": -4.307351589202881, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5374280230326296, |
|
"grad_norm": 58.5502482141954, |
|
"learning_rate": 2.609891010420941e-07, |
|
"logits/chosen": -1.4054218530654907, |
|
"logits/rejected": -1.4029279947280884, |
|
"logps/chosen": -484.80145263671875, |
|
"logps/rejected": -684.0780029296875, |
|
"loss": 0.4067, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.152812957763672, |
|
"rewards/margins": 1.9849519729614258, |
|
"rewards/rejected": -4.137764930725098, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.539827255278311, |
|
"grad_norm": 49.966474065126825, |
|
"learning_rate": 2.5889692712830674e-07, |
|
"logits/chosen": -1.2064855098724365, |
|
"logits/rejected": -1.2673299312591553, |
|
"logps/chosen": -412.9921875, |
|
"logps/rejected": -598.0367431640625, |
|
"loss": 0.4303, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8478339910507202, |
|
"rewards/margins": 1.9323060512542725, |
|
"rewards/rejected": -3.7801403999328613, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5422264875239923, |
|
"grad_norm": 78.42556826114057, |
|
"learning_rate": 2.5680412913042843e-07, |
|
"logits/chosen": -1.4747849702835083, |
|
"logits/rejected": -1.426845669746399, |
|
"logps/chosen": -482.0741271972656, |
|
"logps/rejected": -703.7940673828125, |
|
"loss": 0.4473, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.30895733833313, |
|
"rewards/margins": 2.2580885887145996, |
|
"rewards/rejected": -4.567046165466309, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5446257197696737, |
|
"grad_norm": 62.375368028657434, |
|
"learning_rate": 2.5471085384992404e-07, |
|
"logits/chosen": -1.3756840229034424, |
|
"logits/rejected": -1.31130850315094, |
|
"logps/chosen": -460.5741271972656, |
|
"logps/rejected": -753.0440673828125, |
|
"loss": 0.4342, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.089881181716919, |
|
"rewards/margins": 2.9207491874694824, |
|
"rewards/rejected": -5.010630130767822, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5470249520153551, |
|
"grad_norm": 59.8486418535735, |
|
"learning_rate": 2.526172481217381e-07, |
|
"logits/chosen": -1.3878482580184937, |
|
"logits/rejected": -1.3218697309494019, |
|
"logps/chosen": -430.990966796875, |
|
"logps/rejected": -592.5296630859375, |
|
"loss": 0.4811, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.273965358734131, |
|
"rewards/margins": 1.4596675634384155, |
|
"rewards/rejected": -3.733633041381836, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5494241842610365, |
|
"grad_norm": 62.76515485107013, |
|
"learning_rate": 2.5052345880399456e-07, |
|
"logits/chosen": -1.4437415599822998, |
|
"logits/rejected": -1.5084867477416992, |
|
"logps/chosen": -436.0667419433594, |
|
"logps/rejected": -592.7450561523438, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0785436630249023, |
|
"rewards/margins": 1.571500539779663, |
|
"rewards/rejected": -3.6500442028045654, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.5518234165067178, |
|
"grad_norm": 58.27430309569641, |
|
"learning_rate": 2.4842963276769555e-07, |
|
"logits/chosen": -1.4130902290344238, |
|
"logits/rejected": -1.3396443128585815, |
|
"logps/chosen": -389.29888916015625, |
|
"logps/rejected": -609.4580078125, |
|
"loss": 0.4542, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7715704441070557, |
|
"rewards/margins": 1.7805286645889282, |
|
"rewards/rejected": -3.5520987510681152, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5542226487523992, |
|
"grad_norm": 53.972591355193885, |
|
"learning_rate": 2.463359168864189e-07, |
|
"logits/chosen": -1.2576197385787964, |
|
"logits/rejected": -1.4408254623413086, |
|
"logps/chosen": -481.51318359375, |
|
"logps/rejected": -619.8674926757812, |
|
"loss": 0.4782, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8940531015396118, |
|
"rewards/margins": 1.729431390762329, |
|
"rewards/rejected": -3.6234841346740723, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5566218809980806, |
|
"grad_norm": 71.62733220276354, |
|
"learning_rate": 2.4424245802601555e-07, |
|
"logits/chosen": -1.336724042892456, |
|
"logits/rejected": -1.3197355270385742, |
|
"logps/chosen": -375.65155029296875, |
|
"logps/rejected": -558.9928588867188, |
|
"loss": 0.412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.6036341190338135, |
|
"rewards/margins": 1.2925240993499756, |
|
"rewards/rejected": -2.896157741546631, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.559021113243762, |
|
"grad_norm": 79.75428920809141, |
|
"learning_rate": 2.421494030343072e-07, |
|
"logits/chosen": -1.262160062789917, |
|
"logits/rejected": -1.4333540201187134, |
|
"logps/chosen": -479.90118408203125, |
|
"logps/rejected": -587.5850219726562, |
|
"loss": 0.5374, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.142888069152832, |
|
"rewards/margins": 1.5673768520355225, |
|
"rewards/rejected": -3.7102653980255127, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5614203454894434, |
|
"grad_norm": 67.46659078588667, |
|
"learning_rate": 2.400568987307861e-07, |
|
"logits/chosen": -1.3090261220932007, |
|
"logits/rejected": -1.3993467092514038, |
|
"logps/chosen": -436.4292907714844, |
|
"logps/rejected": -518.08837890625, |
|
"loss": 0.3933, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -2.021019458770752, |
|
"rewards/margins": 0.9995520710945129, |
|
"rewards/rejected": -3.0205719470977783, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5638195777351248, |
|
"grad_norm": 68.75205403037342, |
|
"learning_rate": 2.379650918963156e-07, |
|
"logits/chosen": -1.3889644145965576, |
|
"logits/rejected": -1.376657247543335, |
|
"logps/chosen": -432.72503662109375, |
|
"logps/rejected": -657.9354248046875, |
|
"loss": 0.4304, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.385554075241089, |
|
"rewards/margins": 2.185183048248291, |
|
"rewards/rejected": -4.570737361907959, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5662188099808061, |
|
"grad_norm": 99.03508718649326, |
|
"learning_rate": 2.3587412926283438e-07, |
|
"logits/chosen": -1.3818020820617676, |
|
"logits/rejected": -1.3562251329421997, |
|
"logps/chosen": -541.4228515625, |
|
"logps/rejected": -719.0054931640625, |
|
"loss": 0.4708, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.324052333831787, |
|
"rewards/margins": 2.2292587757110596, |
|
"rewards/rejected": -4.553310871124268, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5686180422264875, |
|
"grad_norm": 51.35517953676917, |
|
"learning_rate": 2.337841575030642e-07, |
|
"logits/chosen": -1.1950929164886475, |
|
"logits/rejected": -1.216797113418579, |
|
"logps/chosen": -471.62579345703125, |
|
"logps/rejected": -675.9489135742188, |
|
"loss": 0.4223, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9462970495224, |
|
"rewards/margins": 1.9327852725982666, |
|
"rewards/rejected": -3.879082202911377, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5710172744721689, |
|
"grad_norm": 66.71050035899282, |
|
"learning_rate": 2.316953232202206e-07, |
|
"logits/chosen": -1.3075534105300903, |
|
"logits/rejected": -1.509835958480835, |
|
"logps/chosen": -453.71038818359375, |
|
"logps/rejected": -504.8943786621094, |
|
"loss": 0.4325, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0954437255859375, |
|
"rewards/margins": 1.1868107318878174, |
|
"rewards/rejected": -3.282254457473755, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5734165067178503, |
|
"grad_norm": 53.41657725702868, |
|
"learning_rate": 2.2960777293772958e-07, |
|
"logits/chosen": -1.2998007535934448, |
|
"logits/rejected": -1.4061708450317383, |
|
"logps/chosen": -413.1690979003906, |
|
"logps/rejected": -623.9481201171875, |
|
"loss": 0.4201, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.010094404220581, |
|
"rewards/margins": 2.3299851417541504, |
|
"rewards/rejected": -4.3400797843933105, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.5758157389635317, |
|
"grad_norm": 67.55796862724411, |
|
"learning_rate": 2.2752165308894974e-07, |
|
"logits/chosen": -1.2547554969787598, |
|
"logits/rejected": -1.2700796127319336, |
|
"logps/chosen": -388.22125244140625, |
|
"logps/rejected": -563.0255126953125, |
|
"loss": 0.4317, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9077174663543701, |
|
"rewards/margins": 1.8513762950897217, |
|
"rewards/rejected": -3.75909423828125, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5782149712092131, |
|
"grad_norm": 53.06300597839346, |
|
"learning_rate": 2.254371100069005e-07, |
|
"logits/chosen": -1.2128334045410156, |
|
"logits/rejected": -1.1340278387069702, |
|
"logps/chosen": -400.8592529296875, |
|
"logps/rejected": -608.3983764648438, |
|
"loss": 0.3902, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.6949599981307983, |
|
"rewards/margins": 1.897270917892456, |
|
"rewards/rejected": -3.5922305583953857, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5806142034548945, |
|
"grad_norm": 67.83708549754716, |
|
"learning_rate": 2.2335428991399725e-07, |
|
"logits/chosen": -1.2652689218521118, |
|
"logits/rejected": -1.2790223360061646, |
|
"logps/chosen": -512.0337524414062, |
|
"logps/rejected": -827.4010620117188, |
|
"loss": 0.4194, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.9982750415802, |
|
"rewards/margins": 3.151942729949951, |
|
"rewards/rejected": -6.1502180099487305, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5830134357005758, |
|
"grad_norm": 45.41387584500605, |
|
"learning_rate": 2.2127333891179458e-07, |
|
"logits/chosen": -1.3656853437423706, |
|
"logits/rejected": -1.3812001943588257, |
|
"logps/chosen": -446.84002685546875, |
|
"logps/rejected": -745.3057861328125, |
|
"loss": 0.4553, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.392338275909424, |
|
"rewards/margins": 2.7484543323516846, |
|
"rewards/rejected": -5.140792369842529, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5854126679462572, |
|
"grad_norm": 110.5756097353505, |
|
"learning_rate": 2.1919440297073782e-07, |
|
"logits/chosen": -1.2845975160598755, |
|
"logits/rejected": -1.3235517740249634, |
|
"logps/chosen": -438.25811767578125, |
|
"logps/rejected": -696.1348876953125, |
|
"loss": 0.4683, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2012147903442383, |
|
"rewards/margins": 2.572181224822998, |
|
"rewards/rejected": -4.773395538330078, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5878119001919386, |
|
"grad_norm": 54.1923950374701, |
|
"learning_rate": 2.1711762791992368e-07, |
|
"logits/chosen": -1.3080785274505615, |
|
"logits/rejected": -1.344999074935913, |
|
"logps/chosen": -509.382568359375, |
|
"logps/rejected": -613.4410400390625, |
|
"loss": 0.4518, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2509043216705322, |
|
"rewards/margins": 1.362375020980835, |
|
"rewards/rejected": -3.613279342651367, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5902111324376199, |
|
"grad_norm": 76.8189855205521, |
|
"learning_rate": 2.1504315943687114e-07, |
|
"logits/chosen": -1.190882921218872, |
|
"logits/rejected": -1.1361911296844482, |
|
"logps/chosen": -406.93231201171875, |
|
"logps/rejected": -693.5443725585938, |
|
"loss": 0.4276, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7856594324111938, |
|
"rewards/margins": 2.4512863159179688, |
|
"rewards/rejected": -4.236945629119873, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5926103646833013, |
|
"grad_norm": 85.69322529294834, |
|
"learning_rate": 2.1297114303730248e-07, |
|
"logits/chosen": -1.197404146194458, |
|
"logits/rejected": -1.080083966255188, |
|
"logps/chosen": -424.1693420410156, |
|
"logps/rejected": -695.1914672851562, |
|
"loss": 0.4925, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.1172688007354736, |
|
"rewards/margins": 2.2054264545440674, |
|
"rewards/rejected": -4.322694778442383, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5950095969289827, |
|
"grad_norm": 61.15777752405356, |
|
"learning_rate": 2.1090172406493616e-07, |
|
"logits/chosen": -1.1817920207977295, |
|
"logits/rejected": -1.1163372993469238, |
|
"logps/chosen": -369.00054931640625, |
|
"logps/rejected": -556.2276611328125, |
|
"loss": 0.3871, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.420171856880188, |
|
"rewards/margins": 1.80911386013031, |
|
"rewards/rejected": -3.229285478591919, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5974088291746641, |
|
"grad_norm": 83.03736161560481, |
|
"learning_rate": 2.0883504768129146e-07, |
|
"logits/chosen": -1.3368021249771118, |
|
"logits/rejected": -1.3336868286132812, |
|
"logps/chosen": -524.5817260742188, |
|
"logps/rejected": -736.2766723632812, |
|
"loss": 0.4307, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.5531392097473145, |
|
"rewards/margins": 2.1611647605895996, |
|
"rewards/rejected": -4.7143049240112305, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5998080614203455, |
|
"grad_norm": 106.91723764545198, |
|
"learning_rate": 2.0677125885550571e-07, |
|
"logits/chosen": -1.2371995449066162, |
|
"logits/rejected": -1.4159976243972778, |
|
"logps/chosen": -500.94000244140625, |
|
"logps/rejected": -625.9182739257812, |
|
"loss": 0.4678, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.676302433013916, |
|
"rewards/margins": 1.586896300315857, |
|
"rewards/rejected": -4.263198375701904, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6022072936660269, |
|
"grad_norm": 98.24854916731117, |
|
"learning_rate": 2.0471050235416587e-07, |
|
"logits/chosen": -1.1585936546325684, |
|
"logits/rejected": -1.3378689289093018, |
|
"logps/chosen": -570.26806640625, |
|
"logps/rejected": -740.4846801757812, |
|
"loss": 0.4088, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -3.0679523944854736, |
|
"rewards/margins": 2.1525187492370605, |
|
"rewards/rejected": -5.220470905303955, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6046065259117083, |
|
"grad_norm": 70.50575931793564, |
|
"learning_rate": 2.026529227311532e-07, |
|
"logits/chosen": -1.3343918323516846, |
|
"logits/rejected": -1.3401401042938232, |
|
"logps/chosen": -479.2476501464844, |
|
"logps/rejected": -671.0690307617188, |
|
"loss": 0.4758, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6294853687286377, |
|
"rewards/margins": 1.830687165260315, |
|
"rewards/rejected": -4.4601731300354, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6070057581573897, |
|
"grad_norm": 64.24239379101434, |
|
"learning_rate": 2.005986643175036e-07, |
|
"logits/chosen": -1.2955334186553955, |
|
"logits/rejected": -1.2314105033874512, |
|
"logps/chosen": -471.40313720703125, |
|
"logps/rejected": -755.314697265625, |
|
"loss": 0.3891, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.193781614303589, |
|
"rewards/margins": 2.968440532684326, |
|
"rewards/rejected": -5.162222862243652, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6094049904030711, |
|
"grad_norm": 80.02360466830004, |
|
"learning_rate": 1.9854787121128328e-07, |
|
"logits/chosen": -1.2517433166503906, |
|
"logits/rejected": -1.4158328771591187, |
|
"logps/chosen": -400.4693908691406, |
|
"logps/rejected": -503.8955993652344, |
|
"loss": 0.4843, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7598693370819092, |
|
"rewards/margins": 1.488473892211914, |
|
"rewards/rejected": -3.2483432292938232, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6118042226487524, |
|
"grad_norm": 41.04039113747286, |
|
"learning_rate": 1.9650068726748106e-07, |
|
"logits/chosen": -1.2276278734207153, |
|
"logits/rejected": -1.341862440109253, |
|
"logps/chosen": -467.55224609375, |
|
"logps/rejected": -600.6633911132812, |
|
"loss": 0.4714, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.9688072204589844, |
|
"rewards/margins": 1.3757946491241455, |
|
"rewards/rejected": -3.34460186958313, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6142034548944337, |
|
"grad_norm": 53.85241430730791, |
|
"learning_rate": 1.9445725608791718e-07, |
|
"logits/chosen": -1.2154093980789185, |
|
"logits/rejected": -1.2377523183822632, |
|
"logps/chosen": -479.86181640625, |
|
"logps/rejected": -831.2470703125, |
|
"loss": 0.4467, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2339017391204834, |
|
"rewards/margins": 3.4522805213928223, |
|
"rewards/rejected": -5.686182975769043, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6166026871401151, |
|
"grad_norm": 63.29432990026096, |
|
"learning_rate": 1.924177210111705e-07, |
|
"logits/chosen": -1.302901029586792, |
|
"logits/rejected": -1.3746470212936401, |
|
"logps/chosen": -419.08673095703125, |
|
"logps/rejected": -662.0726928710938, |
|
"loss": 0.4464, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.846747636795044, |
|
"rewards/margins": 2.3124613761901855, |
|
"rewards/rejected": -4.15920877456665, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6190019193857965, |
|
"grad_norm": 48.25299827375637, |
|
"learning_rate": 1.9038222510252364e-07, |
|
"logits/chosen": -1.290828824043274, |
|
"logits/rejected": -1.2858607769012451, |
|
"logps/chosen": -419.1243591308594, |
|
"logps/rejected": -589.756591796875, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7411692142486572, |
|
"rewards/margins": 1.792761206626892, |
|
"rewards/rejected": -3.533930540084839, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6214011516314779, |
|
"grad_norm": 69.23625636105592, |
|
"learning_rate": 1.883509111439277e-07, |
|
"logits/chosen": -1.2867010831832886, |
|
"logits/rejected": -1.278282880783081, |
|
"logps/chosen": -413.43341064453125, |
|
"logps/rejected": -817.8923950195312, |
|
"loss": 0.4301, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9501148462295532, |
|
"rewards/margins": 3.3637890815734863, |
|
"rewards/rejected": -5.31390380859375, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6238003838771593, |
|
"grad_norm": 76.56766681454758, |
|
"learning_rate": 1.8632392162398665e-07, |
|
"logits/chosen": -1.184441328048706, |
|
"logits/rejected": -1.1581648588180542, |
|
"logps/chosen": -494.22955322265625, |
|
"logps/rejected": -747.8279418945312, |
|
"loss": 0.4215, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.0423431396484375, |
|
"rewards/margins": 2.603577136993408, |
|
"rewards/rejected": -4.6459197998046875, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6261996161228407, |
|
"grad_norm": 62.594952599021646, |
|
"learning_rate": 1.84301398727962e-07, |
|
"logits/chosen": -1.3408887386322021, |
|
"logits/rejected": -1.2393563985824585, |
|
"logps/chosen": -374.20220947265625, |
|
"logps/rejected": -702.144287109375, |
|
"loss": 0.4431, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9351218938827515, |
|
"rewards/margins": 2.950066089630127, |
|
"rewards/rejected": -4.885188102722168, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6285988483685221, |
|
"grad_norm": 95.54656449411624, |
|
"learning_rate": 1.8228348432779966e-07, |
|
"logits/chosen": -1.30184006690979, |
|
"logits/rejected": -1.301501989364624, |
|
"logps/chosen": -444.2120056152344, |
|
"logps/rejected": -676.330078125, |
|
"loss": 0.454, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.1440858840942383, |
|
"rewards/margins": 2.344844341278076, |
|
"rewards/rejected": -4.488929748535156, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6309980806142035, |
|
"grad_norm": 50.932751077914006, |
|
"learning_rate": 1.8027031997217773e-07, |
|
"logits/chosen": -1.4296987056732178, |
|
"logits/rejected": -1.3950811624526978, |
|
"logps/chosen": -546.5574340820312, |
|
"logps/rejected": -892.3458862304688, |
|
"loss": 0.3946, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -3.1905577182769775, |
|
"rewards/margins": 3.3431007862091064, |
|
"rewards/rejected": -6.533658504486084, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6333973128598849, |
|
"grad_norm": 49.84649756553616, |
|
"learning_rate": 1.7826204687657758e-07, |
|
"logits/chosen": -1.171356201171875, |
|
"logits/rejected": -1.1530375480651855, |
|
"logps/chosen": -500.71783447265625, |
|
"logps/rejected": -566.6802978515625, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.245105266571045, |
|
"rewards/margins": 1.0768263339996338, |
|
"rewards/rejected": -3.321931838989258, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6357965451055663, |
|
"grad_norm": 60.344915608566154, |
|
"learning_rate": 1.762588059133781e-07, |
|
"logits/chosen": -1.2368324995040894, |
|
"logits/rejected": -1.367072343826294, |
|
"logps/chosen": -481.23284912109375, |
|
"logps/rejected": -660.7156982421875, |
|
"loss": 0.4512, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.8352482318878174, |
|
"rewards/margins": 2.1524555683135986, |
|
"rewards/rejected": -3.987703800201416, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6381957773512476, |
|
"grad_norm": 50.18340018947516, |
|
"learning_rate": 1.7426073760197406e-07, |
|
"logits/chosen": -1.1454894542694092, |
|
"logits/rejected": -1.0864006280899048, |
|
"logps/chosen": -459.0577087402344, |
|
"logps/rejected": -807.2691650390625, |
|
"loss": 0.4246, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0700535774230957, |
|
"rewards/margins": 3.1858713626861572, |
|
"rewards/rejected": -5.255925178527832, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6405950095969289, |
|
"grad_norm": 48.64380350571709, |
|
"learning_rate": 1.7226798209891935e-07, |
|
"logits/chosen": -1.1781213283538818, |
|
"logits/rejected": -1.4034216403961182, |
|
"logps/chosen": -472.9505920410156, |
|
"logps/rejected": -646.2312622070312, |
|
"loss": 0.4072, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.156583070755005, |
|
"rewards/margins": 2.2617027759552, |
|
"rewards/rejected": -4.418285369873047, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6429942418426103, |
|
"grad_norm": 70.13615144233104, |
|
"learning_rate": 1.7028067918809535e-07, |
|
"logits/chosen": -1.256744623184204, |
|
"logits/rejected": -1.2579745054244995, |
|
"logps/chosen": -446.1131286621094, |
|
"logps/rejected": -807.1907348632812, |
|
"loss": 0.4259, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.2229795455932617, |
|
"rewards/margins": 3.256349563598633, |
|
"rewards/rejected": -5.4793291091918945, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6453934740882917, |
|
"grad_norm": 90.53079993576371, |
|
"learning_rate": 1.6829896827090584e-07, |
|
"logits/chosen": -1.3745183944702148, |
|
"logits/rejected": -1.4126373529434204, |
|
"logps/chosen": -463.4715881347656, |
|
"logps/rejected": -547.9916381835938, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.9967275857925415, |
|
"rewards/margins": 1.1592689752578735, |
|
"rewards/rejected": -3.155996322631836, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.6477927063339731, |
|
"grad_norm": 67.94576160324466, |
|
"learning_rate": 1.6632298835649844e-07, |
|
"logits/chosen": -1.2770214080810547, |
|
"logits/rejected": -1.2181642055511475, |
|
"logps/chosen": -491.872802734375, |
|
"logps/rejected": -741.3040771484375, |
|
"loss": 0.4183, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2546989917755127, |
|
"rewards/margins": 2.2701687812805176, |
|
"rewards/rejected": -4.524867534637451, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6501919385796545, |
|
"grad_norm": 51.51411740739314, |
|
"learning_rate": 1.6435287805201364e-07, |
|
"logits/chosen": -1.42020845413208, |
|
"logits/rejected": -1.3592700958251953, |
|
"logps/chosen": -451.265869140625, |
|
"logps/rejected": -593.9780883789062, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.8679488897323608, |
|
"rewards/margins": 1.4463961124420166, |
|
"rewards/rejected": -3.314344882965088, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6525911708253359, |
|
"grad_norm": 50.81929422501917, |
|
"learning_rate": 1.6238877555286207e-07, |
|
"logits/chosen": -1.3951126337051392, |
|
"logits/rejected": -1.3844718933105469, |
|
"logps/chosen": -433.7630920410156, |
|
"logps/rejected": -667.7963256835938, |
|
"loss": 0.3907, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.691265344619751, |
|
"rewards/margins": 2.2821648120880127, |
|
"rewards/rejected": -3.9734299182891846, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.6549904030710173, |
|
"grad_norm": 67.06059702675684, |
|
"learning_rate": 1.60430818633031e-07, |
|
"logits/chosen": -1.1817668676376343, |
|
"logits/rejected": -1.2036738395690918, |
|
"logps/chosen": -421.98236083984375, |
|
"logps/rejected": -603.4003295898438, |
|
"loss": 0.4012, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.750417709350586, |
|
"rewards/margins": 1.8823362588882446, |
|
"rewards/rejected": -3.63275408744812, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6573896353166987, |
|
"grad_norm": 66.36257208214307, |
|
"learning_rate": 1.5847914463541939e-07, |
|
"logits/chosen": -1.3497378826141357, |
|
"logits/rejected": -1.4073352813720703, |
|
"logps/chosen": -385.95751953125, |
|
"logps/rejected": -616.4376220703125, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.8801229000091553, |
|
"rewards/margins": 2.074592113494873, |
|
"rewards/rejected": -3.9547152519226074, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6597888675623801, |
|
"grad_norm": 72.43378197120275, |
|
"learning_rate": 1.5653389046220427e-07, |
|
"logits/chosen": -1.3133211135864258, |
|
"logits/rejected": -1.319744348526001, |
|
"logps/chosen": -389.60784912109375, |
|
"logps/rejected": -561.4827880859375, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.566753625869751, |
|
"rewards/margins": 1.575201392173767, |
|
"rewards/rejected": -3.1419551372528076, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6621880998080614, |
|
"grad_norm": 72.16620019968276, |
|
"learning_rate": 1.545951925652375e-07, |
|
"logits/chosen": -1.2689671516418457, |
|
"logits/rejected": -1.41306734085083, |
|
"logps/chosen": -517.5333862304688, |
|
"logps/rejected": -688.21337890625, |
|
"loss": 0.4433, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1187641620635986, |
|
"rewards/margins": 2.2521450519561768, |
|
"rewards/rejected": -4.370909214019775, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6645873320537428, |
|
"grad_norm": 82.81608813170432, |
|
"learning_rate": 1.5266318693647423e-07, |
|
"logits/chosen": -1.3315207958221436, |
|
"logits/rejected": -1.3426904678344727, |
|
"logps/chosen": -471.59814453125, |
|
"logps/rejected": -622.7100219726562, |
|
"loss": 0.4119, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.9626314640045166, |
|
"rewards/margins": 1.5937044620513916, |
|
"rewards/rejected": -3.556335926055908, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6669865642994242, |
|
"grad_norm": 50.06790457176136, |
|
"learning_rate": 1.5073800909843353e-07, |
|
"logits/chosen": -1.2962977886199951, |
|
"logits/rejected": -1.433579683303833, |
|
"logps/chosen": -469.7752990722656, |
|
"logps/rejected": -641.1842041015625, |
|
"loss": 0.4062, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.060004711151123, |
|
"rewards/margins": 2.2752344608306885, |
|
"rewards/rejected": -4.335238933563232, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6693857965451055, |
|
"grad_norm": 51.67259422731343, |
|
"learning_rate": 1.488197940946922e-07, |
|
"logits/chosen": -1.1685657501220703, |
|
"logits/rejected": -1.208673357963562, |
|
"logps/chosen": -441.9027404785156, |
|
"logps/rejected": -606.9600830078125, |
|
"loss": 0.4113, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6688663959503174, |
|
"rewards/margins": 2.1915180683135986, |
|
"rewards/rejected": -3.860384702682495, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6717850287907869, |
|
"grad_norm": 77.60853331730296, |
|
"learning_rate": 1.4690867648041167e-07, |
|
"logits/chosen": -1.1332224607467651, |
|
"logits/rejected": -1.292792797088623, |
|
"logps/chosen": -453.847412109375, |
|
"logps/rejected": -647.2982177734375, |
|
"loss": 0.4279, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9809976816177368, |
|
"rewards/margins": 2.1735682487487793, |
|
"rewards/rejected": -4.154566287994385, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6741842610364683, |
|
"grad_norm": 66.03903500616863, |
|
"learning_rate": 1.4500479031289987e-07, |
|
"logits/chosen": -1.2553117275238037, |
|
"logits/rejected": -1.3797930479049683, |
|
"logps/chosen": -419.65350341796875, |
|
"logps/rejected": -583.0729370117188, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5933611392974854, |
|
"rewards/margins": 1.654476523399353, |
|
"rewards/rejected": -3.247837781906128, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6765834932821497, |
|
"grad_norm": 42.6539101740705, |
|
"learning_rate": 1.4310826914220747e-07, |
|
"logits/chosen": -1.2346922159194946, |
|
"logits/rejected": -1.3095893859863281, |
|
"logps/chosen": -456.98846435546875, |
|
"logps/rejected": -612.8602294921875, |
|
"loss": 0.459, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.729434609413147, |
|
"rewards/margins": 1.615557074546814, |
|
"rewards/rejected": -3.344991683959961, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.6789827255278311, |
|
"grad_norm": 84.75345605479198, |
|
"learning_rate": 1.412192460017597e-07, |
|
"logits/chosen": -1.3369228839874268, |
|
"logits/rejected": -1.2705490589141846, |
|
"logps/chosen": -474.5061950683594, |
|
"logps/rejected": -645.0938720703125, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.3626677989959717, |
|
"rewards/margins": 1.6654049158096313, |
|
"rewards/rejected": -4.028072834014893, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6813819577735125, |
|
"grad_norm": 67.06503369524259, |
|
"learning_rate": 1.3933785339902504e-07, |
|
"logits/chosen": -1.4095046520233154, |
|
"logits/rejected": -1.2895666360855103, |
|
"logps/chosen": -395.1067199707031, |
|
"logps/rejected": -605.366943359375, |
|
"loss": 0.4617, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8949320316314697, |
|
"rewards/margins": 1.6969797611236572, |
|
"rewards/rejected": -3.591911792755127, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6837811900191939, |
|
"grad_norm": 39.93390492293781, |
|
"learning_rate": 1.374642233062197e-07, |
|
"logits/chosen": -1.349837064743042, |
|
"logits/rejected": -1.4833858013153076, |
|
"logps/chosen": -487.5069274902344, |
|
"logps/rejected": -665.9603881835938, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0987796783447266, |
|
"rewards/margins": 2.1197714805603027, |
|
"rewards/rejected": -4.218550682067871, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6861804222648752, |
|
"grad_norm": 37.8109610801416, |
|
"learning_rate": 1.355984871510511e-07, |
|
"logits/chosen": -1.2923767566680908, |
|
"logits/rejected": -1.2817193269729614, |
|
"logps/chosen": -528.0411376953125, |
|
"logps/rejected": -712.6599731445312, |
|
"loss": 0.3873, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.3655641078948975, |
|
"rewards/margins": 1.8116811513900757, |
|
"rewards/rejected": -4.177245140075684, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6885796545105566, |
|
"grad_norm": 69.3259789726919, |
|
"learning_rate": 1.3374077580749783e-07, |
|
"logits/chosen": -1.5063129663467407, |
|
"logits/rejected": -1.4583210945129395, |
|
"logps/chosen": -387.3618469238281, |
|
"logps/rejected": -595.7457275390625, |
|
"loss": 0.4364, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9980754852294922, |
|
"rewards/margins": 1.9206987619400024, |
|
"rewards/rejected": -3.918774127960205, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.690978886756238, |
|
"grad_norm": 103.02719824327798, |
|
"learning_rate": 1.3189121958663024e-07, |
|
"logits/chosen": -1.2567538022994995, |
|
"logits/rejected": -1.4682070016860962, |
|
"logps/chosen": -512.067626953125, |
|
"logps/rejected": -630.2479248046875, |
|
"loss": 0.4713, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.413303852081299, |
|
"rewards/margins": 1.4284738302230835, |
|
"rewards/rejected": -3.841777801513672, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6933781190019194, |
|
"grad_norm": 61.466096297911875, |
|
"learning_rate": 1.3004994822746895e-07, |
|
"logits/chosen": -1.379688024520874, |
|
"logits/rejected": -1.4199955463409424, |
|
"logps/chosen": -412.78369140625, |
|
"logps/rejected": -605.1600341796875, |
|
"loss": 0.433, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.6713438034057617, |
|
"rewards/margins": 1.8622289896011353, |
|
"rewards/rejected": -3.5335726737976074, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6957773512476008, |
|
"grad_norm": 56.56735513727127, |
|
"learning_rate": 1.2821709088788434e-07, |
|
"logits/chosen": -1.1930772066116333, |
|
"logits/rejected": -1.2361228466033936, |
|
"logps/chosen": -373.34027099609375, |
|
"logps/rejected": -582.1116943359375, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7239477634429932, |
|
"rewards/margins": 2.0639774799346924, |
|
"rewards/rejected": -3.7879252433776855, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6981765834932822, |
|
"grad_norm": 50.93832251487875, |
|
"learning_rate": 1.2639277613553736e-07, |
|
"logits/chosen": -1.447892427444458, |
|
"logits/rejected": -1.4102813005447388, |
|
"logps/chosen": -364.46197509765625, |
|
"logps/rejected": -530.6686401367188, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5684380531311035, |
|
"rewards/margins": 1.651158094406128, |
|
"rewards/rejected": -3.2195963859558105, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7005758157389635, |
|
"grad_norm": 43.2276930156436, |
|
"learning_rate": 1.2457713193885975e-07, |
|
"logits/chosen": -1.2600657939910889, |
|
"logits/rejected": -1.2159188985824585, |
|
"logps/chosen": -355.7850036621094, |
|
"logps/rejected": -597.0232543945312, |
|
"loss": 0.4108, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7937647104263306, |
|
"rewards/margins": 2.0653727054595947, |
|
"rewards/rejected": -3.859137773513794, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7029750479846449, |
|
"grad_norm": 72.27785496283792, |
|
"learning_rate": 1.2277028565807838e-07, |
|
"logits/chosen": -1.36660635471344, |
|
"logits/rejected": -1.4359194040298462, |
|
"logps/chosen": -425.2757263183594, |
|
"logps/rejected": -566.6820678710938, |
|
"loss": 0.4388, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.7790648937225342, |
|
"rewards/margins": 1.5231965780258179, |
|
"rewards/rejected": -3.3022613525390625, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7053742802303263, |
|
"grad_norm": 72.05535743928472, |
|
"learning_rate": 1.209723640362815e-07, |
|
"logits/chosen": -1.3078200817108154, |
|
"logits/rejected": -1.3207073211669922, |
|
"logps/chosen": -490.8136291503906, |
|
"logps/rejected": -722.05712890625, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.272034168243408, |
|
"rewards/margins": 2.4088001251220703, |
|
"rewards/rejected": -4.680834770202637, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7077735124760077, |
|
"grad_norm": 54.476978349000916, |
|
"learning_rate": 1.191834931905277e-07, |
|
"logits/chosen": -1.2736198902130127, |
|
"logits/rejected": -1.28280770778656, |
|
"logps/chosen": -531.6434936523438, |
|
"logps/rejected": -682.0370483398438, |
|
"loss": 0.4566, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.395185947418213, |
|
"rewards/margins": 1.5002753734588623, |
|
"rewards/rejected": -3.895461320877075, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.710172744721689, |
|
"grad_norm": 49.0121212454874, |
|
"learning_rate": 1.1740379860299988e-07, |
|
"logits/chosen": -1.3577029705047607, |
|
"logits/rejected": -1.3682730197906494, |
|
"logps/chosen": -492.86968994140625, |
|
"logps/rejected": -667.6934814453125, |
|
"loss": 0.4496, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.102973461151123, |
|
"rewards/margins": 1.538559913635254, |
|
"rewards/rejected": -3.641533613204956, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.7125719769673704, |
|
"grad_norm": 53.86489014590353, |
|
"learning_rate": 1.1563340511220254e-07, |
|
"logits/chosen": -1.3047479391098022, |
|
"logits/rejected": -1.4146584272384644, |
|
"logps/chosen": -505.7129821777344, |
|
"logps/rejected": -683.0394897460938, |
|
"loss": 0.4351, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.213013172149658, |
|
"rewards/margins": 1.9895169734954834, |
|
"rewards/rejected": -4.202530384063721, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7149712092130518, |
|
"grad_norm": 65.39616806465553, |
|
"learning_rate": 1.1387243690420556e-07, |
|
"logits/chosen": -1.2899211645126343, |
|
"logits/rejected": -1.3665580749511719, |
|
"logps/chosen": -525.3897705078125, |
|
"logps/rejected": -745.2420043945312, |
|
"loss": 0.4695, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.0657598972320557, |
|
"rewards/margins": 2.3017890453338623, |
|
"rewards/rejected": -4.367548942565918, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7173704414587332, |
|
"grad_norm": 109.17338886735457, |
|
"learning_rate": 1.1212101750393235e-07, |
|
"logits/chosen": -1.4184496402740479, |
|
"logits/rejected": -1.4868519306182861, |
|
"logps/chosen": -459.9681701660156, |
|
"logps/rejected": -679.7008666992188, |
|
"loss": 0.4322, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0537400245666504, |
|
"rewards/margins": 2.383260726928711, |
|
"rewards/rejected": -4.4370012283325195, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7197696737044146, |
|
"grad_norm": 55.756753852863056, |
|
"learning_rate": 1.1037926976649562e-07, |
|
"logits/chosen": -1.3477023839950562, |
|
"logits/rejected": -1.3794200420379639, |
|
"logps/chosen": -488.9522399902344, |
|
"logps/rejected": -725.9136352539062, |
|
"loss": 0.4548, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.273102283477783, |
|
"rewards/margins": 2.1587283611297607, |
|
"rewards/rejected": -4.431830406188965, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.722168905950096, |
|
"grad_norm": 68.80775812348986, |
|
"learning_rate": 1.0864731586857936e-07, |
|
"logits/chosen": -1.3036201000213623, |
|
"logits/rejected": -1.453201174736023, |
|
"logps/chosen": -474.9127502441406, |
|
"logps/rejected": -661.4716186523438, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9301677942276, |
|
"rewards/margins": 2.1636157035827637, |
|
"rewards/rejected": -4.093783855438232, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7245681381957774, |
|
"grad_norm": 79.25153161336767, |
|
"learning_rate": 1.0692527729986839e-07, |
|
"logits/chosen": -1.2558592557907104, |
|
"logits/rejected": -1.3554977178573608, |
|
"logps/chosen": -466.6205139160156, |
|
"logps/rejected": -644.95654296875, |
|
"loss": 0.3843, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.134385347366333, |
|
"rewards/margins": 2.0006089210510254, |
|
"rewards/rejected": -4.1349945068359375, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7269673704414588, |
|
"grad_norm": 86.56858666466508, |
|
"learning_rate": 1.0521327485452692e-07, |
|
"logits/chosen": -1.3626697063446045, |
|
"logits/rejected": -1.4285178184509277, |
|
"logps/chosen": -469.54327392578125, |
|
"logps/rejected": -667.6971435546875, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.349073886871338, |
|
"rewards/margins": 2.126603841781616, |
|
"rewards/rejected": -4.475677967071533, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7293666026871402, |
|
"grad_norm": 70.38320684395659, |
|
"learning_rate": 1.0351142862272468e-07, |
|
"logits/chosen": -1.273447036743164, |
|
"logits/rejected": -1.430558204650879, |
|
"logps/chosen": -425.697998046875, |
|
"logps/rejected": -738.443603515625, |
|
"loss": 0.4349, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.1780591011047363, |
|
"rewards/margins": 3.248278856277466, |
|
"rewards/rejected": -5.426337718963623, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7317658349328215, |
|
"grad_norm": 69.1842946105189, |
|
"learning_rate": 1.0181985798221343e-07, |
|
"logits/chosen": -1.324233055114746, |
|
"logits/rejected": -1.3126153945922852, |
|
"logps/chosen": -449.85931396484375, |
|
"logps/rejected": -683.6974487304688, |
|
"loss": 0.4176, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.064021110534668, |
|
"rewards/margins": 2.2973549365997314, |
|
"rewards/rejected": -4.3613762855529785, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7341650671785028, |
|
"grad_norm": 79.20520032825635, |
|
"learning_rate": 1.0013868158995329e-07, |
|
"logits/chosen": -1.3214269876480103, |
|
"logits/rejected": -1.3733516931533813, |
|
"logps/chosen": -499.84619140625, |
|
"logps/rejected": -665.3927001953125, |
|
"loss": 0.4477, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2968623638153076, |
|
"rewards/margins": 1.8752143383026123, |
|
"rewards/rejected": -4.172077178955078, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7365642994241842, |
|
"grad_norm": 60.91192909422294, |
|
"learning_rate": 9.84680173737887e-08, |
|
"logits/chosen": -1.4117096662521362, |
|
"logits/rejected": -1.4878652095794678, |
|
"logps/chosen": -454.979736328125, |
|
"logps/rejected": -587.895263671875, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8998470306396484, |
|
"rewards/margins": 1.7405235767364502, |
|
"rewards/rejected": -3.6403706073760986, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7389635316698656, |
|
"grad_norm": 56.810179840146574, |
|
"learning_rate": 9.680798252417713e-08, |
|
"logits/chosen": -1.490818738937378, |
|
"logits/rejected": -1.5360429286956787, |
|
"logps/chosen": -389.6299743652344, |
|
"logps/rejected": -561.4585571289062, |
|
"loss": 0.4186, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7963836193084717, |
|
"rewards/margins": 1.37783944606781, |
|
"rewards/rejected": -3.1742234230041504, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.741362763915547, |
|
"grad_norm": 64.39208946081128, |
|
"learning_rate": 9.515869348596808e-08, |
|
"logits/chosen": -1.2397037744522095, |
|
"logits/rejected": -1.3714118003845215, |
|
"logps/chosen": -469.8956604003906, |
|
"logps/rejected": -645.2073364257812, |
|
"loss": 0.4252, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.875208854675293, |
|
"rewards/margins": 1.950748085975647, |
|
"rewards/rejected": -3.8259568214416504, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.7437619961612284, |
|
"grad_norm": 69.08537862579493, |
|
"learning_rate": 9.352026595023493e-08, |
|
"logits/chosen": -1.2812139987945557, |
|
"logits/rejected": -1.3342682123184204, |
|
"logps/chosen": -455.315673828125, |
|
"logps/rejected": -566.673095703125, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.7951748371124268, |
|
"rewards/margins": 1.3370133638381958, |
|
"rewards/rejected": -3.132188320159912, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7461612284069098, |
|
"grad_norm": 65.26028448347373, |
|
"learning_rate": 9.189281484616004e-08, |
|
"logits/chosen": -1.281347393989563, |
|
"logits/rejected": -1.2689536809921265, |
|
"logps/chosen": -385.11810302734375, |
|
"logps/rejected": -632.1613159179688, |
|
"loss": 0.4691, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8461825847625732, |
|
"rewards/margins": 2.0561070442199707, |
|
"rewards/rejected": -3.902289867401123, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7485604606525912, |
|
"grad_norm": 80.61871004422235, |
|
"learning_rate": 9.027645433297249e-08, |
|
"logits/chosen": -1.1624202728271484, |
|
"logits/rejected": -1.2329314947128296, |
|
"logps/chosen": -566.9415893554688, |
|
"logps/rejected": -743.143798828125, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.6038241386413574, |
|
"rewards/margins": 2.1354241371154785, |
|
"rewards/rejected": -4.739248275756836, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7509596928982726, |
|
"grad_norm": 92.80348247180913, |
|
"learning_rate": 8.867129779194066e-08, |
|
"logits/chosen": -1.304614782333374, |
|
"logits/rejected": -1.4066526889801025, |
|
"logps/chosen": -354.045654296875, |
|
"logps/rejected": -597.7302856445312, |
|
"loss": 0.4367, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.410996437072754, |
|
"rewards/margins": 2.426766872406006, |
|
"rewards/rejected": -3.837763547897339, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.753358925143954, |
|
"grad_norm": 61.53877658813085, |
|
"learning_rate": 8.707745781841866e-08, |
|
"logits/chosen": -1.1687877178192139, |
|
"logits/rejected": -1.2777760028839111, |
|
"logps/chosen": -418.43511962890625, |
|
"logps/rejected": -630.715576171875, |
|
"loss": 0.4253, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9291305541992188, |
|
"rewards/margins": 2.163900852203369, |
|
"rewards/rejected": -4.093031883239746, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7557581573896354, |
|
"grad_norm": 35.11204054429555, |
|
"learning_rate": 8.549504621394831e-08, |
|
"logits/chosen": -1.354907751083374, |
|
"logits/rejected": -1.3612796068191528, |
|
"logps/chosen": -402.2507019042969, |
|
"logps/rejected": -620.3946533203125, |
|
"loss": 0.3723, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.7278754711151123, |
|
"rewards/margins": 2.2112643718719482, |
|
"rewards/rejected": -3.9391398429870605, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7581573896353166, |
|
"grad_norm": 88.00288297264423, |
|
"learning_rate": 8.392417397841703e-08, |
|
"logits/chosen": -1.289905309677124, |
|
"logits/rejected": -1.3830498456954956, |
|
"logps/chosen": -455.31707763671875, |
|
"logps/rejected": -634.426025390625, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.930456519126892, |
|
"rewards/margins": 1.6569267511367798, |
|
"rewards/rejected": -3.587383270263672, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.760556621880998, |
|
"grad_norm": 74.17199448695689, |
|
"learning_rate": 8.236495130227083e-08, |
|
"logits/chosen": -1.3127448558807373, |
|
"logits/rejected": -1.4850471019744873, |
|
"logps/chosen": -532.6678466796875, |
|
"logps/rejected": -737.5643310546875, |
|
"loss": 0.4652, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.458889961242676, |
|
"rewards/margins": 2.453096389770508, |
|
"rewards/rejected": -4.911986351013184, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7629558541266794, |
|
"grad_norm": 67.92460340777723, |
|
"learning_rate": 8.081748755878612e-08, |
|
"logits/chosen": -1.310718297958374, |
|
"logits/rejected": -1.4427484273910522, |
|
"logps/chosen": -485.64007568359375, |
|
"logps/rejected": -605.2127685546875, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0952415466308594, |
|
"rewards/margins": 1.715080976486206, |
|
"rewards/rejected": -3.8103225231170654, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7653550863723608, |
|
"grad_norm": 57.095685251850284, |
|
"learning_rate": 7.928189129639632e-08, |
|
"logits/chosen": -1.2018425464630127, |
|
"logits/rejected": -1.2011052370071411, |
|
"logps/chosen": -412.9466857910156, |
|
"logps/rejected": -631.907958984375, |
|
"loss": 0.393, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.932543158531189, |
|
"rewards/margins": 2.0471224784851074, |
|
"rewards/rejected": -3.9796652793884277, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.7677543186180422, |
|
"grad_norm": 72.05124634833409, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -1.3011525869369507, |
|
"logits/rejected": -1.3565986156463623, |
|
"logps/chosen": -445.1830139160156, |
|
"logps/rejected": -628.5127563476562, |
|
"loss": 0.4404, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1575093269348145, |
|
"rewards/margins": 1.6431667804718018, |
|
"rewards/rejected": -3.800675868988037, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7701535508637236, |
|
"grad_norm": 67.55127608074025, |
|
"learning_rate": 7.624673123879682e-08, |
|
"logits/chosen": -1.176064372062683, |
|
"logits/rejected": -1.3284848928451538, |
|
"logps/chosen": -422.73468017578125, |
|
"logps/rejected": -569.9011840820312, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8534692525863647, |
|
"rewards/margins": 1.6330394744873047, |
|
"rewards/rejected": -3.486508846282959, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.772552783109405, |
|
"grad_norm": 75.19351516985365, |
|
"learning_rate": 7.474738034800663e-08, |
|
"logits/chosen": -1.327171802520752, |
|
"logits/rejected": -1.2900383472442627, |
|
"logps/chosen": -395.79327392578125, |
|
"logps/rejected": -663.9730834960938, |
|
"loss": 0.4643, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7923393249511719, |
|
"rewards/margins": 2.9014949798583984, |
|
"rewards/rejected": -4.6938347816467285, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7749520153550864, |
|
"grad_norm": 47.848721211003586, |
|
"learning_rate": 7.326032273221606e-08, |
|
"logits/chosen": -1.4597444534301758, |
|
"logits/rejected": -1.4309790134429932, |
|
"logps/chosen": -524.79541015625, |
|
"logps/rejected": -688.5581665039062, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.4929757118225098, |
|
"rewards/margins": 1.8584671020507812, |
|
"rewards/rejected": -4.351442337036133, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7773512476007678, |
|
"grad_norm": 74.01694932477116, |
|
"learning_rate": 7.178566270260872e-08, |
|
"logits/chosen": -1.416358232498169, |
|
"logits/rejected": -1.4566500186920166, |
|
"logps/chosen": -498.6495666503906, |
|
"logps/rejected": -710.8546752929688, |
|
"loss": 0.4484, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.396069049835205, |
|
"rewards/margins": 1.9357521533966064, |
|
"rewards/rejected": -4.331821441650391, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7797504798464492, |
|
"grad_norm": 60.26009757023119, |
|
"learning_rate": 7.032350370072709e-08, |
|
"logits/chosen": -1.2919445037841797, |
|
"logits/rejected": -1.3735246658325195, |
|
"logps/chosen": -451.0271911621094, |
|
"logps/rejected": -627.9866943359375, |
|
"loss": 0.4287, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8197091817855835, |
|
"rewards/margins": 1.8777036666870117, |
|
"rewards/rejected": -3.6974129676818848, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7821497120921305, |
|
"grad_norm": 46.06757696293524, |
|
"learning_rate": 6.887394829121596e-08, |
|
"logits/chosen": -1.3799974918365479, |
|
"logits/rejected": -1.4900586605072021, |
|
"logps/chosen": -511.42242431640625, |
|
"logps/rejected": -814.45458984375, |
|
"loss": 0.4112, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.4561266899108887, |
|
"rewards/margins": 3.2779293060302734, |
|
"rewards/rejected": -5.73405647277832, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7845489443378119, |
|
"grad_norm": 66.08707079226782, |
|
"learning_rate": 6.743709815462833e-08, |
|
"logits/chosen": -1.3536878824234009, |
|
"logits/rejected": -1.424392819404602, |
|
"logps/chosen": -499.06951904296875, |
|
"logps/rejected": -663.4131469726562, |
|
"loss": 0.4438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.306351900100708, |
|
"rewards/margins": 1.9852672815322876, |
|
"rewards/rejected": -4.291619300842285, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7869481765834933, |
|
"grad_norm": 56.522101390783696, |
|
"learning_rate": 6.601305408029287e-08, |
|
"logits/chosen": -1.3996652364730835, |
|
"logits/rejected": -1.5223954916000366, |
|
"logps/chosen": -453.1273498535156, |
|
"logps/rejected": -657.0367431640625, |
|
"loss": 0.4095, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1622931957244873, |
|
"rewards/margins": 2.026867151260376, |
|
"rewards/rejected": -4.189160346984863, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7893474088291746, |
|
"grad_norm": 58.94344273406338, |
|
"learning_rate": 6.460191595924366e-08, |
|
"logits/chosen": -1.3207144737243652, |
|
"logits/rejected": -1.3845504522323608, |
|
"logps/chosen": -452.1018981933594, |
|
"logps/rejected": -652.0777587890625, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.1246771812438965, |
|
"rewards/margins": 1.9852821826934814, |
|
"rewards/rejected": -4.109959125518799, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.791746641074856, |
|
"grad_norm": 73.72070547985138, |
|
"learning_rate": 6.320378277721342e-08, |
|
"logits/chosen": -1.445039987564087, |
|
"logits/rejected": -1.4531352519989014, |
|
"logps/chosen": -474.85272216796875, |
|
"logps/rejected": -595.89013671875, |
|
"loss": 0.4363, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2441697120666504, |
|
"rewards/margins": 1.3221468925476074, |
|
"rewards/rejected": -3.566316604614258, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7941458733205374, |
|
"grad_norm": 99.15702080965917, |
|
"learning_rate": 6.181875260769032e-08, |
|
"logits/chosen": -1.3354700803756714, |
|
"logits/rejected": -1.5110574960708618, |
|
"logps/chosen": -463.51544189453125, |
|
"logps/rejected": -614.9706420898438, |
|
"loss": 0.4487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8257434368133545, |
|
"rewards/margins": 2.170253038406372, |
|
"rewards/rejected": -3.9959964752197266, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7965451055662188, |
|
"grad_norm": 74.68786765448685, |
|
"learning_rate": 6.044692260503797e-08, |
|
"logits/chosen": -1.2728173732757568, |
|
"logits/rejected": -1.4052507877349854, |
|
"logps/chosen": -539.398681640625, |
|
"logps/rejected": -777.3851928710938, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.54783034324646, |
|
"rewards/margins": 2.600435972213745, |
|
"rewards/rejected": -5.148266792297363, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7989443378119002, |
|
"grad_norm": 49.2779890787065, |
|
"learning_rate": 5.9088388997680984e-08, |
|
"logits/chosen": -1.2829517126083374, |
|
"logits/rejected": -1.413627028465271, |
|
"logps/chosen": -531.353271484375, |
|
"logps/rejected": -713.0965576171875, |
|
"loss": 0.4236, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.072298765182495, |
|
"rewards/margins": 2.5900845527648926, |
|
"rewards/rejected": -4.662383079528809, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8013435700575816, |
|
"grad_norm": 59.17229679593995, |
|
"learning_rate": 5.774324708135439e-08, |
|
"logits/chosen": -1.450781226158142, |
|
"logits/rejected": -1.532762885093689, |
|
"logps/chosen": -397.7679443359375, |
|
"logps/rejected": -564.6585693359375, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.851607322692871, |
|
"rewards/margins": 1.8835060596466064, |
|
"rewards/rejected": -3.7351138591766357, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.803742802303263, |
|
"grad_norm": 34.8860528557667, |
|
"learning_rate": 5.641159121241953e-08, |
|
"logits/chosen": -1.4477955102920532, |
|
"logits/rejected": -1.3717939853668213, |
|
"logps/chosen": -461.99151611328125, |
|
"logps/rejected": -707.8023681640625, |
|
"loss": 0.4269, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.312293529510498, |
|
"rewards/margins": 2.164968729019165, |
|
"rewards/rejected": -4.477262020111084, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8061420345489443, |
|
"grad_norm": 59.873519110220755, |
|
"learning_rate": 5.5093514801245106e-08, |
|
"logits/chosen": -1.3825056552886963, |
|
"logits/rejected": -1.3870340585708618, |
|
"logps/chosen": -470.130859375, |
|
"logps/rejected": -666.9625244140625, |
|
"loss": 0.4412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.260951042175293, |
|
"rewards/margins": 1.7683954238891602, |
|
"rewards/rejected": -4.029345989227295, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8085412667946257, |
|
"grad_norm": 49.58444606035955, |
|
"learning_rate": 5.378911030565453e-08, |
|
"logits/chosen": -1.3055133819580078, |
|
"logits/rejected": -1.3414740562438965, |
|
"logps/chosen": -552.4376220703125, |
|
"logps/rejected": -754.7117309570312, |
|
"loss": 0.4357, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7371158599853516, |
|
"rewards/margins": 1.7635667324066162, |
|
"rewards/rejected": -4.500682830810547, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8109404990403071, |
|
"grad_norm": 68.89475959244419, |
|
"learning_rate": 5.249846922444101e-08, |
|
"logits/chosen": -1.4738832712173462, |
|
"logits/rejected": -1.525614619255066, |
|
"logps/chosen": -470.60821533203125, |
|
"logps/rejected": -818.2075805664062, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.500138282775879, |
|
"rewards/margins": 3.7085862159729004, |
|
"rewards/rejected": -6.2087249755859375, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8133397312859885, |
|
"grad_norm": 79.56262016402897, |
|
"learning_rate": 5.122168209094865e-08, |
|
"logits/chosen": -1.385946273803711, |
|
"logits/rejected": -1.4624695777893066, |
|
"logps/chosen": -425.1856384277344, |
|
"logps/rejected": -532.6458740234375, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.1394572257995605, |
|
"rewards/margins": 1.0579640865325928, |
|
"rewards/rejected": -3.197421073913574, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8157389635316699, |
|
"grad_norm": 52.696107408105114, |
|
"learning_rate": 4.995883846672222e-08, |
|
"logits/chosen": -1.2016537189483643, |
|
"logits/rejected": -1.3861643075942993, |
|
"logps/chosen": -596.3150024414062, |
|
"logps/rejected": -683.9937744140625, |
|
"loss": 0.4343, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.35935378074646, |
|
"rewards/margins": 1.6371619701385498, |
|
"rewards/rejected": -3.9965157508850098, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8181381957773513, |
|
"grad_norm": 49.46829805686536, |
|
"learning_rate": 4.871002693522486e-08, |
|
"logits/chosen": -1.346164345741272, |
|
"logits/rejected": -1.3557155132293701, |
|
"logps/chosen": -499.1998596191406, |
|
"logps/rejected": -619.0379638671875, |
|
"loss": 0.4461, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2972307205200195, |
|
"rewards/margins": 1.5589020252227783, |
|
"rewards/rejected": -3.856133222579956, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8205374280230326, |
|
"grad_norm": 49.216158194263706, |
|
"learning_rate": 4.7475335095623956e-08, |
|
"logits/chosen": -1.4238225221633911, |
|
"logits/rejected": -1.3791711330413818, |
|
"logps/chosen": -494.3877868652344, |
|
"logps/rejected": -666.6940307617188, |
|
"loss": 0.4389, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.409292221069336, |
|
"rewards/margins": 1.8552162647247314, |
|
"rewards/rejected": -4.264508247375488, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.822936660268714, |
|
"grad_norm": 99.88268703047527, |
|
"learning_rate": 4.6254849556646714e-08, |
|
"logits/chosen": -1.2449895143508911, |
|
"logits/rejected": -1.3020665645599365, |
|
"logps/chosen": -563.0958862304688, |
|
"logps/rejected": -768.5317993164062, |
|
"loss": 0.4356, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.662400007247925, |
|
"rewards/margins": 2.4422526359558105, |
|
"rewards/rejected": -5.104652404785156, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.8253358925143954, |
|
"grad_norm": 62.76497827080244, |
|
"learning_rate": 4.504865593050483e-08, |
|
"logits/chosen": -1.3190138339996338, |
|
"logits/rejected": -1.3208320140838623, |
|
"logps/chosen": -483.7323303222656, |
|
"logps/rejected": -660.0289306640625, |
|
"loss": 0.4451, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.17266583442688, |
|
"rewards/margins": 1.7615305185317993, |
|
"rewards/rejected": -3.9341964721679688, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.8277351247600768, |
|
"grad_norm": 59.22990924876174, |
|
"learning_rate": 4.385683882688895e-08, |
|
"logits/chosen": -1.1815550327301025, |
|
"logits/rejected": -1.2879037857055664, |
|
"logps/chosen": -483.38427734375, |
|
"logps/rejected": -521.2240600585938, |
|
"loss": 0.4899, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.1154184341430664, |
|
"rewards/margins": 0.7992331981658936, |
|
"rewards/rejected": -2.91465163230896, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.8301343570057581, |
|
"grad_norm": 82.86527709001345, |
|
"learning_rate": 4.2679481847033985e-08, |
|
"logits/chosen": -1.3511393070220947, |
|
"logits/rejected": -1.4024460315704346, |
|
"logps/chosen": -451.1329040527344, |
|
"logps/rejected": -676.7293701171875, |
|
"loss": 0.476, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8962301015853882, |
|
"rewards/margins": 2.275341749191284, |
|
"rewards/rejected": -4.171571731567383, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8325335892514395, |
|
"grad_norm": 52.66863159129303, |
|
"learning_rate": 4.151666757785435e-08, |
|
"logits/chosen": -1.2461029291152954, |
|
"logits/rejected": -1.2992231845855713, |
|
"logps/chosen": -400.0786437988281, |
|
"logps/rejected": -668.3745727539062, |
|
"loss": 0.3924, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.6190965175628662, |
|
"rewards/margins": 2.7610526084899902, |
|
"rewards/rejected": -4.380148887634277, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8349328214971209, |
|
"grad_norm": 56.576155344810864, |
|
"learning_rate": 4.036847758615136e-08, |
|
"logits/chosen": -1.1943762302398682, |
|
"logits/rejected": -1.332884430885315, |
|
"logps/chosen": -517.8956298828125, |
|
"logps/rejected": -671.5089111328125, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.780350685119629, |
|
"rewards/margins": 1.4910480976104736, |
|
"rewards/rejected": -4.271399021148682, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8373320537428023, |
|
"grad_norm": 46.825486174943826, |
|
"learning_rate": 3.923499241289113e-08, |
|
"logits/chosen": -1.2725188732147217, |
|
"logits/rejected": -1.41265070438385, |
|
"logps/chosen": -526.8565063476562, |
|
"logps/rejected": -632.02880859375, |
|
"loss": 0.4499, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.301103115081787, |
|
"rewards/margins": 1.6291067600250244, |
|
"rewards/rejected": -3.9302101135253906, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8397312859884837, |
|
"grad_norm": 53.125011909511024, |
|
"learning_rate": 3.811629156755541e-08, |
|
"logits/chosen": -1.28065824508667, |
|
"logits/rejected": -1.2841136455535889, |
|
"logps/chosen": -484.08251953125, |
|
"logps/rejected": -641.4912719726562, |
|
"loss": 0.4517, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.014495372772217, |
|
"rewards/margins": 1.684488296508789, |
|
"rewards/rejected": -3.698983669281006, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8421305182341651, |
|
"grad_norm": 66.99344284990306, |
|
"learning_rate": 3.701245352256391e-08, |
|
"logits/chosen": -1.3107590675354004, |
|
"logits/rejected": -1.4500467777252197, |
|
"logps/chosen": -464.06268310546875, |
|
"logps/rejected": -548.866455078125, |
|
"loss": 0.4163, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.7597360610961914, |
|
"rewards/margins": 1.0881340503692627, |
|
"rewards/rejected": -2.847869873046875, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8445297504798465, |
|
"grad_norm": 71.85929092000093, |
|
"learning_rate": 3.592355570776984e-08, |
|
"logits/chosen": -1.2988982200622559, |
|
"logits/rejected": -1.3713183403015137, |
|
"logps/chosen": -366.3793029785156, |
|
"logps/rejected": -544.4915771484375, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.464356541633606, |
|
"rewards/margins": 1.7605937719345093, |
|
"rewards/rejected": -3.2249503135681152, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.8469289827255279, |
|
"grad_norm": 44.064848069845205, |
|
"learning_rate": 3.484967450502904e-08, |
|
"logits/chosen": -1.257299542427063, |
|
"logits/rejected": -1.3451262712478638, |
|
"logps/chosen": -373.0574645996094, |
|
"logps/rejected": -586.8876953125, |
|
"loss": 0.4248, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.7843666076660156, |
|
"rewards/margins": 1.750450849533081, |
|
"rewards/rejected": -3.534817934036255, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8493282149712092, |
|
"grad_norm": 84.80584097617765, |
|
"learning_rate": 3.3790885242841296e-08, |
|
"logits/chosen": -1.2018029689788818, |
|
"logits/rejected": -1.2710869312286377, |
|
"logps/chosen": -486.6034240722656, |
|
"logps/rejected": -715.1466064453125, |
|
"loss": 0.4212, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3987205028533936, |
|
"rewards/margins": 2.365349292755127, |
|
"rewards/rejected": -4.764069080352783, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8517274472168906, |
|
"grad_norm": 61.61146272483576, |
|
"learning_rate": 3.274726219106677e-08, |
|
"logits/chosen": -1.1959749460220337, |
|
"logits/rejected": -1.265887975692749, |
|
"logps/chosen": -500.03045654296875, |
|
"logps/rejected": -700.082763671875, |
|
"loss": 0.4673, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.255139112472534, |
|
"rewards/margins": 2.0479519367218018, |
|
"rewards/rejected": -4.303091526031494, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8541266794625719, |
|
"grad_norm": 48.622523103355604, |
|
"learning_rate": 3.171887855571642e-08, |
|
"logits/chosen": -1.3621529340744019, |
|
"logits/rejected": -1.3234202861785889, |
|
"logps/chosen": -397.14666748046875, |
|
"logps/rejected": -537.7269287109375, |
|
"loss": 0.3959, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7443649768829346, |
|
"rewards/margins": 1.4419325590133667, |
|
"rewards/rejected": -3.186297655105591, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8565259117082533, |
|
"grad_norm": 79.36925636948864, |
|
"learning_rate": 3.070580647381643e-08, |
|
"logits/chosen": -1.2797110080718994, |
|
"logits/rejected": -1.3679635524749756, |
|
"logps/chosen": -430.72607421875, |
|
"logps/rejected": -691.2688598632812, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9860126972198486, |
|
"rewards/margins": 2.591972827911377, |
|
"rewards/rejected": -4.577984809875488, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8589251439539347, |
|
"grad_norm": 49.95738385204341, |
|
"learning_rate": 2.9708117008348576e-08, |
|
"logits/chosen": -1.3578163385391235, |
|
"logits/rejected": -1.4846515655517578, |
|
"logps/chosen": -500.74542236328125, |
|
"logps/rejected": -625.261474609375, |
|
"loss": 0.4209, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.9553468227386475, |
|
"rewards/margins": 1.7270301580429077, |
|
"rewards/rejected": -3.6823768615722656, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8613243761996161, |
|
"grad_norm": 53.31282017405371, |
|
"learning_rate": 2.8725880143264992e-08, |
|
"logits/chosen": -1.3193708658218384, |
|
"logits/rejected": -1.3403939008712769, |
|
"logps/chosen": -466.86053466796875, |
|
"logps/rejected": -651.1651611328125, |
|
"loss": 0.4863, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.282682418823242, |
|
"rewards/margins": 1.4440171718597412, |
|
"rewards/rejected": -3.7266998291015625, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8637236084452975, |
|
"grad_norm": 79.7283378065534, |
|
"learning_rate": 2.775916477857948e-08, |
|
"logits/chosen": -1.2309401035308838, |
|
"logits/rejected": -1.263295292854309, |
|
"logps/chosen": -408.5320739746094, |
|
"logps/rejected": -557.888916015625, |
|
"loss": 0.3998, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.013779640197754, |
|
"rewards/margins": 1.4895678758621216, |
|
"rewards/rejected": -3.503347396850586, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8661228406909789, |
|
"grad_norm": 70.03228325462196, |
|
"learning_rate": 2.680803872553408e-08, |
|
"logits/chosen": -1.3484076261520386, |
|
"logits/rejected": -1.4340890645980835, |
|
"logps/chosen": -430.97967529296875, |
|
"logps/rejected": -752.5364379882812, |
|
"loss": 0.4204, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.7587569952011108, |
|
"rewards/margins": 3.472735643386841, |
|
"rewards/rejected": -5.23149299621582, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8685220729366603, |
|
"grad_norm": 92.81223070149015, |
|
"learning_rate": 2.5872568701842706e-08, |
|
"logits/chosen": -1.3619986772537231, |
|
"logits/rejected": -1.4464303255081177, |
|
"logps/chosen": -365.88458251953125, |
|
"logps/rejected": -585.7950439453125, |
|
"loss": 0.472, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.5396031141281128, |
|
"rewards/margins": 1.9823001623153687, |
|
"rewards/rejected": -3.5219035148620605, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.8709213051823417, |
|
"grad_norm": 75.34177531865268, |
|
"learning_rate": 2.495282032701096e-08, |
|
"logits/chosen": -1.248120665550232, |
|
"logits/rejected": -1.437888264656067, |
|
"logps/chosen": -346.2172546386719, |
|
"logps/rejected": -514.9561767578125, |
|
"loss": 0.4199, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4829394817352295, |
|
"rewards/margins": 1.9923450946807861, |
|
"rewards/rejected": -3.4752845764160156, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8733205374280231, |
|
"grad_norm": 50.2253211425811, |
|
"learning_rate": 2.4048858117733133e-08, |
|
"logits/chosen": -1.3424656391143799, |
|
"logits/rejected": -1.4596822261810303, |
|
"logps/chosen": -480.2220153808594, |
|
"logps/rejected": -694.8543701171875, |
|
"loss": 0.3756, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1350576877593994, |
|
"rewards/margins": 2.600602865219116, |
|
"rewards/rejected": -4.735660552978516, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8757197696737045, |
|
"grad_norm": 62.6157678185224, |
|
"learning_rate": 2.3160745483366938e-08, |
|
"logits/chosen": -1.315459966659546, |
|
"logits/rejected": -1.327807903289795, |
|
"logps/chosen": -441.5907287597656, |
|
"logps/rejected": -655.9127807617188, |
|
"loss": 0.4272, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.082794666290283, |
|
"rewards/margins": 1.818223237991333, |
|
"rewards/rejected": -3.901017665863037, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8781190019193857, |
|
"grad_norm": 77.24793221419046, |
|
"learning_rate": 2.2288544721485197e-08, |
|
"logits/chosen": -1.2426245212554932, |
|
"logits/rejected": -1.2283251285552979, |
|
"logps/chosen": -385.5768127441406, |
|
"logps/rejected": -649.7178344726562, |
|
"loss": 0.4319, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.7174314260482788, |
|
"rewards/margins": 2.4441914558410645, |
|
"rewards/rejected": -4.161623001098633, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8805182341650671, |
|
"grad_norm": 69.06274330727302, |
|
"learning_rate": 2.1432317013506117e-08, |
|
"logits/chosen": -1.4047738313674927, |
|
"logits/rejected": -1.5020328760147095, |
|
"logps/chosen": -470.654052734375, |
|
"logps/rejected": -579.29345703125, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.1058661937713623, |
|
"rewards/margins": 1.4943852424621582, |
|
"rewards/rejected": -3.6002509593963623, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8829174664107485, |
|
"grad_norm": 93.57949742759158, |
|
"learning_rate": 2.0592122420401704e-08, |
|
"logits/chosen": -1.205072283744812, |
|
"logits/rejected": -1.3298732042312622, |
|
"logps/chosen": -474.8382873535156, |
|
"logps/rejected": -616.9782104492188, |
|
"loss": 0.4672, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.427994966506958, |
|
"rewards/margins": 1.4084312915802002, |
|
"rewards/rejected": -3.83642578125, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8853166986564299, |
|
"grad_norm": 64.61385860680265, |
|
"learning_rate": 1.976801987848459e-08, |
|
"logits/chosen": -1.3770363330841064, |
|
"logits/rejected": -1.410556435585022, |
|
"logps/chosen": -488.1275939941406, |
|
"logps/rejected": -752.9757690429688, |
|
"loss": 0.4352, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2426881790161133, |
|
"rewards/margins": 2.5078392028808594, |
|
"rewards/rejected": -4.750527381896973, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8877159309021113, |
|
"grad_norm": 71.4887011288993, |
|
"learning_rate": 1.8960067195273987e-08, |
|
"logits/chosen": -1.3647260665893555, |
|
"logits/rejected": -1.4772412776947021, |
|
"logps/chosen": -388.358154296875, |
|
"logps/rejected": -628.3886108398438, |
|
"loss": 0.4365, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.7657787799835205, |
|
"rewards/margins": 2.391746997833252, |
|
"rewards/rejected": -4.157525539398193, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8901151631477927, |
|
"grad_norm": 63.6446466769933, |
|
"learning_rate": 1.816832104544072e-08, |
|
"logits/chosen": -1.2321730852127075, |
|
"logits/rejected": -1.274123191833496, |
|
"logps/chosen": -496.39849853515625, |
|
"logps/rejected": -621.4663696289062, |
|
"loss": 0.3994, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.2261962890625, |
|
"rewards/margins": 1.5047309398651123, |
|
"rewards/rejected": -3.7309272289276123, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8925143953934741, |
|
"grad_norm": 53.35919816815898, |
|
"learning_rate": 1.7392836966831553e-08, |
|
"logits/chosen": -1.2064793109893799, |
|
"logits/rejected": -1.3003342151641846, |
|
"logps/chosen": -481.5274963378906, |
|
"logps/rejected": -657.7899780273438, |
|
"loss": 0.4145, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1689791679382324, |
|
"rewards/margins": 2.0934038162231445, |
|
"rewards/rejected": -4.262383460998535, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.8949136276391555, |
|
"grad_norm": 77.1830594419351, |
|
"learning_rate": 1.663366935657373e-08, |
|
"logits/chosen": -1.3655575513839722, |
|
"logits/rejected": -1.5064128637313843, |
|
"logps/chosen": -393.8701171875, |
|
"logps/rejected": -586.9609375, |
|
"loss": 0.4555, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6656783819198608, |
|
"rewards/margins": 1.8161319494247437, |
|
"rewards/rejected": -3.4818103313446045, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8973128598848369, |
|
"grad_norm": 80.81720382253343, |
|
"learning_rate": 1.5890871467258898e-08, |
|
"logits/chosen": -1.1568529605865479, |
|
"logits/rejected": -1.2427462339401245, |
|
"logps/chosen": -553.2216796875, |
|
"logps/rejected": -702.3026123046875, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3667430877685547, |
|
"rewards/margins": 1.8206110000610352, |
|
"rewards/rejected": -4.18735408782959, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8997120921305183, |
|
"grad_norm": 71.27801287402498, |
|
"learning_rate": 1.5164495403207967e-08, |
|
"logits/chosen": -1.313631534576416, |
|
"logits/rejected": -1.3183560371398926, |
|
"logps/chosen": -476.8546447753906, |
|
"logps/rejected": -734.6409912109375, |
|
"loss": 0.4195, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2888331413269043, |
|
"rewards/margins": 2.343087673187256, |
|
"rewards/rejected": -4.631920337677002, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.9021113243761996, |
|
"grad_norm": 60.1603274481907, |
|
"learning_rate": 1.4454592116815962e-08, |
|
"logits/chosen": -1.2539043426513672, |
|
"logits/rejected": -1.2726166248321533, |
|
"logps/chosen": -430.16302490234375, |
|
"logps/rejected": -626.4070434570312, |
|
"loss": 0.369, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.85342538356781, |
|
"rewards/margins": 1.881243109703064, |
|
"rewards/rejected": -3.734668254852295, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.904510556621881, |
|
"grad_norm": 56.052739726722386, |
|
"learning_rate": 1.3761211404977934e-08, |
|
"logits/chosen": -1.3558170795440674, |
|
"logits/rejected": -1.3669774532318115, |
|
"logps/chosen": -496.6163635253906, |
|
"logps/rejected": -738.0737915039062, |
|
"loss": 0.3665, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.6606838703155518, |
|
"rewards/margins": 2.5214426517486572, |
|
"rewards/rejected": -5.182126045227051, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9069097888675623, |
|
"grad_norm": 77.97728054957587, |
|
"learning_rate": 1.3084401905596177e-08, |
|
"logits/chosen": -1.210475206375122, |
|
"logits/rejected": -1.3625085353851318, |
|
"logps/chosen": -500.43927001953125, |
|
"logps/rejected": -618.0930786132812, |
|
"loss": 0.445, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.0452969074249268, |
|
"rewards/margins": 1.7742798328399658, |
|
"rewards/rejected": -3.8195769786834717, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9093090211132437, |
|
"grad_norm": 89.65951610082931, |
|
"learning_rate": 1.2424211094168053e-08, |
|
"logits/chosen": -1.2439241409301758, |
|
"logits/rejected": -1.3879896402359009, |
|
"logps/chosen": -515.781005859375, |
|
"logps/rejected": -729.7136840820312, |
|
"loss": 0.4271, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.0545547008514404, |
|
"rewards/margins": 2.210728168487549, |
|
"rewards/rejected": -4.265283107757568, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9117082533589251, |
|
"grad_norm": 122.7685693253469, |
|
"learning_rate": 1.1780685280456143e-08, |
|
"logits/chosen": -1.3709090948104858, |
|
"logits/rejected": -1.4039212465286255, |
|
"logps/chosen": -562.2694091796875, |
|
"logps/rejected": -850.7741088867188, |
|
"loss": 0.4475, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.7632789611816406, |
|
"rewards/margins": 2.866482734680176, |
|
"rewards/rejected": -5.629761695861816, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9141074856046065, |
|
"grad_norm": 63.82642729554356, |
|
"learning_rate": 1.1153869605239564e-08, |
|
"logits/chosen": -1.3386048078536987, |
|
"logits/rejected": -1.4672105312347412, |
|
"logps/chosen": -443.0409240722656, |
|
"logps/rejected": -555.0245971679688, |
|
"loss": 0.4234, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.8594999313354492, |
|
"rewards/margins": 1.5090968608856201, |
|
"rewards/rejected": -3.3685965538024902, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9165067178502879, |
|
"grad_norm": 55.265923914478826, |
|
"learning_rate": 1.0543808037147606e-08, |
|
"logits/chosen": -1.3850547075271606, |
|
"logits/rejected": -1.3882572650909424, |
|
"logps/chosen": -501.9463806152344, |
|
"logps/rejected": -765.228515625, |
|
"loss": 0.4109, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.4593310356140137, |
|
"rewards/margins": 2.621159315109253, |
|
"rewards/rejected": -5.0804901123046875, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.9189059500959693, |
|
"grad_norm": 41.44845350613424, |
|
"learning_rate": 9.95054336957557e-09, |
|
"logits/chosen": -1.3671766519546509, |
|
"logits/rejected": -1.353990912437439, |
|
"logps/chosen": -438.7281188964844, |
|
"logps/rejected": -604.5745239257812, |
|
"loss": 0.3935, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.9596534967422485, |
|
"rewards/margins": 1.6154146194458008, |
|
"rewards/rejected": -3.575068235397339, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.9213051823416507, |
|
"grad_norm": 83.64590537901655, |
|
"learning_rate": 9.37411721768286e-09, |
|
"logits/chosen": -1.4458494186401367, |
|
"logits/rejected": -1.469053030014038, |
|
"logps/chosen": -509.1304626464844, |
|
"logps/rejected": -748.7863159179688, |
|
"loss": 0.421, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.341409921646118, |
|
"rewards/margins": 2.1367950439453125, |
|
"rewards/rejected": -4.47820520401001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.9237044145873321, |
|
"grad_norm": 57.94036452379268, |
|
"learning_rate": 8.81457001547392e-09, |
|
"logits/chosen": -1.2755274772644043, |
|
"logits/rejected": -1.2877039909362793, |
|
"logps/chosen": -447.0801696777344, |
|
"logps/rejected": -612.2420043945312, |
|
"loss": 0.4167, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.0958006381988525, |
|
"rewards/margins": 1.5613038539886475, |
|
"rewards/rejected": -3.6571044921875, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.9261036468330134, |
|
"grad_norm": 59.794827391985145, |
|
"learning_rate": 8.271941012961942e-09, |
|
"logits/chosen": -1.30680251121521, |
|
"logits/rejected": -1.2633702754974365, |
|
"logps/chosen": -441.3211364746094, |
|
"logps/rejected": -780.7581787109375, |
|
"loss": 0.3915, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.2775466442108154, |
|
"rewards/margins": 2.8263659477233887, |
|
"rewards/rejected": -5.103912353515625, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.9285028790786948, |
|
"grad_norm": 90.04034076724625, |
|
"learning_rate": 7.746268273415568e-09, |
|
"logits/chosen": -1.44291090965271, |
|
"logits/rejected": -1.3620684146881104, |
|
"logps/chosen": -457.104248046875, |
|
"logps/rejected": -608.0975341796875, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.997802495956421, |
|
"rewards/margins": 1.0049917697906494, |
|
"rewards/rejected": -3.002794027328491, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.9309021113243762, |
|
"grad_norm": 56.08532487513005, |
|
"learning_rate": 7.237588670689076e-09, |
|
"logits/chosen": -1.2944238185882568, |
|
"logits/rejected": -1.4328762292861938, |
|
"logps/chosen": -487.39019775390625, |
|
"logps/rejected": -697.9758911132812, |
|
"loss": 0.4261, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.2080299854278564, |
|
"rewards/margins": 2.519274950027466, |
|
"rewards/rejected": -4.727304458618164, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.9333013435700576, |
|
"grad_norm": 89.11995129718771, |
|
"learning_rate": 6.745937886635606e-09, |
|
"logits/chosen": -1.389968752861023, |
|
"logits/rejected": -1.4157402515411377, |
|
"logps/chosen": -507.00897216796875, |
|
"logps/rejected": -853.0285034179688, |
|
"loss": 0.4189, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2686798572540283, |
|
"rewards/margins": 3.43501615524292, |
|
"rewards/rejected": -5.703696250915527, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.935700575815739, |
|
"grad_norm": 91.52948017854044, |
|
"learning_rate": 6.271350408604409e-09, |
|
"logits/chosen": -1.4022436141967773, |
|
"logits/rejected": -1.406585454940796, |
|
"logps/chosen": -399.2693786621094, |
|
"logps/rejected": -612.744873046875, |
|
"loss": 0.4141, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.8768850564956665, |
|
"rewards/margins": 1.962498664855957, |
|
"rewards/rejected": -3.839383602142334, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9380998080614203, |
|
"grad_norm": 52.039893504735, |
|
"learning_rate": 5.813859527021487e-09, |
|
"logits/chosen": -1.3069987297058105, |
|
"logits/rejected": -1.3816003799438477, |
|
"logps/chosen": -500.3255920410156, |
|
"logps/rejected": -697.2518310546875, |
|
"loss": 0.3715, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.350861072540283, |
|
"rewards/margins": 2.23738956451416, |
|
"rewards/rejected": -4.588251113891602, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9404990403071017, |
|
"grad_norm": 49.34387167178098, |
|
"learning_rate": 5.373497333054616e-09, |
|
"logits/chosen": -1.3901867866516113, |
|
"logits/rejected": -1.4163486957550049, |
|
"logps/chosen": -488.06903076171875, |
|
"logps/rejected": -603.2286987304688, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.2455077171325684, |
|
"rewards/margins": 1.2376153469085693, |
|
"rewards/rejected": -3.4831230640411377, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9428982725527831, |
|
"grad_norm": 59.69376404826322, |
|
"learning_rate": 4.950294716362213e-09, |
|
"logits/chosen": -1.3077560663223267, |
|
"logits/rejected": -1.4218385219573975, |
|
"logps/chosen": -501.00457763671875, |
|
"logps/rejected": -623.9380493164062, |
|
"loss": 0.4254, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.2068111896514893, |
|
"rewards/margins": 1.2557148933410645, |
|
"rewards/rejected": -3.4625256061553955, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9452975047984645, |
|
"grad_norm": 63.61485606948048, |
|
"learning_rate": 4.544281362926422e-09, |
|
"logits/chosen": -1.2970178127288818, |
|
"logits/rejected": -1.3176815509796143, |
|
"logps/chosen": -494.03515625, |
|
"logps/rejected": -667.9765625, |
|
"loss": 0.4567, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.9948374032974243, |
|
"rewards/margins": 1.8618943691253662, |
|
"rewards/rejected": -3.85673189163208, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9476967370441459, |
|
"grad_norm": 40.97335214555011, |
|
"learning_rate": 4.15548575297095e-09, |
|
"logits/chosen": -1.234287142753601, |
|
"logits/rejected": -1.3237779140472412, |
|
"logps/chosen": -482.68072509765625, |
|
"logps/rejected": -734.9530639648438, |
|
"loss": 0.3526, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -2.3361096382141113, |
|
"rewards/margins": 2.631274938583374, |
|
"rewards/rejected": -4.967384338378906, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9500959692898272, |
|
"grad_norm": 43.454652393867875, |
|
"learning_rate": 3.7839351589631366e-09, |
|
"logits/chosen": -1.3561676740646362, |
|
"logits/rejected": -1.1930186748504639, |
|
"logps/chosen": -454.8728942871094, |
|
"logps/rejected": -687.5897827148438, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.5094499588012695, |
|
"rewards/margins": 1.905800461769104, |
|
"rewards/rejected": -4.415250778198242, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9524952015355086, |
|
"grad_norm": 81.2377732744721, |
|
"learning_rate": 3.4296556437010405e-09, |
|
"logits/chosen": -1.3651882410049438, |
|
"logits/rejected": -1.383840799331665, |
|
"logps/chosen": -446.6869201660156, |
|
"logps/rejected": -608.2105712890625, |
|
"loss": 0.4052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4141621589660645, |
|
"rewards/margins": 1.6848506927490234, |
|
"rewards/rejected": -4.099012851715088, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.95489443378119, |
|
"grad_norm": 54.227114337789175, |
|
"learning_rate": 3.092672058485124e-09, |
|
"logits/chosen": -1.5264198780059814, |
|
"logits/rejected": -1.4988195896148682, |
|
"logps/chosen": -509.2149353027344, |
|
"logps/rejected": -768.14404296875, |
|
"loss": 0.426, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.823775053024292, |
|
"rewards/margins": 2.4652981758117676, |
|
"rewards/rejected": -5.289073944091797, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9572936660268714, |
|
"grad_norm": 78.81638905697392, |
|
"learning_rate": 2.7730080413750356e-09, |
|
"logits/chosen": -1.2725681066513062, |
|
"logits/rejected": -1.3801984786987305, |
|
"logps/chosen": -467.46343994140625, |
|
"logps/rejected": -614.0919189453125, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0063366889953613, |
|
"rewards/margins": 1.4813331365585327, |
|
"rewards/rejected": -3.4876697063446045, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.9596928982725528, |
|
"grad_norm": 59.40509449378525, |
|
"learning_rate": 2.4706860155316033e-09, |
|
"logits/chosen": -1.28085196018219, |
|
"logits/rejected": -1.3779548406600952, |
|
"logps/chosen": -599.7765502929688, |
|
"logps/rejected": -758.8671875, |
|
"loss": 0.4413, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.598813533782959, |
|
"rewards/margins": 1.6893723011016846, |
|
"rewards/rejected": -4.288186073303223, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9620921305182342, |
|
"grad_norm": 73.30482756852622, |
|
"learning_rate": 2.185727187643843e-09, |
|
"logits/chosen": -1.2994678020477295, |
|
"logits/rejected": -1.3173738718032837, |
|
"logps/chosen": -446.7986755371094, |
|
"logps/rejected": -717.6575317382812, |
|
"loss": 0.4822, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.304640531539917, |
|
"rewards/margins": 2.731722593307495, |
|
"rewards/rejected": -5.036363124847412, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9644913627639156, |
|
"grad_norm": 93.67770845413142, |
|
"learning_rate": 1.9181515464413434e-09, |
|
"logits/chosen": -1.1958959102630615, |
|
"logits/rejected": -1.2263587713241577, |
|
"logps/chosen": -557.4002685546875, |
|
"logps/rejected": -799.4276733398438, |
|
"loss": 0.4172, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -2.232234477996826, |
|
"rewards/margins": 2.496126890182495, |
|
"rewards/rejected": -4.7283616065979, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.966890595009597, |
|
"grad_norm": 53.26258013949856, |
|
"learning_rate": 1.6679778612923302e-09, |
|
"logits/chosen": -1.3231043815612793, |
|
"logits/rejected": -1.475568413734436, |
|
"logps/chosen": -514.719482421875, |
|
"logps/rejected": -669.9471435546875, |
|
"loss": 0.4052, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4012928009033203, |
|
"rewards/margins": 1.4460484981536865, |
|
"rewards/rejected": -3.8473410606384277, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9692898272552783, |
|
"grad_norm": 72.94634856988112, |
|
"learning_rate": 1.43522368088686e-09, |
|
"logits/chosen": -1.3796308040618896, |
|
"logits/rejected": -1.4548887014389038, |
|
"logps/chosen": -524.4054565429688, |
|
"logps/rejected": -795.0631713867188, |
|
"loss": 0.4778, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -2.7115061283111572, |
|
"rewards/margins": 2.742997646331787, |
|
"rewards/rejected": -5.454503536224365, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9716890595009597, |
|
"grad_norm": 83.28511968100013, |
|
"learning_rate": 1.2199053320059993e-09, |
|
"logits/chosen": -1.3421388864517212, |
|
"logits/rejected": -1.3489320278167725, |
|
"logps/chosen": -483.73052978515625, |
|
"logps/rejected": -671.6555786132812, |
|
"loss": 0.4179, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.17889666557312, |
|
"rewards/margins": 1.7280422449111938, |
|
"rewards/rejected": -3.9069390296936035, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.974088291746641, |
|
"grad_norm": 65.34944649763949, |
|
"learning_rate": 1.0220379183764338e-09, |
|
"logits/chosen": -1.3666952848434448, |
|
"logits/rejected": -1.358161449432373, |
|
"logps/chosen": -379.8059387207031, |
|
"logps/rejected": -614.4222412109375, |
|
"loss": 0.3993, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.6943544149398804, |
|
"rewards/margins": 2.2639663219451904, |
|
"rewards/rejected": -3.9583206176757812, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9764875239923224, |
|
"grad_norm": 70.1049047850288, |
|
"learning_rate": 8.416353196111503e-10, |
|
"logits/chosen": -1.375160813331604, |
|
"logits/rejected": -1.340598225593567, |
|
"logps/chosen": -499.16876220703125, |
|
"logps/rejected": -708.440185546875, |
|
"loss": 0.4537, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.6097168922424316, |
|
"rewards/margins": 2.180154323577881, |
|
"rewards/rejected": -4.789872169494629, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9788867562380038, |
|
"grad_norm": 66.82721727412559, |
|
"learning_rate": 6.787101902356873e-10, |
|
"logits/chosen": -1.4533971548080444, |
|
"logits/rejected": -1.4280126094818115, |
|
"logps/chosen": -495.2933654785156, |
|
"logps/rejected": -711.2422485351562, |
|
"loss": 0.4494, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.3556466102600098, |
|
"rewards/margins": 1.9516353607177734, |
|
"rewards/rejected": -4.307281970977783, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9812859884836852, |
|
"grad_norm": 101.18233866719163, |
|
"learning_rate": 5.332739588005953e-10, |
|
"logits/chosen": -1.381775975227356, |
|
"logits/rejected": -1.4199464321136475, |
|
"logps/chosen": -389.28411865234375, |
|
"logps/rejected": -655.2838134765625, |
|
"loss": 0.4249, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.8873682022094727, |
|
"rewards/margins": 2.4491372108459473, |
|
"rewards/rejected": -4.336504936218262, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9836852207293666, |
|
"grad_norm": 76.0835459397726, |
|
"learning_rate": 4.053368270797164e-10, |
|
"logits/chosen": -1.371535062789917, |
|
"logits/rejected": -1.3988001346588135, |
|
"logps/chosen": -473.8731994628906, |
|
"logps/rejected": -699.0272827148438, |
|
"loss": 0.401, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.4284753799438477, |
|
"rewards/margins": 2.310962677001953, |
|
"rewards/rejected": -4.739438056945801, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.986084452975048, |
|
"grad_norm": 41.658180713138364, |
|
"learning_rate": 2.949077693545354e-10, |
|
"logits/chosen": -1.3081371784210205, |
|
"logits/rejected": -1.4109846353530884, |
|
"logps/chosen": -503.1234436035156, |
|
"logps/rejected": -697.5470581054688, |
|
"loss": 0.4703, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -2.3372557163238525, |
|
"rewards/margins": 1.6467803716659546, |
|
"rewards/rejected": -3.9840362071990967, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9884836852207294, |
|
"grad_norm": 53.16215503139034, |
|
"learning_rate": 2.0199453178471047e-10, |
|
"logits/chosen": -1.2432799339294434, |
|
"logits/rejected": -1.3958756923675537, |
|
"logps/chosen": -532.030029296875, |
|
"logps/rejected": -602.0155639648438, |
|
"loss": 0.4235, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.2111809253692627, |
|
"rewards/margins": 1.1188135147094727, |
|
"rewards/rejected": -3.3299942016601562, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.9908829174664108, |
|
"grad_norm": 62.61946131554966, |
|
"learning_rate": 1.266036318647301e-10, |
|
"logits/chosen": -1.373792052268982, |
|
"logits/rejected": -1.4291802644729614, |
|
"logps/chosen": -555.7026977539062, |
|
"logps/rejected": -774.6337890625, |
|
"loss": 0.4632, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.6357736587524414, |
|
"rewards/margins": 2.4022860527038574, |
|
"rewards/rejected": -5.038060188293457, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9932821497120922, |
|
"grad_norm": 76.41090561563904, |
|
"learning_rate": 6.874035796672339e-11, |
|
"logits/chosen": -1.2975847721099854, |
|
"logits/rejected": -1.378894329071045, |
|
"logps/chosen": -503.6324157714844, |
|
"logps/rejected": -720.40576171875, |
|
"loss": 0.4241, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.0853748321533203, |
|
"rewards/margins": 2.7113640308380127, |
|
"rewards/rejected": -4.796738624572754, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9956813819577736, |
|
"grad_norm": 76.34576359226114, |
|
"learning_rate": 2.8408768969423458e-11, |
|
"logits/chosen": -1.3285682201385498, |
|
"logits/rejected": -1.330243706703186, |
|
"logps/chosen": -468.9698181152344, |
|
"logps/rejected": -653.3128051757812, |
|
"loss": 0.4075, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -2.0265982151031494, |
|
"rewards/margins": 1.8034579753875732, |
|
"rewards/rejected": -3.8300559520721436, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9980806142034548, |
|
"grad_norm": 66.48623115115576, |
|
"learning_rate": 5.611693973617271e-12, |
|
"logits/chosen": -1.3855040073394775, |
|
"logits/rejected": -1.3751308917999268, |
|
"logps/chosen": -445.00225830078125, |
|
"logps/rejected": -637.5364990234375, |
|
"loss": 0.4574, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -2.166611909866333, |
|
"rewards/margins": 1.7966630458831787, |
|
"rewards/rejected": -3.9632747173309326, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4168, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4822349088434523, |
|
"train_runtime": 13347.0673, |
|
"train_samples_per_second": 9.992, |
|
"train_steps_per_second": 0.312 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 4168, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|