|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9973828840617638, |
|
"eval_steps": 100, |
|
"global_step": 954, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002093692750588851, |
|
"grad_norm": 38.333577570579074, |
|
"learning_rate": 1.0416666666666666e-08, |
|
"logits/chosen": 5.468747138977051, |
|
"logits/rejected": 5.353150367736816, |
|
"logps/chosen": -399.0700988769531, |
|
"logps/rejected": -414.2703857421875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 36.45097456473781, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": 4.634159088134766, |
|
"logits/rejected": 4.8650617599487305, |
|
"logps/chosen": -481.9865417480469, |
|
"logps/rejected": -402.9172668457031, |
|
"loss": 0.7192, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": 0.036201052367687225, |
|
"rewards/margins": 0.05521820858120918, |
|
"rewards/rejected": -0.01901715248823166, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 38.31461130932718, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": 4.8017449378967285, |
|
"logits/rejected": 5.193596363067627, |
|
"logps/chosen": -428.74591064453125, |
|
"logps/rejected": -379.7098693847656, |
|
"loss": 0.7525, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.014457901008427143, |
|
"rewards/margins": -0.03727109357714653, |
|
"rewards/rejected": 0.0517289862036705, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 38.39033659648525, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 4.625308513641357, |
|
"logits/rejected": 4.913487434387207, |
|
"logps/chosen": -459.8934631347656, |
|
"logps/rejected": -365.87176513671875, |
|
"loss": 0.7389, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.032367587089538574, |
|
"rewards/margins": 0.07106685638427734, |
|
"rewards/rejected": -0.03869926929473877, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 34.959297165636315, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": 5.135643005371094, |
|
"logits/rejected": 5.29467248916626, |
|
"logps/chosen": -388.5003662109375, |
|
"logps/rejected": -341.11138916015625, |
|
"loss": 0.7521, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.061884332448244095, |
|
"rewards/margins": -0.051958512514829636, |
|
"rewards/rejected": -0.009925814345479012, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 40.59746593571697, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": 4.794947147369385, |
|
"logits/rejected": 5.206262111663818, |
|
"logps/chosen": -418.7637634277344, |
|
"logps/rejected": -366.21783447265625, |
|
"loss": 0.7539, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.0046131848357617855, |
|
"rewards/margins": 0.009517465718090534, |
|
"rewards/rejected": -0.014130651950836182, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 35.116262927070615, |
|
"learning_rate": 6.249999999999999e-07, |
|
"logits/chosen": 4.984349250793457, |
|
"logits/rejected": 5.210784435272217, |
|
"logps/chosen": -389.5479431152344, |
|
"logps/rejected": -355.3258361816406, |
|
"loss": 0.7337, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.09027661383152008, |
|
"rewards/margins": 0.13924987614154816, |
|
"rewards/rejected": -0.048973266035318375, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 37.65630163274615, |
|
"learning_rate": 7.291666666666666e-07, |
|
"logits/chosen": 5.079476356506348, |
|
"logits/rejected": 5.1062331199646, |
|
"logps/chosen": -472.6788635253906, |
|
"logps/rejected": -410.6566467285156, |
|
"loss": 0.7532, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.01137494295835495, |
|
"rewards/margins": -0.019245151430368423, |
|
"rewards/rejected": 0.030620098114013672, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 36.35209638887961, |
|
"learning_rate": 8.333333333333333e-07, |
|
"logits/chosen": 4.831971645355225, |
|
"logits/rejected": 5.179555892944336, |
|
"logps/chosen": -465.8661193847656, |
|
"logps/rejected": -352.46063232421875, |
|
"loss": 0.7337, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.029347699135541916, |
|
"rewards/margins": 0.011580700054764748, |
|
"rewards/rejected": 0.017767000943422318, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 40.247342074541066, |
|
"learning_rate": 9.374999999999999e-07, |
|
"logits/chosen": 4.667853355407715, |
|
"logits/rejected": 5.083367347717285, |
|
"logps/chosen": -410.145263671875, |
|
"logps/rejected": -347.24871826171875, |
|
"loss": 0.7325, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.04706032946705818, |
|
"rewards/margins": 0.05596155673265457, |
|
"rewards/rejected": -0.008901228196918964, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 36.88448509348576, |
|
"learning_rate": 9.999463737538052e-07, |
|
"logits/chosen": 5.017066955566406, |
|
"logits/rejected": 5.157826900482178, |
|
"logps/chosen": -453.6114196777344, |
|
"logps/rejected": -376.13214111328125, |
|
"loss": 0.7296, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.03123999759554863, |
|
"rewards/margins": 0.038888636976480484, |
|
"rewards/rejected": -0.007648637983947992, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": 4.755386829376221, |
|
"eval_logits/rejected": 5.127224445343018, |
|
"eval_logps/chosen": -443.48101806640625, |
|
"eval_logps/rejected": -377.5273742675781, |
|
"eval_loss": 0.7357296347618103, |
|
"eval_rewards/accuracies": 0.5515872836112976, |
|
"eval_rewards/chosen": 0.01168334111571312, |
|
"eval_rewards/margins": 0.03692733868956566, |
|
"eval_rewards/rejected": -0.025244001299142838, |
|
"eval_runtime": 21.3186, |
|
"eval_samples_per_second": 93.815, |
|
"eval_steps_per_second": 2.955, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 36.21924654761057, |
|
"learning_rate": 9.993432105822034e-07, |
|
"logits/chosen": 4.768385887145996, |
|
"logits/rejected": 5.076653957366943, |
|
"logps/chosen": -449.16375732421875, |
|
"logps/rejected": -369.2919006347656, |
|
"loss": 0.7211, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.04003235697746277, |
|
"rewards/margins": -0.059906214475631714, |
|
"rewards/rejected": 0.019873863086104393, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 36.12599439727971, |
|
"learning_rate": 9.980706626858607e-07, |
|
"logits/chosen": 5.0697174072265625, |
|
"logits/rejected": 5.350961208343506, |
|
"logps/chosen": -392.9084777832031, |
|
"logps/rejected": -342.9964294433594, |
|
"loss": 0.7213, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.005513651762157679, |
|
"rewards/margins": 0.07320307195186615, |
|
"rewards/rejected": -0.06768941879272461, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 43.81008737879554, |
|
"learning_rate": 9.961304359538434e-07, |
|
"logits/chosen": 4.7396440505981445, |
|
"logits/rejected": 5.110291957855225, |
|
"logps/chosen": -445.08209228515625, |
|
"logps/rejected": -356.9689636230469, |
|
"loss": 0.7319, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.03245898336172104, |
|
"rewards/margins": 0.0837341919541359, |
|
"rewards/rejected": -0.05127520486712456, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 37.12091478913465, |
|
"learning_rate": 9.935251313189563e-07, |
|
"logits/chosen": 4.5339274406433105, |
|
"logits/rejected": 5.020459175109863, |
|
"logps/chosen": -473.4126892089844, |
|
"logps/rejected": -364.12939453125, |
|
"loss": 0.7193, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.008073748089373112, |
|
"rewards/margins": 0.035515300929546356, |
|
"rewards/rejected": -0.02744155190885067, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 36.18564480374988, |
|
"learning_rate": 9.902582412711118e-07, |
|
"logits/chosen": 4.540812969207764, |
|
"logits/rejected": 4.964258193969727, |
|
"logps/chosen": -426.5033264160156, |
|
"logps/rejected": -353.1463317871094, |
|
"loss": 0.7232, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.01480414904654026, |
|
"rewards/margins": 0.06241898611187935, |
|
"rewards/rejected": -0.07722313702106476, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 32.90233228487631, |
|
"learning_rate": 9.86334145175542e-07, |
|
"logits/chosen": 4.807779788970947, |
|
"logits/rejected": 5.042156219482422, |
|
"logps/chosen": -396.0440673828125, |
|
"logps/rejected": -360.52886962890625, |
|
"loss": 0.7013, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.010227044112980366, |
|
"rewards/margins": 0.055318038910627365, |
|
"rewards/rejected": -0.045090995728969574, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 37.311964741290105, |
|
"learning_rate": 9.817581034021272e-07, |
|
"logits/chosen": 4.897703170776367, |
|
"logits/rejected": 5.062272071838379, |
|
"logps/chosen": -389.55810546875, |
|
"logps/rejected": -329.748779296875, |
|
"loss": 0.7043, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.008063828572630882, |
|
"rewards/margins": 0.049024712294340134, |
|
"rewards/rejected": -0.05708853527903557, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 34.14102924438106, |
|
"learning_rate": 9.765362502737097e-07, |
|
"logits/chosen": 5.039429187774658, |
|
"logits/rejected": 5.049492835998535, |
|
"logps/chosen": -384.9471130371094, |
|
"logps/rejected": -381.6601257324219, |
|
"loss": 0.7091, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.004380516707897186, |
|
"rewards/margins": 0.038515396416187286, |
|
"rewards/rejected": -0.0341348834335804, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 35.88568083270778, |
|
"learning_rate": 9.706755858428485e-07, |
|
"logits/chosen": 5.025214195251465, |
|
"logits/rejected": 5.097342014312744, |
|
"logps/chosen": -397.56402587890625, |
|
"logps/rejected": -396.12799072265625, |
|
"loss": 0.7161, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.056682147085666656, |
|
"rewards/margins": 0.023842817172408104, |
|
"rewards/rejected": -0.08052496612071991, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 38.60485416145746, |
|
"learning_rate": 9.641839665080363e-07, |
|
"logits/chosen": 5.1590471267700195, |
|
"logits/rejected": 5.290652275085449, |
|
"logps/chosen": -399.9522399902344, |
|
"logps/rejected": -363.53936767578125, |
|
"loss": 0.7062, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": 0.04297986626625061, |
|
"rewards/margins": 0.06266864389181137, |
|
"rewards/rejected": -0.019688773900270462, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": 4.725487232208252, |
|
"eval_logits/rejected": 5.087867736816406, |
|
"eval_logps/chosen": -443.55450439453125, |
|
"eval_logps/rejected": -377.6705627441406, |
|
"eval_loss": 0.6988219022750854, |
|
"eval_rewards/accuracies": 0.567460298538208, |
|
"eval_rewards/chosen": -0.025081120431423187, |
|
"eval_rewards/margins": 0.07174728065729141, |
|
"eval_rewards/rejected": -0.0968284010887146, |
|
"eval_runtime": 21.5315, |
|
"eval_samples_per_second": 92.887, |
|
"eval_steps_per_second": 2.926, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 36.10684145537435, |
|
"learning_rate": 9.570700944819582e-07, |
|
"logits/chosen": 4.827897548675537, |
|
"logits/rejected": 5.154609680175781, |
|
"logps/chosen": -451.2969665527344, |
|
"logps/rejected": -372.116455078125, |
|
"loss": 0.7141, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.07117662578821182, |
|
"rewards/margins": 0.02444976009428501, |
|
"rewards/rejected": -0.09562637656927109, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 33.13953697631782, |
|
"learning_rate": 9.493435061259129e-07, |
|
"logits/chosen": 5.24191427230835, |
|
"logits/rejected": 5.477172374725342, |
|
"logps/chosen": -365.3572692871094, |
|
"logps/rejected": -345.34814453125, |
|
"loss": 0.7138, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.07684006541967392, |
|
"rewards/margins": -0.033464811742305756, |
|
"rewards/rejected": -0.043375253677368164, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 36.46869252662201, |
|
"learning_rate": 9.4101455916603e-07, |
|
"logits/chosen": 4.996638298034668, |
|
"logits/rejected": 5.203185081481934, |
|
"logps/chosen": -390.2169494628906, |
|
"logps/rejected": -381.74090576171875, |
|
"loss": 0.7027, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.006761978380382061, |
|
"rewards/margins": 0.11298926174640656, |
|
"rewards/rejected": -0.11975125223398209, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 32.465417090707724, |
|
"learning_rate": 9.320944188084241e-07, |
|
"logits/chosen": 4.961588382720947, |
|
"logits/rejected": 5.104936122894287, |
|
"logps/chosen": -405.73651123046875, |
|
"logps/rejected": -368.39312744140625, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.04590672254562378, |
|
"rewards/margins": 0.21882851421833038, |
|
"rewards/rejected": -0.1729217916727066, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 32.56446548910322, |
|
"learning_rate": 9.225950427718974e-07, |
|
"logits/chosen": 4.295259475708008, |
|
"logits/rejected": 4.731950759887695, |
|
"logps/chosen": -457.6085510253906, |
|
"logps/rejected": -378.92083740234375, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.08796132355928421, |
|
"rewards/margins": 0.07854396849870682, |
|
"rewards/rejected": -0.16650527715682983, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 37.3555609307497, |
|
"learning_rate": 9.125291652582547e-07, |
|
"logits/chosen": 4.772681713104248, |
|
"logits/rejected": 4.774602890014648, |
|
"logps/chosen": -429.554931640625, |
|
"logps/rejected": -350.5638122558594, |
|
"loss": 0.697, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.017600687220692635, |
|
"rewards/margins": 0.15435068309307098, |
|
"rewards/rejected": -0.1367500126361847, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 33.42195670184768, |
|
"learning_rate": 9.019102798817195e-07, |
|
"logits/chosen": 4.580355644226074, |
|
"logits/rejected": 5.011557102203369, |
|
"logps/chosen": -446.68438720703125, |
|
"logps/rejected": -380.5400390625, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.019351882860064507, |
|
"rewards/margins": 0.15824225544929504, |
|
"rewards/rejected": -0.13889038562774658, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 39.119947805234894, |
|
"learning_rate": 8.90752621580335e-07, |
|
"logits/chosen": 5.025314807891846, |
|
"logits/rejected": 5.18468713760376, |
|
"logps/chosen": -424.27191162109375, |
|
"logps/rejected": -344.4115295410156, |
|
"loss": 0.7073, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.023305395618081093, |
|
"rewards/margins": 0.14927226305007935, |
|
"rewards/rejected": -0.125966876745224, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 33.472133866310024, |
|
"learning_rate": 8.79071147533597e-07, |
|
"logits/chosen": 4.961835861206055, |
|
"logits/rejected": 5.123082637786865, |
|
"logps/chosen": -400.4245300292969, |
|
"logps/rejected": -388.8963623046875, |
|
"loss": 0.6687, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.08449111878871918, |
|
"rewards/margins": 0.2874522805213928, |
|
"rewards/rejected": -0.20296116173267365, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 38.29458361274397, |
|
"learning_rate": 8.668815171119019e-07, |
|
"logits/chosen": 4.6071085929870605, |
|
"logits/rejected": 4.85768985748291, |
|
"logps/chosen": -445.59393310546875, |
|
"logps/rejected": -373.83636474609375, |
|
"loss": 0.6782, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.043323811143636703, |
|
"rewards/margins": 0.2540794312953949, |
|
"rewards/rejected": -0.2107556313276291, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": 4.662118434906006, |
|
"eval_logits/rejected": 5.016141414642334, |
|
"eval_logps/chosen": -443.56884765625, |
|
"eval_logps/rejected": -377.883056640625, |
|
"eval_loss": 0.6942777037620544, |
|
"eval_rewards/accuracies": 0.567460298538208, |
|
"eval_rewards/chosen": -0.03225937858223915, |
|
"eval_rewards/margins": 0.17080551385879517, |
|
"eval_rewards/rejected": -0.20306488871574402, |
|
"eval_runtime": 21.6344, |
|
"eval_samples_per_second": 92.445, |
|
"eval_steps_per_second": 2.912, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 35.277636776310345, |
|
"learning_rate": 8.54200070884685e-07, |
|
"logits/chosen": 4.7398271560668945, |
|
"logits/rejected": 5.0438690185546875, |
|
"logps/chosen": -455.08074951171875, |
|
"logps/rejected": -346.21905517578125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.00532907247543335, |
|
"rewards/margins": 0.22818481922149658, |
|
"rewards/rejected": -0.23351387679576874, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 36.1242464612453, |
|
"learning_rate": 8.410438087153911e-07, |
|
"logits/chosen": 4.823008060455322, |
|
"logits/rejected": 4.949624538421631, |
|
"logps/chosen": -420.04150390625, |
|
"logps/rejected": -346.31134033203125, |
|
"loss": 0.6633, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.02057427167892456, |
|
"rewards/margins": 0.1925923228263855, |
|
"rewards/rejected": -0.21316656470298767, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 34.485635067716444, |
|
"learning_rate": 8.274303669726426e-07, |
|
"logits/chosen": 4.866278171539307, |
|
"logits/rejected": 5.084838390350342, |
|
"logps/chosen": -413.07647705078125, |
|
"logps/rejected": -359.72637939453125, |
|
"loss": 0.6964, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.0661710649728775, |
|
"rewards/margins": 0.11275775730609894, |
|
"rewards/rejected": -0.17892882227897644, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 36.021204090397475, |
|
"learning_rate": 8.133779948881513e-07, |
|
"logits/chosen": 4.962647914886475, |
|
"logits/rejected": 5.274256229400635, |
|
"logps/chosen": -423.34796142578125, |
|
"logps/rejected": -374.83831787109375, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.06308840215206146, |
|
"rewards/margins": 0.14286582171916962, |
|
"rewards/rejected": -0.20595422387123108, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 40.80633953486743, |
|
"learning_rate": 7.989055300930704e-07, |
|
"logits/chosen": 4.9410552978515625, |
|
"logits/rejected": 5.171365737915039, |
|
"logps/chosen": -401.3800048828125, |
|
"logps/rejected": -339.8207702636719, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.05132729932665825, |
|
"rewards/margins": 0.14917483925819397, |
|
"rewards/rejected": -0.20050212740898132, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 32.290367261199215, |
|
"learning_rate": 7.840323733655778e-07, |
|
"logits/chosen": 4.760105609893799, |
|
"logits/rejected": 4.936800956726074, |
|
"logps/chosen": -475.94305419921875, |
|
"logps/rejected": -373.4317626953125, |
|
"loss": 0.6723, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.040363796055316925, |
|
"rewards/margins": 0.26596716046333313, |
|
"rewards/rejected": -0.2256033718585968, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 33.44911142948228, |
|
"learning_rate": 7.687784626235447e-07, |
|
"logits/chosen": 4.649796485900879, |
|
"logits/rejected": 4.882054328918457, |
|
"logps/chosen": -437.54791259765625, |
|
"logps/rejected": -343.1330871582031, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03584844991564751, |
|
"rewards/margins": 0.2080194056034088, |
|
"rewards/rejected": -0.2438678741455078, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 32.64019199720913, |
|
"learning_rate": 7.531642461971514e-07, |
|
"logits/chosen": 4.7331953048706055, |
|
"logits/rejected": 5.047934532165527, |
|
"logps/chosen": -434.0751953125, |
|
"logps/rejected": -363.1179504394531, |
|
"loss": 0.673, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.03724004700779915, |
|
"rewards/margins": 0.23209133744239807, |
|
"rewards/rejected": -0.2693313956260681, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 33.38515796622506, |
|
"learning_rate": 7.372106554172801e-07, |
|
"logits/chosen": 4.660643577575684, |
|
"logits/rejected": 4.7719621658325195, |
|
"logps/chosen": -434.41015625, |
|
"logps/rejected": -394.9471130371094, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.021881069988012314, |
|
"rewards/margins": 0.2415298968553543, |
|
"rewards/rejected": -0.2196488082408905, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 32.60957530709497, |
|
"learning_rate": 7.209390765564318e-07, |
|
"logits/chosen": 4.807684421539307, |
|
"logits/rejected": 5.217709541320801, |
|
"logps/chosen": -368.07122802734375, |
|
"logps/rejected": -328.12066650390625, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.03271085396409035, |
|
"rewards/margins": 0.1988353729248047, |
|
"rewards/rejected": -0.23154623806476593, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": 4.645900249481201, |
|
"eval_logits/rejected": 4.999230861663818, |
|
"eval_logps/chosen": -443.68084716796875, |
|
"eval_logps/rejected": -378.0348205566406, |
|
"eval_loss": 0.6756832003593445, |
|
"eval_rewards/accuracies": 0.5992063283920288, |
|
"eval_rewards/chosen": -0.08822782337665558, |
|
"eval_rewards/margins": 0.19070643186569214, |
|
"eval_rewards/rejected": -0.2789342403411865, |
|
"eval_runtime": 21.4973, |
|
"eval_samples_per_second": 93.035, |
|
"eval_steps_per_second": 2.931, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 80.32344495768098, |
|
"learning_rate": 7.043713221597773e-07, |
|
"logits/chosen": 4.9558234214782715, |
|
"logits/rejected": 5.171336650848389, |
|
"logps/chosen": -464.4634704589844, |
|
"logps/rejected": -378.52130126953125, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.028427015990018845, |
|
"rewards/margins": 0.16248683631420135, |
|
"rewards/rejected": -0.1909138560295105, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 38.003974257278905, |
|
"learning_rate": 6.875296018047809e-07, |
|
"logits/chosen": 5.062918663024902, |
|
"logits/rejected": 5.093894958496094, |
|
"logps/chosen": -414.597900390625, |
|
"logps/rejected": -392.76422119140625, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.05205658823251724, |
|
"rewards/margins": 0.166357159614563, |
|
"rewards/rejected": -0.21841374039649963, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 33.39326538286459, |
|
"learning_rate": 6.704364923285857e-07, |
|
"logits/chosen": 4.783626556396484, |
|
"logits/rejected": 5.061443328857422, |
|
"logps/chosen": -454.7694396972656, |
|
"logps/rejected": -349.71099853515625, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03129550814628601, |
|
"rewards/margins": 0.2672887146472931, |
|
"rewards/rejected": -0.2985842227935791, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 33.88594593881104, |
|
"learning_rate": 6.531149075630796e-07, |
|
"logits/chosen": 4.762629985809326, |
|
"logits/rejected": 4.992688179016113, |
|
"logps/chosen": -422.49639892578125, |
|
"logps/rejected": -342.6626892089844, |
|
"loss": 0.6829, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.08267354220151901, |
|
"rewards/margins": 0.18921074271202087, |
|
"rewards/rejected": -0.2718842923641205, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 34.194378360359096, |
|
"learning_rate": 6.355880676182085e-07, |
|
"logits/chosen": 4.86130952835083, |
|
"logits/rejected": 5.088041305541992, |
|
"logps/chosen": -423.82366943359375, |
|
"logps/rejected": -386.20172119140625, |
|
"loss": 0.6777, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.029518108814954758, |
|
"rewards/margins": 0.2812921106815338, |
|
"rewards/rejected": -0.3108102083206177, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 35.220161431379815, |
|
"learning_rate": 6.178794677547137e-07, |
|
"logits/chosen": 4.96859073638916, |
|
"logits/rejected": 5.295912265777588, |
|
"logps/chosen": -408.28228759765625, |
|
"logps/rejected": -337.6819763183594, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.02989841438829899, |
|
"rewards/margins": 0.3131854832172394, |
|
"rewards/rejected": -0.343083918094635, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 36.11741005068747, |
|
"learning_rate": 6.000128468880222e-07, |
|
"logits/chosen": 4.616504669189453, |
|
"logits/rejected": 4.935946464538574, |
|
"logps/chosen": -435.3017578125, |
|
"logps/rejected": -375.13800048828125, |
|
"loss": 0.6647, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.11524273455142975, |
|
"rewards/margins": 0.2546766698360443, |
|
"rewards/rejected": -0.3699193596839905, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0049725202826485, |
|
"grad_norm": 32.471857091487834, |
|
"learning_rate": 5.820121557655108e-07, |
|
"logits/chosen": 4.9493536949157715, |
|
"logits/rejected": 5.226868152618408, |
|
"logps/chosen": -423.6285095214844, |
|
"logps/rejected": -362.1949768066406, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.05261852219700813, |
|
"rewards/margins": 0.3280298113822937, |
|
"rewards/rejected": -0.27541130781173706, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.025909447788537, |
|
"grad_norm": 39.51652905408664, |
|
"learning_rate": 5.639015248598023e-07, |
|
"logits/chosen": 4.762259006500244, |
|
"logits/rejected": 5.021244525909424, |
|
"logps/chosen": -424.96697998046875, |
|
"logps/rejected": -342.76666259765625, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.03716667741537094, |
|
"rewards/margins": 0.2011403739452362, |
|
"rewards/rejected": -0.23830704391002655, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.0468463752944255, |
|
"grad_norm": 34.43579926142672, |
|
"learning_rate": 5.457052320211339e-07, |
|
"logits/chosen": 4.543593406677246, |
|
"logits/rejected": 4.786489009857178, |
|
"logps/chosen": -434.46746826171875, |
|
"logps/rejected": -367.75689697265625, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08059108257293701, |
|
"rewards/margins": 0.23346829414367676, |
|
"rewards/rejected": -0.3140593469142914, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0468463752944255, |
|
"eval_logits/chosen": 4.617003440856934, |
|
"eval_logits/rejected": 4.9695563316345215, |
|
"eval_logps/chosen": -443.69580078125, |
|
"eval_logps/rejected": -378.1418762207031, |
|
"eval_loss": 0.6708300113677979, |
|
"eval_rewards/accuracies": 0.6349206566810608, |
|
"eval_rewards/chosen": -0.09571509808301926, |
|
"eval_rewards/margins": 0.23677198588848114, |
|
"eval_rewards/rejected": -0.3324871063232422, |
|
"eval_runtime": 20.9626, |
|
"eval_samples_per_second": 95.408, |
|
"eval_steps_per_second": 3.005, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.067783302800314, |
|
"grad_norm": 33.3201987416808, |
|
"learning_rate": 5.274476699321637e-07, |
|
"logits/chosen": 4.583409786224365, |
|
"logits/rejected": 4.803020477294922, |
|
"logps/chosen": -390.48565673828125, |
|
"logps/rejected": -351.6776123046875, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0327589213848114, |
|
"rewards/margins": 0.32544368505477905, |
|
"rewards/rejected": -0.35820263624191284, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0887202303062025, |
|
"grad_norm": 30.742730814185435, |
|
"learning_rate": 5.091533134088387e-07, |
|
"logits/chosen": 4.493949890136719, |
|
"logits/rejected": 4.9839911460876465, |
|
"logps/chosen": -383.7958984375, |
|
"logps/rejected": -354.36480712890625, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.07341745495796204, |
|
"rewards/margins": 0.19709812104701996, |
|
"rewards/rejected": -0.2705155909061432, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.109657157812091, |
|
"grad_norm": 34.05900047947194, |
|
"learning_rate": 4.908466865911614e-07, |
|
"logits/chosen": 4.793222904205322, |
|
"logits/rejected": 5.078155517578125, |
|
"logps/chosen": -401.0002746582031, |
|
"logps/rejected": -340.4061279296875, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.05051114410161972, |
|
"rewards/margins": 0.27152642607688904, |
|
"rewards/rejected": -0.32203757762908936, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.1305940853179797, |
|
"grad_norm": 30.483486401054424, |
|
"learning_rate": 4.7255233006783624e-07, |
|
"logits/chosen": 4.857717990875244, |
|
"logits/rejected": 5.0497636795043945, |
|
"logps/chosen": -375.65362548828125, |
|
"logps/rejected": -330.26165771484375, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.02352207899093628, |
|
"rewards/margins": 0.35729408264160156, |
|
"rewards/rejected": -0.38081610202789307, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.151531012823868, |
|
"grad_norm": 35.09603470685652, |
|
"learning_rate": 4.5429476797886617e-07, |
|
"logits/chosen": 4.932369232177734, |
|
"logits/rejected": 5.050224781036377, |
|
"logps/chosen": -430.0126953125, |
|
"logps/rejected": -331.1691589355469, |
|
"loss": 0.6599, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0060789333656430244, |
|
"rewards/margins": 0.25288811326026917, |
|
"rewards/rejected": -0.25896701216697693, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1724679403297567, |
|
"grad_norm": 40.64422646125966, |
|
"learning_rate": 4.3609847514019763e-07, |
|
"logits/chosen": 4.637743949890137, |
|
"logits/rejected": 5.000674724578857, |
|
"logps/chosen": -420.3258361816406, |
|
"logps/rejected": -362.2751159667969, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0094971414655447, |
|
"rewards/margins": 0.22678783535957336, |
|
"rewards/rejected": -0.23628497123718262, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.193404867835645, |
|
"grad_norm": 32.638009640148645, |
|
"learning_rate": 4.179878442344892e-07, |
|
"logits/chosen": 4.855754375457764, |
|
"logits/rejected": 4.871184349060059, |
|
"logps/chosen": -384.08660888671875, |
|
"logps/rejected": -371.4262390136719, |
|
"loss": 0.6766, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.05095939710736275, |
|
"rewards/margins": 0.28148993849754333, |
|
"rewards/rejected": -0.332449346780777, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2143417953415336, |
|
"grad_norm": 35.519971577107064, |
|
"learning_rate": 3.9998715311197783e-07, |
|
"logits/chosen": 4.73850679397583, |
|
"logits/rejected": 5.173120021820068, |
|
"logps/chosen": -414.8775329589844, |
|
"logps/rejected": -341.5818786621094, |
|
"loss": 0.6508, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.09668377041816711, |
|
"rewards/margins": 0.25211262702941895, |
|
"rewards/rejected": -0.34879642724990845, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.235278722847422, |
|
"grad_norm": 34.20580765627037, |
|
"learning_rate": 3.821205322452863e-07, |
|
"logits/chosen": 4.916988372802734, |
|
"logits/rejected": 5.1998610496521, |
|
"logps/chosen": -448.5626525878906, |
|
"logps/rejected": -367.84027099609375, |
|
"loss": 0.644, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.07886572182178497, |
|
"rewards/margins": 0.3578983247280121, |
|
"rewards/rejected": -0.43676406145095825, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2562156503533106, |
|
"grad_norm": 33.854286929995176, |
|
"learning_rate": 3.6441193238179146e-07, |
|
"logits/chosen": 4.852269649505615, |
|
"logits/rejected": 4.903324127197266, |
|
"logps/chosen": -446.4149475097656, |
|
"logps/rejected": -423.3356018066406, |
|
"loss": 0.6349, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.14010193943977356, |
|
"rewards/margins": 0.15067996084690094, |
|
"rewards/rejected": -0.2907818853855133, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2562156503533106, |
|
"eval_logits/chosen": 4.62031364440918, |
|
"eval_logits/rejected": 4.9707465171813965, |
|
"eval_logps/chosen": -443.61212158203125, |
|
"eval_logps/rejected": -378.1197204589844, |
|
"eval_loss": 0.6720485091209412, |
|
"eval_rewards/accuracies": 0.5992063283920288, |
|
"eval_rewards/chosen": -0.053870752453804016, |
|
"eval_rewards/margins": 0.267531156539917, |
|
"eval_rewards/rejected": -0.321401983499527, |
|
"eval_runtime": 20.8046, |
|
"eval_samples_per_second": 96.133, |
|
"eval_steps_per_second": 3.028, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2771525778591992, |
|
"grad_norm": 36.085842973391074, |
|
"learning_rate": 3.4688509243692034e-07, |
|
"logits/chosen": 4.767918586730957, |
|
"logits/rejected": 4.757430553436279, |
|
"logps/chosen": -407.41668701171875, |
|
"logps/rejected": -317.3873596191406, |
|
"loss": 0.6402, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.08183420449495316, |
|
"rewards/margins": 0.33883604407310486, |
|
"rewards/rejected": -0.42067021131515503, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2980895053650876, |
|
"grad_norm": 29.698333183198105, |
|
"learning_rate": 3.295635076714144e-07, |
|
"logits/chosen": 5.085806846618652, |
|
"logits/rejected": 5.415268898010254, |
|
"logps/chosen": -395.627685546875, |
|
"logps/rejected": -331.7653503417969, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.09060301631689072, |
|
"rewards/margins": 0.3094441294670105, |
|
"rewards/rejected": -0.4000471234321594, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.3190264328709762, |
|
"grad_norm": 35.208773349468885, |
|
"learning_rate": 3.12470398195219e-07, |
|
"logits/chosen": 4.828533172607422, |
|
"logits/rejected": 4.925856113433838, |
|
"logps/chosen": -418.5848083496094, |
|
"logps/rejected": -376.3353576660156, |
|
"loss": 0.6486, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.06391973793506622, |
|
"rewards/margins": 0.44674786925315857, |
|
"rewards/rejected": -0.3828281760215759, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.3399633603768648, |
|
"grad_norm": 29.673309842493335, |
|
"learning_rate": 2.956286778402226e-07, |
|
"logits/chosen": 4.896113872528076, |
|
"logits/rejected": 5.183098793029785, |
|
"logps/chosen": -394.4980773925781, |
|
"logps/rejected": -374.76422119140625, |
|
"loss": 0.6394, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.04267222806811333, |
|
"rewards/margins": 0.2913575768470764, |
|
"rewards/rejected": -0.33402982354164124, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3609002878827532, |
|
"grad_norm": 35.03684415648848, |
|
"learning_rate": 2.7906092344356826e-07, |
|
"logits/chosen": 4.610795021057129, |
|
"logits/rejected": 4.8373188972473145, |
|
"logps/chosen": -379.4288024902344, |
|
"logps/rejected": -345.05596923828125, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1311950385570526, |
|
"rewards/margins": 0.2357216328382492, |
|
"rewards/rejected": -0.366916686296463, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.3818372153886418, |
|
"grad_norm": 33.06984951084542, |
|
"learning_rate": 2.6278934458271996e-07, |
|
"logits/chosen": 4.830328941345215, |
|
"logits/rejected": 5.017812252044678, |
|
"logps/chosen": -377.4278564453125, |
|
"logps/rejected": -343.86529541015625, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.10503290593624115, |
|
"rewards/margins": 0.11762680858373642, |
|
"rewards/rejected": -0.22265975177288055, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.4027741428945302, |
|
"grad_norm": 31.761556922593446, |
|
"learning_rate": 2.468357538028487e-07, |
|
"logits/chosen": 4.728631496429443, |
|
"logits/rejected": 4.90619421005249, |
|
"logps/chosen": -413.2724609375, |
|
"logps/rejected": -346.9877624511719, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.03663766756653786, |
|
"rewards/margins": 0.2855125069618225, |
|
"rewards/rejected": -0.32215017080307007, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.4237110704004188, |
|
"grad_norm": 34.93162849349177, |
|
"learning_rate": 2.312215373764551e-07, |
|
"logits/chosen": 4.728277206420898, |
|
"logits/rejected": 5.018845558166504, |
|
"logps/chosen": -421.8961486816406, |
|
"logps/rejected": -403.57354736328125, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.1260446161031723, |
|
"rewards/margins": 0.2200162708759308, |
|
"rewards/rejected": -0.3460609018802643, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.4446479979063072, |
|
"grad_norm": 33.66523822793528, |
|
"learning_rate": 2.1596762663442213e-07, |
|
"logits/chosen": 4.863284111022949, |
|
"logits/rejected": 4.840500354766846, |
|
"logps/chosen": -422.4331970214844, |
|
"logps/rejected": -355.96868896484375, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.10065089166164398, |
|
"rewards/margins": 0.24511468410491943, |
|
"rewards/rejected": -0.3457655906677246, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4655849254121958, |
|
"grad_norm": 34.48257400044076, |
|
"learning_rate": 2.0109446990692963e-07, |
|
"logits/chosen": 4.709015846252441, |
|
"logits/rejected": 4.914425849914551, |
|
"logps/chosen": -452.9461364746094, |
|
"logps/rejected": -442.56658935546875, |
|
"loss": 0.6427, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.08032918721437454, |
|
"rewards/margins": 0.39584842324256897, |
|
"rewards/rejected": -0.3155192732810974, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4655849254121958, |
|
"eval_logits/chosen": 4.592012882232666, |
|
"eval_logits/rejected": 4.943046569824219, |
|
"eval_logps/chosen": -443.6796875, |
|
"eval_logps/rejected": -378.1680908203125, |
|
"eval_loss": 0.6795812845230103, |
|
"eval_rewards/accuracies": 0.60317462682724, |
|
"eval_rewards/chosen": -0.08766676485538483, |
|
"eval_rewards/margins": 0.25792673230171204, |
|
"eval_rewards/rejected": -0.34559354186058044, |
|
"eval_runtime": 21.1978, |
|
"eval_samples_per_second": 94.349, |
|
"eval_steps_per_second": 2.972, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4865218529180844, |
|
"grad_norm": 36.350524448045036, |
|
"learning_rate": 1.8662200511184872e-07, |
|
"logits/chosen": 4.871232509613037, |
|
"logits/rejected": 4.886293411254883, |
|
"logps/chosen": -417.8133850097656, |
|
"logps/rejected": -384.177490234375, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.12399878352880478, |
|
"rewards/margins": 0.27855515480041504, |
|
"rewards/rejected": -0.40255388617515564, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.5074587804239727, |
|
"grad_norm": 34.52058371975813, |
|
"learning_rate": 1.725696330273575e-07, |
|
"logits/chosen": 4.8079633712768555, |
|
"logits/rejected": 5.118483543395996, |
|
"logps/chosen": -433.02032470703125, |
|
"logps/rejected": -383.21539306640625, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.049599818885326385, |
|
"rewards/margins": 0.36388009786605835, |
|
"rewards/rejected": -0.31428030133247375, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.5283957079298613, |
|
"grad_norm": 36.62094520000859, |
|
"learning_rate": 1.589561912846089e-07, |
|
"logits/chosen": 4.67967414855957, |
|
"logits/rejected": 4.974714756011963, |
|
"logps/chosen": -402.2828063964844, |
|
"logps/rejected": -343.87939453125, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.02172028087079525, |
|
"rewards/margins": 0.3966042995452881, |
|
"rewards/rejected": -0.4183245599269867, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.54933263543575, |
|
"grad_norm": 34.85140828972076, |
|
"learning_rate": 1.4579992911531496e-07, |
|
"logits/chosen": 4.999066352844238, |
|
"logits/rejected": 5.089913845062256, |
|
"logps/chosen": -442.08538818359375, |
|
"logps/rejected": -387.76953125, |
|
"loss": 0.6641, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.024145543575286865, |
|
"rewards/margins": 0.3119828999042511, |
|
"rewards/rejected": -0.28783735632896423, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5702695629416383, |
|
"grad_norm": 33.55559408410901, |
|
"learning_rate": 1.3311848288809813e-07, |
|
"logits/chosen": 4.944571018218994, |
|
"logits/rejected": 4.949963569641113, |
|
"logps/chosen": -422.9165954589844, |
|
"logps/rejected": -378.2356262207031, |
|
"loss": 0.6431, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.10791780799627304, |
|
"rewards/margins": 0.16808216273784637, |
|
"rewards/rejected": -0.2759999632835388, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5912064904475267, |
|
"grad_norm": 33.284252993746314, |
|
"learning_rate": 1.209288524664029e-07, |
|
"logits/chosen": 4.269396781921387, |
|
"logits/rejected": 4.640176296234131, |
|
"logps/chosen": -513.432861328125, |
|
"logps/rejected": -464.742431640625, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.00313050439581275, |
|
"rewards/margins": 0.3427557051181793, |
|
"rewards/rejected": -0.33962517976760864, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.6121434179534153, |
|
"grad_norm": 33.301123590813035, |
|
"learning_rate": 1.0924737841966497e-07, |
|
"logits/chosen": 4.588865756988525, |
|
"logits/rejected": 4.75103235244751, |
|
"logps/chosen": -465.42059326171875, |
|
"logps/rejected": -370.064697265625, |
|
"loss": 0.653, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.01498096901923418, |
|
"rewards/margins": 0.34728002548217773, |
|
"rewards/rejected": -0.3622610569000244, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.633080345459304, |
|
"grad_norm": 33.707974100314466, |
|
"learning_rate": 9.808972011828054e-08, |
|
"logits/chosen": 4.657374382019043, |
|
"logits/rejected": 5.004950523376465, |
|
"logps/chosen": -452.0787048339844, |
|
"logps/rejected": -383.26824951171875, |
|
"loss": 0.6419, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.06604544818401337, |
|
"rewards/margins": 0.4590230882167816, |
|
"rewards/rejected": -0.39297762513160706, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.6540172729651923, |
|
"grad_norm": 36.400512256730096, |
|
"learning_rate": 8.747083474174527e-08, |
|
"logits/chosen": 4.775164604187012, |
|
"logits/rejected": 5.237417221069336, |
|
"logps/chosen": -431.0052185058594, |
|
"logps/rejected": -372.1168212890625, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.017582783475518227, |
|
"rewards/margins": 0.35258156061172485, |
|
"rewards/rejected": -0.37016433477401733, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.674954200471081, |
|
"grad_norm": 29.96252260731642, |
|
"learning_rate": 7.740495722810269e-08, |
|
"logits/chosen": 4.998331546783447, |
|
"logits/rejected": 4.909043312072754, |
|
"logps/chosen": -489.783447265625, |
|
"logps/rejected": -415.0606384277344, |
|
"loss": 0.6128, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.04877934604883194, |
|
"rewards/margins": 0.3542923033237457, |
|
"rewards/rejected": -0.3055129647254944, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.674954200471081, |
|
"eval_logits/chosen": 4.6106181144714355, |
|
"eval_logits/rejected": 4.968925476074219, |
|
"eval_logps/chosen": -443.625244140625, |
|
"eval_logps/rejected": -378.2127990722656, |
|
"eval_loss": 0.6703739166259766, |
|
"eval_rewards/accuracies": 0.6071428656578064, |
|
"eval_rewards/chosen": -0.06042463704943657, |
|
"eval_rewards/margins": 0.30752548575401306, |
|
"eval_rewards/rejected": -0.3679501414299011, |
|
"eval_runtime": 21.1621, |
|
"eval_samples_per_second": 94.509, |
|
"eval_steps_per_second": 2.977, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6958911279769695, |
|
"grad_norm": 34.07376933070953, |
|
"learning_rate": 6.790558119157597e-08, |
|
"logits/chosen": 4.842529773712158, |
|
"logits/rejected": 4.945174217224121, |
|
"logps/chosen": -446.68682861328125, |
|
"logps/rejected": -379.9209899902344, |
|
"loss": 0.6409, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.010895573534071445, |
|
"rewards/margins": 0.47363200783729553, |
|
"rewards/rejected": -0.46273642778396606, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.7168280554828579, |
|
"grad_norm": 31.41564508164701, |
|
"learning_rate": 5.898544083397e-08, |
|
"logits/chosen": 4.57013463973999, |
|
"logits/rejected": 4.8762030601501465, |
|
"logps/chosen": -459.298583984375, |
|
"logps/rejected": -376.189208984375, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04928427189588547, |
|
"rewards/margins": 0.33450883626937866, |
|
"rewards/rejected": -0.38379308581352234, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.7377649829887463, |
|
"grad_norm": 38.55984096337612, |
|
"learning_rate": 5.065649387408705e-08, |
|
"logits/chosen": 4.863150596618652, |
|
"logits/rejected": 4.996617317199707, |
|
"logps/chosen": -405.2935485839844, |
|
"logps/rejected": -383.06756591796875, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.14060600101947784, |
|
"rewards/margins": 0.1646648645401001, |
|
"rewards/rejected": -0.30527088046073914, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.7587019104946349, |
|
"grad_norm": 32.69891650352482, |
|
"learning_rate": 4.292990551804171e-08, |
|
"logits/chosen": 4.561503887176514, |
|
"logits/rejected": 4.661375522613525, |
|
"logps/chosen": -374.9468688964844, |
|
"logps/rejected": -359.5188293457031, |
|
"loss": 0.6426, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.05613694339990616, |
|
"rewards/margins": 0.3448534607887268, |
|
"rewards/rejected": -0.40099042654037476, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.7796388380005235, |
|
"grad_norm": 32.82316724445512, |
|
"learning_rate": 3.581603349196371e-08, |
|
"logits/chosen": 4.668177604675293, |
|
"logits/rejected": 5.044764518737793, |
|
"logps/chosen": -391.29534912109375, |
|
"logps/rejected": -374.1195068359375, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.14754648506641388, |
|
"rewards/margins": 0.16757197678089142, |
|
"rewards/rejected": -0.3151184618473053, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.8005757655064119, |
|
"grad_norm": 32.36442235611696, |
|
"learning_rate": 2.9324414157151367e-08, |
|
"logits/chosen": 4.706895351409912, |
|
"logits/rejected": 5.021437644958496, |
|
"logps/chosen": -417.41021728515625, |
|
"logps/rejected": -335.3275451660156, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.06291428953409195, |
|
"rewards/margins": 0.3059840798377991, |
|
"rewards/rejected": -0.36889833211898804, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.8215126930123005, |
|
"grad_norm": 29.740377909388123, |
|
"learning_rate": 2.3463749726290284e-08, |
|
"logits/chosen": 4.696743965148926, |
|
"logits/rejected": 4.8797287940979, |
|
"logps/chosen": -477.77783203125, |
|
"logps/rejected": -390.98175048828125, |
|
"loss": 0.6614, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.07511474192142487, |
|
"rewards/margins": 0.17798468470573425, |
|
"rewards/rejected": -0.2530994415283203, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.842449620518189, |
|
"grad_norm": 30.952090476967147, |
|
"learning_rate": 1.824189659787284e-08, |
|
"logits/chosen": 4.781184196472168, |
|
"logits/rejected": 5.032862663269043, |
|
"logps/chosen": -387.22906494140625, |
|
"logps/rejected": -360.9486389160156, |
|
"loss": 0.6618, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.11400938034057617, |
|
"rewards/margins": 0.21471650898456573, |
|
"rewards/rejected": -0.3287258744239807, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.8633865480240774, |
|
"grad_norm": 31.64887361264221, |
|
"learning_rate": 1.3665854824458035e-08, |
|
"logits/chosen": 4.322469234466553, |
|
"logits/rejected": 4.672883033752441, |
|
"logps/chosen": -445.35699462890625, |
|
"logps/rejected": -390.5237731933594, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.032880861312150955, |
|
"rewards/margins": 0.3543739914894104, |
|
"rewards/rejected": -0.38725486397743225, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.8843234755299658, |
|
"grad_norm": 33.85502008551422, |
|
"learning_rate": 9.741758728888217e-09, |
|
"logits/chosen": 4.4365644454956055, |
|
"logits/rejected": 4.837357997894287, |
|
"logps/chosen": -472.887451171875, |
|
"logps/rejected": -367.82611083984375, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.05196143314242363, |
|
"rewards/margins": 0.3476230800151825, |
|
"rewards/rejected": -0.39958447217941284, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8843234755299658, |
|
"eval_logits/chosen": 4.5737175941467285, |
|
"eval_logits/rejected": 4.921082496643066, |
|
"eval_logps/chosen": -443.622314453125, |
|
"eval_logps/rejected": -378.2174377441406, |
|
"eval_loss": 0.6692253351211548, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": -0.05897674709558487, |
|
"eval_rewards/margins": 0.31128397583961487, |
|
"eval_rewards/rejected": -0.37026071548461914, |
|
"eval_runtime": 21.3225, |
|
"eval_samples_per_second": 93.797, |
|
"eval_steps_per_second": 2.955, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9052604030358546, |
|
"grad_norm": 33.28147635399462, |
|
"learning_rate": 6.474868681043577e-09, |
|
"logits/chosen": 4.713411808013916, |
|
"logits/rejected": 4.913935661315918, |
|
"logps/chosen": -384.9287109375, |
|
"logps/rejected": -316.16265869140625, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.02645047940313816, |
|
"rewards/margins": 0.40163594484329224, |
|
"rewards/rejected": -0.42808642983436584, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.926197330541743, |
|
"grad_norm": 35.875215811609834, |
|
"learning_rate": 3.869564046156459e-09, |
|
"logits/chosen": 4.6749348640441895, |
|
"logits/rejected": 4.898279190063477, |
|
"logps/chosen": -441.083740234375, |
|
"logps/rejected": -361.4406433105469, |
|
"loss": 0.6389, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0031513571739196777, |
|
"rewards/margins": 0.41674357652664185, |
|
"rewards/rejected": -0.4198949337005615, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.9471342580476314, |
|
"grad_norm": 32.946064523302205, |
|
"learning_rate": 1.929337314139412e-09, |
|
"logits/chosen": 4.862700462341309, |
|
"logits/rejected": 4.817538261413574, |
|
"logps/chosen": -429.21051025390625, |
|
"logps/rejected": -370.45745849609375, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0764947384595871, |
|
"rewards/margins": 0.20215356349945068, |
|
"rewards/rejected": -0.2786482870578766, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.96807118555352, |
|
"grad_norm": 37.53766060677335, |
|
"learning_rate": 6.567894177967325e-10, |
|
"logits/chosen": 5.056074142456055, |
|
"logits/rejected": 5.200203895568848, |
|
"logps/chosen": -382.3914489746094, |
|
"logps/rejected": -319.7542419433594, |
|
"loss": 0.6475, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.012996235862374306, |
|
"rewards/margins": 0.2734270989894867, |
|
"rewards/rejected": -0.28642335534095764, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9890081130594086, |
|
"grad_norm": 32.67422145978211, |
|
"learning_rate": 5.3626246194704575e-11, |
|
"logits/chosen": 4.634739875793457, |
|
"logits/rejected": 4.890820503234863, |
|
"logps/chosen": -425.7994689941406, |
|
"logps/rejected": -344.5509033203125, |
|
"loss": 0.6372, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.18660762906074524, |
|
"rewards/margins": 0.23620739579200745, |
|
"rewards/rejected": -0.4228149950504303, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9973828840617638, |
|
"step": 954, |
|
"total_flos": 0.0, |
|
"train_loss": 0.675485389037702, |
|
"train_runtime": 5897.7907, |
|
"train_samples_per_second": 20.731, |
|
"train_steps_per_second": 0.162 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 954, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|