{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 50,
  "global_step": 436,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.022935779816513763,
      "grad_norm": 1.6432102724209243,
      "learning_rate": 1.1363636363636363e-07,
      "logits/chosen": -2.619717597961426,
      "logits/rejected": -2.552694320678711,
      "logps/chosen": -265.4180908203125,
      "logps/rejected": -236.1053924560547,
      "loss": 0.0154,
      "rewards/accuracies": 0.3812499940395355,
      "rewards/chosen": 0.00027717588818632066,
      "rewards/margins": 3.1426665373146534e-05,
      "rewards/rejected": 0.0002457492519170046,
      "step": 10
    },
    {
      "epoch": 0.045871559633027525,
      "grad_norm": 1.5125635433247804,
      "learning_rate": 2.2727272727272726e-07,
      "logits/chosen": -2.6578421592712402,
      "logits/rejected": -2.5760762691497803,
      "logps/chosen": -298.83837890625,
      "logps/rejected": -274.30596923828125,
      "loss": 0.0155,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.0005307936808094382,
      "rewards/margins": 0.0014294281136244535,
      "rewards/rejected": -0.0019602221436798573,
      "step": 20
    },
    {
      "epoch": 0.06880733944954129,
      "grad_norm": 1.3722478894206875,
      "learning_rate": 3.4090909090909085e-07,
      "logits/chosen": -2.6761069297790527,
      "logits/rejected": -2.602224826812744,
      "logps/chosen": -290.43157958984375,
      "logps/rejected": -234.3854522705078,
      "loss": 0.0146,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.00901214499026537,
      "rewards/margins": 0.013047738000750542,
      "rewards/rejected": -0.004035593010485172,
      "step": 30
    },
    {
      "epoch": 0.09174311926605505,
      "grad_norm": 1.2692399849923888,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": -2.6601767539978027,
      "logits/rejected": -2.610305070877075,
      "logps/chosen": -280.9813537597656,
      "logps/rejected": -267.60333251953125,
      "loss": 0.013,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.04187846928834915,
      "rewards/margins": 0.0399814248085022,
      "rewards/rejected": 0.0018970475066453218,
      "step": 40
    },
    {
      "epoch": 0.11467889908256881,
      "grad_norm": 1.4063277329047579,
      "learning_rate": 4.997110275491701e-07,
      "logits/chosen": -2.6256864070892334,
      "logits/rejected": -2.6204254627227783,
      "logps/chosen": -289.7392578125,
      "logps/rejected": -299.10064697265625,
      "loss": 0.012,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": 0.06511950492858887,
      "rewards/margins": 0.06428412348031998,
      "rewards/rejected": 0.0008353829616680741,
      "step": 50
    },
    {
      "epoch": 0.11467889908256881,
      "eval_logits/chosen": -2.578613519668579,
      "eval_logits/rejected": -2.502546787261963,
      "eval_logps/chosen": -277.33428955078125,
      "eval_logps/rejected": -247.25254821777344,
      "eval_loss": 0.01130068488419056,
      "eval_rewards/accuracies": 0.6982758641242981,
      "eval_rewards/chosen": 0.0775580108165741,
      "eval_rewards/margins": 0.0848592072725296,
      "eval_rewards/rejected": -0.007301207631826401,
      "eval_runtime": 93.3854,
      "eval_samples_per_second": 19.468,
      "eval_steps_per_second": 0.311,
      "step": 50
    },
    {
      "epoch": 0.13761467889908258,
      "grad_norm": 1.422544468484653,
      "learning_rate": 4.979475034558115e-07,
      "logits/chosen": -2.580706834793091,
      "logits/rejected": -2.525763750076294,
      "logps/chosen": -284.323486328125,
      "logps/rejected": -259.6044006347656,
      "loss": 0.0112,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": 0.05161212012171745,
      "rewards/margins": 0.07209066301584244,
      "rewards/rejected": -0.020478537306189537,
      "step": 60
    },
    {
      "epoch": 0.16055045871559634,
      "grad_norm": 1.4554279086187263,
      "learning_rate": 4.945923025551788e-07,
      "logits/chosen": -2.5086066722869873,
      "logits/rejected": -2.471884250640869,
      "logps/chosen": -319.5674743652344,
      "logps/rejected": -265.2959899902344,
      "loss": 0.0115,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.08003517985343933,
      "rewards/margins": 0.11891887336969376,
      "rewards/rejected": -0.038883693516254425,
      "step": 70
    },
    {
      "epoch": 0.1834862385321101,
      "grad_norm": 1.3828352586845378,
      "learning_rate": 4.896669632591651e-07,
      "logits/chosen": -2.480229616165161,
      "logits/rejected": -2.3888580799102783,
      "logps/chosen": -279.42529296875,
      "logps/rejected": -256.35528564453125,
      "loss": 0.0109,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.06424586474895477,
      "rewards/margins": 0.10123773664236069,
      "rewards/rejected": -0.036991871893405914,
      "step": 80
    },
    {
      "epoch": 0.20642201834862386,
      "grad_norm": 1.341734639019618,
      "learning_rate": 4.832031033425662e-07,
      "logits/chosen": -2.430302381515503,
      "logits/rejected": -2.3838858604431152,
      "logps/chosen": -280.6355895996094,
      "logps/rejected": -252.35107421875,
      "loss": 0.0103,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.06957819312810898,
      "rewards/margins": 0.12507636845111847,
      "rewards/rejected": -0.0554981604218483,
      "step": 90
    },
    {
      "epoch": 0.22935779816513763,
      "grad_norm": 1.503281183515388,
      "learning_rate": 4.752422169756047e-07,
      "logits/chosen": -2.392822265625,
      "logits/rejected": -2.337982654571533,
      "logps/chosen": -269.06829833984375,
      "logps/rejected": -280.2345886230469,
      "loss": 0.0111,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.033564209938049316,
      "rewards/margins": 0.11662591993808746,
      "rewards/rejected": -0.08306171000003815,
      "step": 100
    },
    {
      "epoch": 0.22935779816513763,
      "eval_logits/chosen": -2.445643424987793,
      "eval_logits/rejected": -2.345205545425415,
      "eval_logps/chosen": -281.08892822265625,
      "eval_logps/rejected": -254.68817138671875,
      "eval_loss": 0.01000975538045168,
      "eval_rewards/accuracies": 0.7112069129943848,
      "eval_rewards/chosen": 0.04001150280237198,
      "eval_rewards/margins": 0.1216687560081482,
      "eval_rewards/rejected": -0.08165725320577621,
      "eval_runtime": 92.3022,
      "eval_samples_per_second": 19.696,
      "eval_steps_per_second": 0.314,
      "step": 100
    },
    {
      "epoch": 0.25229357798165136,
      "grad_norm": 2.297190949384611,
      "learning_rate": 4.658354083558188e-07,
      "logits/chosen": -2.466749668121338,
      "logits/rejected": -2.361680030822754,
      "logps/chosen": -263.682373046875,
      "logps/rejected": -241.5611572265625,
      "loss": 0.0113,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.015057370066642761,
      "rewards/margins": 0.09333699941635132,
      "rewards/rejected": -0.07827961444854736,
      "step": 110
    },
    {
      "epoch": 0.27522935779816515,
      "grad_norm": 1.422856756971901,
      "learning_rate": 4.550430636492389e-07,
      "logits/chosen": -2.3867597579956055,
      "logits/rejected": -2.3550314903259277,
      "logps/chosen": -275.4966125488281,
      "logps/rejected": -260.20843505859375,
      "loss": 0.0103,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.0032271749805659056,
      "rewards/margins": 0.09067237377166748,
      "rewards/rejected": -0.08744519203901291,
      "step": 120
    },
    {
      "epoch": 0.2981651376146789,
      "grad_norm": 2.508161764096464,
      "learning_rate": 4.429344633468004e-07,
      "logits/chosen": -2.4056849479675293,
      "logits/rejected": -2.361074686050415,
      "logps/chosen": -254.7312469482422,
      "logps/rejected": -252.63455200195312,
      "loss": 0.0105,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.014488674700260162,
      "rewards/margins": 0.11470804363489151,
      "rewards/rejected": -0.10021935403347015,
      "step": 130
    },
    {
      "epoch": 0.3211009174311927,
      "grad_norm": 3.762306745768528,
      "learning_rate": 4.2958733752443187e-07,
      "logits/chosen": -2.412415027618408,
      "logits/rejected": -2.3416905403137207,
      "logps/chosen": -273.1282043457031,
      "logps/rejected": -231.5812225341797,
      "loss": 0.0122,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.03056950867176056,
      "rewards/margins": 0.10341660678386688,
      "rewards/rejected": -0.07284711301326752,
      "step": 140
    },
    {
      "epoch": 0.3440366972477064,
      "grad_norm": 1.323990928834917,
      "learning_rate": 4.150873668617898e-07,
      "logits/chosen": -2.454942226409912,
      "logits/rejected": -2.3723654747009277,
      "logps/chosen": -268.99053955078125,
      "logps/rejected": -247.30032348632812,
      "loss": 0.0104,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.02446950413286686,
      "rewards/margins": 0.11665806919336319,
      "rewards/rejected": -0.1411275714635849,
      "step": 150
    },
    {
      "epoch": 0.3440366972477064,
      "eval_logits/chosen": -2.4970576763153076,
      "eval_logits/rejected": -2.400646924972534,
      "eval_logps/chosen": -286.011474609375,
      "eval_logps/rejected": -260.7337646484375,
      "eval_loss": 0.009798681363463402,
      "eval_rewards/accuracies": 0.7284482717514038,
      "eval_rewards/chosen": -0.009214152581989765,
      "eval_rewards/margins": 0.13289883732795715,
      "eval_rewards/rejected": -0.14211300015449524,
      "eval_runtime": 93.2661,
      "eval_samples_per_second": 19.493,
      "eval_steps_per_second": 0.311,
      "step": 150
    },
    {
      "epoch": 0.3669724770642202,
      "grad_norm": 2.7039803270775495,
      "learning_rate": 3.9952763262280397e-07,
      "logits/chosen": -2.460155963897705,
      "logits/rejected": -2.39618182182312,
      "logps/chosen": -293.5860595703125,
      "logps/rejected": -295.9964294433594,
      "loss": 0.01,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.02965056337416172,
      "rewards/margins": 0.1299286037683487,
      "rewards/rejected": -0.10027804225683212,
      "step": 160
    },
    {
      "epoch": 0.38990825688073394,
      "grad_norm": 1.3210709780236793,
      "learning_rate": 3.8300801912883414e-07,
      "logits/chosen": -2.503695249557495,
      "logits/rejected": -2.4564812183380127,
      "logps/chosen": -267.4649353027344,
      "logps/rejected": -279.4906921386719,
      "loss": 0.0091,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.041359588503837585,
      "rewards/margins": 0.13557755947113037,
      "rewards/rejected": -0.09421796351671219,
      "step": 170
    },
    {
      "epoch": 0.41284403669724773,
      "grad_norm": 1.4526636355174283,
      "learning_rate": 3.6563457256020884e-07,
      "logits/chosen": -2.5071005821228027,
      "logits/rejected": -2.4085752964019775,
      "logps/chosen": -306.22930908203125,
      "logps/rejected": -253.84475708007812,
      "loss": 0.0096,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.028713583946228027,
      "rewards/margins": 0.12478481233119965,
      "rewards/rejected": -0.15349839627742767,
      "step": 180
    },
    {
      "epoch": 0.43577981651376146,
      "grad_norm": 4.8242221106390115,
      "learning_rate": 3.475188202022617e-07,
      "logits/chosen": -2.414685010910034,
      "logits/rejected": -2.3840391635894775,
      "logps/chosen": -256.085205078125,
      "logps/rejected": -276.09039306640625,
      "loss": 0.0106,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.020293032750487328,
      "rewards/margins": 0.13775238394737244,
      "rewards/rejected": -0.11745934188365936,
      "step": 190
    },
    {
      "epoch": 0.45871559633027525,
      "grad_norm": 1.2637397948323732,
      "learning_rate": 3.287770545059052e-07,
      "logits/chosen": -2.5293562412261963,
      "logits/rejected": -2.431912899017334,
      "logps/chosen": -277.83734130859375,
      "logps/rejected": -256.05206298828125,
      "loss": 0.0096,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": 0.004443052224814892,
      "rewards/margins": 0.12640419602394104,
      "rewards/rejected": -0.12196113169193268,
      "step": 200
    },
    {
      "epoch": 0.45871559633027525,
      "eval_logits/chosen": -2.5115060806274414,
      "eval_logits/rejected": -2.4205849170684814,
      "eval_logps/chosen": -282.7939147949219,
      "eval_logps/rejected": -258.3851013183594,
      "eval_loss": 0.00931267999112606,
      "eval_rewards/accuracies": 0.7887930870056152,
      "eval_rewards/chosen": 0.022961637005209923,
      "eval_rewards/margins": 0.14158831536769867,
      "eval_rewards/rejected": -0.1186266764998436,
      "eval_runtime": 92.3817,
      "eval_samples_per_second": 19.679,
      "eval_steps_per_second": 0.314,
      "step": 200
    },
    {
      "epoch": 0.481651376146789,
      "grad_norm": 1.093362739669567,
      "learning_rate": 3.0952958655864954e-07,
      "logits/chosen": -2.4653537273406982,
      "logits/rejected": -2.448239803314209,
      "logps/chosen": -271.6222229003906,
      "logps/rejected": -265.7300109863281,
      "loss": 0.0094,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.01682201772928238,
      "rewards/margins": 0.13070014119148254,
      "rewards/rejected": -0.11387811601161957,
      "step": 210
    },
    {
      "epoch": 0.5045871559633027,
      "grad_norm": 1.5040749987763142,
      "learning_rate": 2.898999737583448e-07,
      "logits/chosen": -2.50272798538208,
      "logits/rejected": -2.4043869972229004,
      "logps/chosen": -322.4222717285156,
      "logps/rejected": -300.2145690917969,
      "loss": 0.0091,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.0025803411845117807,
      "rewards/margins": 0.15502406656742096,
      "rewards/rejected": -0.15244372189044952,
      "step": 220
    },
    {
      "epoch": 0.5275229357798165,
      "grad_norm": 1.2805773201998596,
      "learning_rate": 2.7001422664752333e-07,
      "logits/chosen": -2.396461009979248,
      "logits/rejected": -2.344364881515503,
      "logps/chosen": -268.88946533203125,
      "logps/rejected": -278.57122802734375,
      "loss": 0.0093,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.009459306485950947,
      "rewards/margins": 0.12005350738763809,
      "rewards/rejected": -0.12951281666755676,
      "step": 230
    },
    {
      "epoch": 0.5504587155963303,
      "grad_norm": 1.346452258119606,
      "learning_rate": 2.5e-07,
      "logits/chosen": -2.471860885620117,
      "logits/rejected": -2.379460573196411,
      "logps/chosen": -288.55877685546875,
      "logps/rejected": -274.3955078125,
      "loss": 0.0096,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -0.010269422084093094,
      "rewards/margins": 0.12018589675426483,
      "rewards/rejected": -0.13045531511306763,
      "step": 240
    },
    {
      "epoch": 0.573394495412844,
      "grad_norm": 1.1636311800865655,
      "learning_rate": 2.2998577335247667e-07,
      "logits/chosen": -2.5101351737976074,
      "logits/rejected": -2.4231343269348145,
      "logps/chosen": -303.5893249511719,
      "logps/rejected": -274.129638671875,
      "loss": 0.0093,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": 0.0020445596892386675,
      "rewards/margins": 0.13890263438224792,
      "rewards/rejected": -0.13685807585716248,
      "step": 250
    },
    {
      "epoch": 0.573394495412844,
      "eval_logits/chosen": -2.459127187728882,
      "eval_logits/rejected": -2.365269899368286,
      "eval_logps/chosen": -286.2547912597656,
      "eval_logps/rejected": -263.3385925292969,
      "eval_loss": 0.008932252414524555,
      "eval_rewards/accuracies": 0.7844827771186829,
      "eval_rewards/chosen": -0.011646694503724575,
      "eval_rewards/margins": 0.1565149575471878,
      "eval_rewards/rejected": -0.16816167533397675,
      "eval_runtime": 93.0437,
      "eval_samples_per_second": 19.539,
      "eval_steps_per_second": 0.312,
      "step": 250
    },
    {
      "epoch": 0.5963302752293578,
      "grad_norm": 1.1997721634582499,
      "learning_rate": 2.1010002624165524e-07,
      "logits/chosen": -2.4458436965942383,
      "logits/rejected": -2.4075264930725098,
      "logps/chosen": -273.95843505859375,
      "logps/rejected": -292.99114990234375,
      "loss": 0.0097,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.032438840717077255,
      "rewards/margins": 0.1535351574420929,
      "rewards/rejected": -0.18597400188446045,
      "step": 260
    },
    {
      "epoch": 0.6192660550458715,
      "grad_norm": 1.2414114105136553,
      "learning_rate": 1.9047041344135043e-07,
      "logits/chosen": -2.437638759613037,
      "logits/rejected": -2.409536600112915,
      "logps/chosen": -270.0658264160156,
      "logps/rejected": -270.24798583984375,
      "loss": 0.0092,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.04003521427512169,
      "rewards/margins": 0.14397411048412323,
      "rewards/rejected": -0.1840093433856964,
      "step": 270
    },
    {
      "epoch": 0.6422018348623854,
      "grad_norm": 1.288255010907884,
      "learning_rate": 1.7122294549409482e-07,
      "logits/chosen": -2.5124118328094482,
      "logits/rejected": -2.4503026008605957,
      "logps/chosen": -273.12841796875,
      "logps/rejected": -280.5234375,
      "loss": 0.0098,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -0.0012958184815943241,
      "rewards/margins": 0.165980726480484,
      "rewards/rejected": -0.167276531457901,
      "step": 280
    },
    {
      "epoch": 0.6651376146788991,
      "grad_norm": 1.3451525886655082,
      "learning_rate": 1.524811797977383e-07,
      "logits/chosen": -2.494507312774658,
      "logits/rejected": -2.4191977977752686,
      "logps/chosen": -287.3388366699219,
      "logps/rejected": -267.7834167480469,
      "loss": 0.009,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": 0.003920386545360088,
      "rewards/margins": 0.1425868272781372,
      "rewards/rejected": -0.13866642117500305,
      "step": 290
    },
    {
      "epoch": 0.6880733944954128,
      "grad_norm": 1.3558837079563066,
      "learning_rate": 1.3436542743979125e-07,
      "logits/chosen": -2.498835563659668,
      "logits/rejected": -2.4762415885925293,
      "logps/chosen": -306.97705078125,
      "logps/rejected": -270.83367919921875,
      "loss": 0.0096,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.010277172550559044,
      "rewards/margins": 0.10946089029312134,
      "rewards/rejected": -0.11973806470632553,
      "step": 300
    },
    {
      "epoch": 0.6880733944954128,
      "eval_logits/chosen": -2.4814062118530273,
      "eval_logits/rejected": -2.38907790184021,
      "eval_logps/chosen": -285.9173278808594,
      "eval_logps/rejected": -262.4115295410156,
      "eval_loss": 0.008779831230640411,
      "eval_rewards/accuracies": 0.7844827771186829,
      "eval_rewards/chosen": -0.008272184059023857,
      "eval_rewards/margins": 0.15061868727207184,
      "eval_rewards/rejected": -0.15889087319374084,
      "eval_runtime": 92.1897,
      "eval_samples_per_second": 19.72,
      "eval_steps_per_second": 0.315,
      "step": 300
    },
    {
      "epoch": 0.7110091743119266,
      "grad_norm": 1.3381129170655774,
      "learning_rate": 1.1699198087116588e-07,
      "logits/chosen": -2.515507221221924,
      "logits/rejected": -2.4294919967651367,
      "logps/chosen": -280.1512756347656,
      "logps/rejected": -280.0994873046875,
      "loss": 0.0094,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.02186095342040062,
      "rewards/margins": 0.11463706195354462,
      "rewards/rejected": -0.13649800419807434,
      "step": 310
    },
    {
      "epoch": 0.7339449541284404,
      "grad_norm": 1.338991121862378,
      "learning_rate": 1.00472367377196e-07,
      "logits/chosen": -2.451615571975708,
      "logits/rejected": -2.404048442840576,
      "logps/chosen": -275.2835388183594,
      "logps/rejected": -251.0447998046875,
      "loss": 0.0093,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.010515814647078514,
      "rewards/margins": 0.15502998232841492,
      "rewards/rejected": -0.16554580628871918,
      "step": 320
    },
    {
      "epoch": 0.7568807339449541,
      "grad_norm": 1.3896883263831044,
      "learning_rate": 8.49126331382102e-08,
      "logits/chosen": -2.4412286281585693,
      "logits/rejected": -2.392331600189209,
      "logps/chosen": -275.5787353515625,
      "logps/rejected": -260.4464111328125,
      "loss": 0.0095,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.010565539821982384,
      "rewards/margins": 0.12289756536483765,
      "rewards/rejected": -0.13346309959888458,
      "step": 330
    },
    {
      "epoch": 0.7798165137614679,
      "grad_norm": 1.360283909503132,
      "learning_rate": 7.041266247556812e-08,
      "logits/chosen": -2.525031566619873,
      "logits/rejected": -2.481128215789795,
      "logps/chosen": -289.6778564453125,
      "logps/rejected": -272.6752014160156,
      "loss": 0.009,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -0.016852576285600662,
      "rewards/margins": 0.11005936563014984,
      "rewards/rejected": -0.1269119530916214,
      "step": 340
    },
    {
      "epoch": 0.8027522935779816,
      "grad_norm": 1.3078891508366295,
      "learning_rate": 5.706553665319955e-08,
      "logits/chosen": -2.501155138015747,
      "logits/rejected": -2.399773597717285,
      "logps/chosen": -277.02081298828125,
      "logps/rejected": -253.2749786376953,
      "loss": 0.0096,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.025297433137893677,
      "rewards/margins": 0.13083800673484802,
      "rewards/rejected": -0.1561354398727417,
      "step": 350
    },
    {
      "epoch": 0.8027522935779816,
      "eval_logits/chosen": -2.4845948219299316,
      "eval_logits/rejected": -2.390641927719116,
      "eval_logps/chosen": -285.5014343261719,
      "eval_logps/rejected": -262.4816589355469,
      "eval_loss": 0.008705741725862026,
      "eval_rewards/accuracies": 0.7801724076271057,
      "eval_rewards/chosen": -0.004113705363124609,
      "eval_rewards/margins": 0.15547847747802734,
      "eval_rewards/rejected": -0.15959219634532928,
      "eval_runtime": 93.024,
      "eval_samples_per_second": 19.543,
      "eval_steps_per_second": 0.312,
      "step": 350
    },
    {
      "epoch": 0.8256880733944955,
      "grad_norm": 1.1167320443463302,
      "learning_rate": 4.4956936350761005e-08,
      "logits/chosen": -2.4814512729644775,
      "logits/rejected": -2.4325201511383057,
      "logps/chosen": -252.41159057617188,
      "logps/rejected": -271.5174865722656,
      "loss": 0.0088,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.0009152599377557635,
      "rewards/margins": 0.128888800740242,
      "rewards/rejected": -0.12980404496192932,
      "step": 360
    },
    {
      "epoch": 0.8486238532110092,
      "grad_norm": 1.2531440295822198,
      "learning_rate": 3.416459164418123e-08,
      "logits/chosen": -2.5166125297546387,
      "logits/rejected": -2.4559273719787598,
      "logps/chosen": -299.89935302734375,
      "logps/rejected": -276.8605041503906,
      "loss": 0.0087,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.003148370888084173,
      "rewards/margins": 0.13971056044101715,
      "rewards/rejected": -0.14285892248153687,
      "step": 370
    },
    {
      "epoch": 0.8715596330275229,
      "grad_norm": 1.494922388543688,
      "learning_rate": 2.475778302439524e-08,
      "logits/chosen": -2.4861674308776855,
      "logits/rejected": -2.4356298446655273,
      "logps/chosen": -296.1441650390625,
      "logps/rejected": -272.86956787109375,
      "loss": 0.009,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.0023386895190924406,
      "rewards/margins": 0.15447205305099487,
      "rewards/rejected": -0.15213337540626526,
      "step": 380
    },
    {
      "epoch": 0.8944954128440367,
      "grad_norm": 1.505735003806321,
      "learning_rate": 1.6796896657433805e-08,
      "logits/chosen": -2.4833850860595703,
      "logits/rejected": -2.381284236907959,
      "logps/chosen": -252.93844604492188,
      "logps/rejected": -242.3744659423828,
      "loss": 0.0095,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.009809909388422966,
      "rewards/margins": 0.1208723783493042,
      "rewards/rejected": -0.13068227469921112,
      "step": 390
    },
    {
      "epoch": 0.9174311926605505,
      "grad_norm": 1.3636138422869082,
      "learning_rate": 1.0333036740834855e-08,
      "logits/chosen": -2.41188383102417,
      "logits/rejected": -2.378366231918335,
      "logps/chosen": -223.8693389892578,
      "logps/rejected": -241.79000854492188,
      "loss": 0.0093,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": 0.006902736611664295,
      "rewards/margins": 0.13264694809913635,
      "rewards/rejected": -0.12574420869350433,
      "step": 400
    },
    {
      "epoch": 0.9174311926605505,
      "eval_logits/chosen": -2.4834423065185547,
      "eval_logits/rejected": -2.389298439025879,
      "eval_logps/chosen": -285.1916809082031,
      "eval_logps/rejected": -261.8979797363281,
      "eval_loss": 0.008685999549925327,
      "eval_rewards/accuracies": 0.7758620977401733,
      "eval_rewards/chosen": -0.0010154928313568234,
      "eval_rewards/margins": 0.15273970365524292,
      "eval_rewards/rejected": -0.15375518798828125,
      "eval_runtime": 92.569,
      "eval_samples_per_second": 19.639,
      "eval_steps_per_second": 0.313,
      "step": 400
    },
    {
      "epoch": 0.9403669724770642,
      "grad_norm": 1.3075058243660094,
      "learning_rate": 5.4076974448211685e-09,
      "logits/chosen": -2.419281005859375,
      "logits/rejected": -2.368946075439453,
      "logps/chosen": -271.1397705078125,
      "logps/rejected": -253.4750213623047,
      "loss": 0.009,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.0018791112815961242,
      "rewards/margins": 0.15240374207496643,
      "rewards/rejected": -0.1542828381061554,
      "step": 410
    },
    {
      "epoch": 0.963302752293578,
      "grad_norm": 2.8175803183046364,
      "learning_rate": 2.052496544188487e-09,
      "logits/chosen": -2.462890148162842,
      "logits/rejected": -2.377373218536377,
      "logps/chosen": -260.7967834472656,
      "logps/rejected": -261.8900451660156,
      "loss": 0.0093,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.013578072190284729,
      "rewards/margins": 0.14654859900474548,
      "rewards/rejected": -0.16012665629386902,
      "step": 420
    },
    {
      "epoch": 0.9862385321100917,
      "grad_norm": 1.3745801458045255,
      "learning_rate": 2.889724508297886e-10,
      "logits/chosen": -2.4770209789276123,
      "logits/rejected": -2.3631789684295654,
      "logps/chosen": -303.4706115722656,
      "logps/rejected": -256.81878662109375,
      "loss": 0.0094,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -0.0029516436625272036,
      "rewards/margins": 0.12487111985683441,
      "rewards/rejected": -0.12782277166843414,
      "step": 430
    },
    {
      "epoch": 1.0,
      "step": 436,
      "total_flos": 0.0,
      "train_loss": 0.010263856175705927,
      "train_runtime": 11602.0724,
      "train_samples_per_second": 4.806,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 436,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}