{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 1083, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 129536.0, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.2670513093471527, "log_odds_ratio": -0.5927332043647766, "logits/chosen": 4.400671482086182, "logits/rejected": 4.455313682556152, "logps/chosen": -0.7621899843215942, "logps/rejected": -0.9112717509269714, "loss": 1.7067, "nll_loss": 1.2433254718780518, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03810950368642807, "rewards/margins": 0.007454085163772106, "rewards/rejected": -0.04556358978152275, "step": 10 }, { "epoch": 0.06, "grad_norm": 372736.0, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": 0.6244336366653442, "log_odds_ratio": -0.49496006965637207, "logits/chosen": 4.977335453033447, "logits/rejected": 4.971369743347168, "logps/chosen": -0.8002179861068726, "logps/rejected": -1.1836291551589966, "loss": 1.9551, "nll_loss": 1.6530841588974, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.040010906755924225, "rewards/margins": 0.019170550629496574, "rewards/rejected": -0.05918145924806595, "step": 20 }, { "epoch": 0.08, "grad_norm": 198656.0, "learning_rate": 1.5e-06, "log_odds_chosen": 0.22288911044597626, "log_odds_ratio": -0.6639400720596313, "logits/chosen": 4.187830924987793, "logits/rejected": 4.613868713378906, "logps/chosen": -0.8369601368904114, "logps/rejected": -0.9581049680709839, "loss": 2.0627, "nll_loss": 2.1578612327575684, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04184800758957863, "rewards/margins": 0.006057241465896368, "rewards/rejected": -0.04790525510907173, "step": 30 }, { "epoch": 0.11, "grad_norm": 9792.0, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.2988000512123108, "log_odds_ratio": -0.5841063261032104, "logits/chosen": 3.96022367477417, "logits/rejected": 5.4938645362854, "logps/chosen": -0.9292069673538208, "logps/rejected": -1.1179070472717285, "loss": 1.7005, "nll_loss": 2.407797336578369, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0464603491127491, "rewards/margins": 0.00943500641733408, "rewards/rejected": -0.0558953583240509, "step": 40 }, { "epoch": 0.14, "grad_norm": 30592.0, "learning_rate": 2.5e-06, "log_odds_chosen": 0.17792589962482452, "log_odds_ratio": -0.6552326083183289, "logits/chosen": 3.499319076538086, "logits/rejected": 3.937030792236328, "logps/chosen": -0.9279796481132507, "logps/rejected": -1.08034348487854, "loss": 1.4593, "nll_loss": 1.7558667659759521, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.046398986130952835, "rewards/margins": 0.007618183735758066, "rewards/rejected": -0.054017167538404465, "step": 50 }, { "epoch": 0.17, "grad_norm": 118272.0, "learning_rate": 3e-06, "log_odds_chosen": 0.4226877689361572, "log_odds_ratio": -0.6192396283149719, "logits/chosen": 3.7764244079589844, "logits/rejected": 3.936429262161255, "logps/chosen": -1.0400720834732056, "logps/rejected": -1.394266963005066, "loss": 1.833, "nll_loss": 2.083087921142578, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05200360342860222, "rewards/margins": 0.017709745094180107, "rewards/rejected": -0.06971334666013718, "step": 60 }, { "epoch": 0.19, "grad_norm": 21760.0, "learning_rate": 3.5e-06, "log_odds_chosen": -0.028518375009298325, "log_odds_ratio": -0.7405228614807129, "logits/chosen": 3.4970717430114746, "logits/rejected": 3.979139804840088, "logps/chosen": -0.9001835584640503, "logps/rejected": -0.8650320172309875, "loss": 1.89, "nll_loss": 2.863569498062134, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.045009177178144455, "rewards/margins": -0.0017575754318386316, "rewards/rejected": -0.043251603841781616, "step": 70 }, { "epoch": 0.22, "grad_norm": 18432.0, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.20453815162181854, "log_odds_ratio": -0.674200713634491, "logits/chosen": 5.348904132843018, "logits/rejected": 5.157883167266846, "logps/chosen": -0.9463122487068176, "logps/rejected": -1.1002007722854614, "loss": 1.6847, "nll_loss": 1.1438477039337158, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04731561243534088, "rewards/margins": 0.007694427855312824, "rewards/rejected": -0.05501004308462143, "step": 80 }, { "epoch": 0.25, "grad_norm": 40448.0, "learning_rate": 4.5e-06, "log_odds_chosen": 0.04343784973025322, "log_odds_ratio": -0.6950091123580933, "logits/chosen": 3.8532798290252686, "logits/rejected": 4.101541519165039, "logps/chosen": -0.8504306077957153, "logps/rejected": -0.8647142648696899, "loss": 2.0381, "nll_loss": 1.738358736038208, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.042521532624959946, "rewards/margins": 0.0007141813402995467, "rewards/rejected": -0.04323571175336838, "step": 90 }, { "epoch": 0.28, "grad_norm": 26880.0, "learning_rate": 5e-06, "log_odds_chosen": 0.0891367569565773, "log_odds_ratio": -0.6793378591537476, "logits/chosen": 4.815893650054932, "logits/rejected": 4.600794315338135, "logps/chosen": -0.8133799433708191, "logps/rejected": -0.8459997177124023, "loss": 1.6753, "nll_loss": 0.9453862309455872, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.040668997913599014, "rewards/margins": 0.0016309904167428613, "rewards/rejected": -0.042299989610910416, "step": 100 }, { "epoch": 0.3, "grad_norm": 398.0, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.2612980902194977, "log_odds_ratio": -0.6079257130622864, "logits/chosen": 4.852027416229248, "logits/rejected": 5.277764320373535, "logps/chosen": -0.7108517289161682, "logps/rejected": -0.8703864812850952, "loss": 1.084, "nll_loss": 0.8523097038269043, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03554258495569229, "rewards/margins": 0.007976744323968887, "rewards/rejected": -0.04351933300495148, "step": 110 }, { "epoch": 0.33, "grad_norm": 3.40625, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.6404939889907837, "log_odds_ratio": -0.44226688146591187, "logits/chosen": 4.647592544555664, "logits/rejected": 4.496420860290527, "logps/chosen": -0.6522791981697083, "logps/rejected": -1.016757607460022, "loss": 0.7584, "nll_loss": 0.7253808975219727, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03261395916342735, "rewards/margins": 0.018223917111754417, "rewards/rejected": -0.05083787441253662, "step": 120 }, { "epoch": 0.36, "grad_norm": 2.484375, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.40130940079689026, "log_odds_ratio": -0.5459367632865906, "logits/chosen": 3.2656028270721436, "logits/rejected": 4.282083034515381, "logps/chosen": -0.7449339032173157, "logps/rejected": -1.005130410194397, "loss": 0.7507, "nll_loss": 0.6981132626533508, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.037246692925691605, "rewards/margins": 0.013009825721383095, "rewards/rejected": -0.05025652050971985, "step": 130 }, { "epoch": 0.39, "grad_norm": 2.453125, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.167119100689888, "log_odds_ratio": -0.781502366065979, "logits/chosen": 4.5941596031188965, "logits/rejected": 3.490717649459839, "logps/chosen": -0.849592387676239, "logps/rejected": -0.8088783025741577, "loss": 0.7401, "nll_loss": 0.8015066981315613, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04247962310910225, "rewards/margins": -0.0020357039757072926, "rewards/rejected": -0.040443919599056244, "step": 140 }, { "epoch": 0.42, "grad_norm": 2.25, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.01697809062898159, "log_odds_ratio": -0.7280154228210449, "logits/chosen": 3.6419081687927246, "logits/rejected": 4.471678733825684, "logps/chosen": -0.8600558042526245, "logps/rejected": -0.8721025586128235, "loss": 0.7625, "nll_loss": 0.6598131656646729, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.043002791702747345, "rewards/margins": 0.0006023403257131577, "rewards/rejected": -0.04360513016581535, "step": 150 }, { "epoch": 0.44, "grad_norm": 1.703125, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.21897530555725098, "log_odds_ratio": -0.6258933544158936, "logits/chosen": 5.113783836364746, "logits/rejected": 5.189225196838379, "logps/chosen": -0.9097244143486023, "logps/rejected": -1.0366160869598389, "loss": 0.6979, "nll_loss": 0.8092159032821655, "rewards/accuracies": 0.5, "rewards/chosen": -0.04548622667789459, "rewards/margins": 0.0063445777632296085, "rewards/rejected": -0.05183080583810806, "step": 160 }, { "epoch": 0.47, "grad_norm": 1.875, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.354801744222641, "log_odds_ratio": -0.5426136255264282, "logits/chosen": 3.2793831825256348, "logits/rejected": 4.266524314880371, "logps/chosen": -0.6823564171791077, "logps/rejected": -0.8897114992141724, "loss": 0.7135, "nll_loss": 0.6986647844314575, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03411782532930374, "rewards/margins": 0.01036775577813387, "rewards/rejected": -0.04448557645082474, "step": 170 }, { "epoch": 0.5, "grad_norm": 1.734375, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.6848920583724976, "log_odds_ratio": -0.48101407289505005, "logits/chosen": 3.578697681427002, "logits/rejected": 3.992805004119873, "logps/chosen": -0.8704670667648315, "logps/rejected": -1.3336102962493896, "loss": 0.7157, "nll_loss": 0.7008849382400513, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04352335259318352, "rewards/margins": 0.023157157003879547, "rewards/rejected": -0.06668050587177277, "step": 180 }, { "epoch": 0.53, "grad_norm": 2.21875, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.5638074278831482, "log_odds_ratio": -0.5914163589477539, "logits/chosen": 3.691683530807495, "logits/rejected": 5.060125827789307, "logps/chosen": -0.7228246927261353, "logps/rejected": -0.9139431118965149, "loss": 0.7447, "nll_loss": 0.6561239957809448, "rewards/accuracies": 0.5, "rewards/chosen": -0.03614123538136482, "rewards/margins": 0.009555922821164131, "rewards/rejected": -0.0456971600651741, "step": 190 }, { "epoch": 0.55, "grad_norm": 1.890625, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.4812771677970886, "log_odds_ratio": -0.5113085508346558, "logits/chosen": 3.8571255207061768, "logits/rejected": 4.72374153137207, "logps/chosen": -0.7712750434875488, "logps/rejected": -0.9863548278808594, "loss": 0.7045, "nll_loss": 0.6923601031303406, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03856375068426132, "rewards/margins": 0.010753988288342953, "rewards/rejected": -0.04931773990392685, "step": 200 }, { "epoch": 0.58, "grad_norm": 2.265625, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 0.40674924850463867, "log_odds_ratio": -0.5564650297164917, "logits/chosen": 4.317850112915039, "logits/rejected": 5.421323299407959, "logps/chosen": -0.6844028234481812, "logps/rejected": -0.8700541257858276, "loss": 0.7064, "nll_loss": 0.6271991729736328, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.034220144152641296, "rewards/margins": 0.009282568469643593, "rewards/rejected": -0.04350270703434944, "step": 210 }, { "epoch": 0.61, "grad_norm": 2.140625, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.5274091958999634, "log_odds_ratio": -0.4882450997829437, "logits/chosen": 4.161227703094482, "logits/rejected": 4.616461277008057, "logps/chosen": -0.7146799564361572, "logps/rejected": -1.0237388610839844, "loss": 0.6941, "nll_loss": 0.6821693181991577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03573400154709816, "rewards/margins": 0.01545293815433979, "rewards/rejected": -0.0511869378387928, "step": 220 }, { "epoch": 0.64, "grad_norm": 1.8984375, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 0.7328251004219055, "log_odds_ratio": -0.5591307878494263, "logits/chosen": 1.1708077192306519, "logits/rejected": 4.127654075622559, "logps/chosen": -0.6000097990036011, "logps/rejected": -0.7907406091690063, "loss": 0.7189, "nll_loss": 0.5180870294570923, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.030000487342476845, "rewards/margins": 0.009536544792354107, "rewards/rejected": -0.039537034928798676, "step": 230 }, { "epoch": 0.66, "grad_norm": 2.390625, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.30477339029312134, "log_odds_ratio": -0.6216728091239929, "logits/chosen": 4.052184104919434, "logits/rejected": 4.9365081787109375, "logps/chosen": -0.9283322095870972, "logps/rejected": -1.132279634475708, "loss": 0.7021, "nll_loss": 0.74329674243927, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04641660675406456, "rewards/margins": 0.010197381488978863, "rewards/rejected": -0.056613992899656296, "step": 240 }, { "epoch": 0.69, "grad_norm": 1.7890625, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 0.1895931363105774, "log_odds_ratio": -0.6505658626556396, "logits/chosen": 4.295980930328369, "logits/rejected": 4.204621315002441, "logps/chosen": -0.8170398473739624, "logps/rejected": -0.962540328502655, "loss": 0.6787, "nll_loss": 0.7326347231864929, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04085199162364006, "rewards/margins": 0.007275022566318512, "rewards/rejected": -0.04812701791524887, "step": 250 }, { "epoch": 0.72, "grad_norm": 2.375, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.643774688243866, "log_odds_ratio": -0.5065270066261292, "logits/chosen": 3.066134452819824, "logits/rejected": 5.140274524688721, "logps/chosen": -0.6760501861572266, "logps/rejected": -1.0565364360809326, "loss": 0.7083, "nll_loss": 0.5696229934692383, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03380250930786133, "rewards/margins": 0.019024310633540154, "rewards/rejected": -0.05282682180404663, "step": 260 }, { "epoch": 0.75, "grad_norm": 1.921875, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.11027543246746063, "log_odds_ratio": -0.7136895656585693, "logits/chosen": 3.8440871238708496, "logits/rejected": 5.5886664390563965, "logps/chosen": -0.9002073407173157, "logps/rejected": -0.8725664019584656, "loss": 0.6887, "nll_loss": 0.6460383534431458, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.045010361820459366, "rewards/margins": -0.0013820428866893053, "rewards/rejected": -0.04362832009792328, "step": 270 }, { "epoch": 0.78, "grad_norm": 2.4375, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.19881311058998108, "log_odds_ratio": -0.645098865032196, "logits/chosen": 3.4119772911071777, "logits/rejected": 3.858569622039795, "logps/chosen": -0.6236559748649597, "logps/rejected": -0.6720820665359497, "loss": 0.6831, "nll_loss": 0.5412070155143738, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.031182799488306046, "rewards/margins": 0.0024213031865656376, "rewards/rejected": -0.033604104071855545, "step": 280 }, { "epoch": 0.8, "grad_norm": 1.796875, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.8000429272651672, "log_odds_ratio": -0.5133927464485168, "logits/chosen": 3.1116268634796143, "logits/rejected": 4.1175031661987305, "logps/chosen": -0.6604128479957581, "logps/rejected": -0.9541538953781128, "loss": 0.7189, "nll_loss": 0.6099768877029419, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.033020637929439545, "rewards/margins": 0.014687063172459602, "rewards/rejected": -0.047707699239254, "step": 290 }, { "epoch": 0.83, "grad_norm": 2.140625, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 0.27028822898864746, "log_odds_ratio": -0.5845714807510376, "logits/chosen": 4.801875114440918, "logits/rejected": 4.0431060791015625, "logps/chosen": -0.9752612113952637, "logps/rejected": -1.1325619220733643, "loss": 0.7067, "nll_loss": 0.8859332799911499, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.048763059079647064, "rewards/margins": 0.007865036837756634, "rewards/rejected": -0.05662809684872627, "step": 300 }, { "epoch": 0.86, "grad_norm": 2.203125, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.3040723502635956, "log_odds_ratio": -0.5971912145614624, "logits/chosen": 3.540631055831909, "logits/rejected": 4.819446563720703, "logps/chosen": -0.618737518787384, "logps/rejected": -0.8253359794616699, "loss": 0.7267, "nll_loss": 0.5355499982833862, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.030936872586607933, "rewards/margins": 0.01032992172986269, "rewards/rejected": -0.0412667952477932, "step": 310 }, { "epoch": 0.89, "grad_norm": 2.09375, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 0.4747926592826843, "log_odds_ratio": -0.6426443457603455, "logits/chosen": 4.275177955627441, "logits/rejected": 5.392439365386963, "logps/chosen": -0.9479950070381165, "logps/rejected": -1.074181318283081, "loss": 0.7026, "nll_loss": 0.81135094165802, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.047399748116731644, "rewards/margins": 0.006309316959232092, "rewards/rejected": -0.053709059953689575, "step": 320 }, { "epoch": 0.91, "grad_norm": 2.046875, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.05597902089357376, "log_odds_ratio": -0.6852544546127319, "logits/chosen": 4.019442558288574, "logits/rejected": 4.838677406311035, "logps/chosen": -0.8511530756950378, "logps/rejected": -0.8828445672988892, "loss": 0.7044, "nll_loss": 0.694568932056427, "rewards/accuracies": 0.5, "rewards/chosen": -0.04255765676498413, "rewards/margins": 0.001584571204148233, "rewards/rejected": -0.0441422276198864, "step": 330 }, { "epoch": 0.94, "grad_norm": 2.078125, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 0.4387446343898773, "log_odds_ratio": -0.5828881859779358, "logits/chosen": 5.145287990570068, "logits/rejected": 4.682337760925293, "logps/chosen": -0.8366853594779968, "logps/rejected": -1.0714147090911865, "loss": 0.7106, "nll_loss": 0.7823752164840698, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0418342649936676, "rewards/margins": 0.01173648051917553, "rewards/rejected": -0.05357074737548828, "step": 340 }, { "epoch": 0.97, "grad_norm": 1.640625, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 0.646438717842102, "log_odds_ratio": -0.4320458471775055, "logits/chosen": 3.6300628185272217, "logits/rejected": 4.811415195465088, "logps/chosen": -0.697738528251648, "logps/rejected": -1.0509469509124756, "loss": 0.7094, "nll_loss": 0.6207277178764343, "rewards/accuracies": 1.0, "rewards/chosen": -0.0348869264125824, "rewards/margins": 0.017660418525338173, "rewards/rejected": -0.05254734680056572, "step": 350 }, { "epoch": 1.0, "grad_norm": 1.859375, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 0.2544732391834259, "log_odds_ratio": -0.6412532925605774, "logits/chosen": 3.9895572662353516, "logits/rejected": 4.779620170593262, "logps/chosen": -0.9361473321914673, "logps/rejected": -1.1174229383468628, "loss": 0.7128, "nll_loss": 0.7654691934585571, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04680735990405083, "rewards/margins": 0.009063784033060074, "rewards/rejected": -0.0558711476624012, "step": 360 }, { "epoch": 1.02, "grad_norm": 2.0, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 0.40497034788131714, "log_odds_ratio": -0.5424339175224304, "logits/chosen": 3.386862277984619, "logits/rejected": 4.2417168617248535, "logps/chosen": -0.58955979347229, "logps/rejected": -0.7452008128166199, "loss": 0.6815, "nll_loss": 0.5987212061882019, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029477989301085472, "rewards/margins": 0.007782056927680969, "rewards/rejected": -0.03726004809141159, "step": 370 }, { "epoch": 1.05, "grad_norm": 1.796875, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": -0.028061389923095703, "log_odds_ratio": -0.8072420954704285, "logits/chosen": 3.5768370628356934, "logits/rejected": 5.4233012199401855, "logps/chosen": -0.7529481053352356, "logps/rejected": -0.8536372184753418, "loss": 0.6656, "nll_loss": 0.5306578278541565, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03764740750193596, "rewards/margins": 0.005034452769905329, "rewards/rejected": -0.04268186166882515, "step": 380 }, { "epoch": 1.08, "grad_norm": 2.046875, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 0.4068288803100586, "log_odds_ratio": -0.5929966568946838, "logits/chosen": 3.2761809825897217, "logits/rejected": 4.4970245361328125, "logps/chosen": -0.6948614120483398, "logps/rejected": -0.865265965461731, "loss": 0.6794, "nll_loss": 0.6040965914726257, "rewards/accuracies": 0.5, "rewards/chosen": -0.03474307060241699, "rewards/margins": 0.008520226925611496, "rewards/rejected": -0.04326330125331879, "step": 390 }, { "epoch": 1.11, "grad_norm": 2.015625, "learning_rate": 2.5e-06, "log_odds_chosen": 0.1341879665851593, "log_odds_ratio": -0.6464511752128601, "logits/chosen": 3.4316935539245605, "logits/rejected": 3.2954697608947754, "logps/chosen": -0.8758536577224731, "logps/rejected": -0.933131992816925, "loss": 0.6744, "nll_loss": 0.7761722803115845, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.04379267990589142, "rewards/margins": 0.002863916801288724, "rewards/rejected": -0.046656593680381775, "step": 400 }, { "epoch": 1.14, "grad_norm": 2.15625, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 0.2867754399776459, "log_odds_ratio": -0.5857530236244202, "logits/chosen": 3.6360816955566406, "logits/rejected": 3.9970450401306152, "logps/chosen": -0.7139376997947693, "logps/rejected": -0.8725380897521973, "loss": 0.6869, "nll_loss": 0.6628342866897583, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.035696886479854584, "rewards/margins": 0.007930012419819832, "rewards/rejected": -0.043626900762319565, "step": 410 }, { "epoch": 1.16, "grad_norm": 2.15625, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 0.4246703088283539, "log_odds_ratio": -0.5430588722229004, "logits/chosen": 3.829016923904419, "logits/rejected": 4.135481834411621, "logps/chosen": -0.7444478869438171, "logps/rejected": -0.9897812008857727, "loss": 0.6709, "nll_loss": 0.6274694204330444, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.037222400307655334, "rewards/margins": 0.012266661040484905, "rewards/rejected": -0.049489058554172516, "step": 420 }, { "epoch": 1.19, "grad_norm": 2.140625, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 0.25163400173187256, "log_odds_ratio": -0.6242859363555908, "logits/chosen": 3.951763868331909, "logits/rejected": 4.234454154968262, "logps/chosen": -0.7341341376304626, "logps/rejected": -0.9003491401672363, "loss": 0.6944, "nll_loss": 0.7275547385215759, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.03670670837163925, "rewards/margins": 0.008310755714774132, "rewards/rejected": -0.045017458498477936, "step": 430 }, { "epoch": 1.22, "grad_norm": 1.8203125, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 1.0159021615982056, "log_odds_ratio": -0.37116050720214844, "logits/chosen": 4.0213727951049805, "logits/rejected": 3.6271164417266846, "logps/chosen": -0.5240938067436218, "logps/rejected": -0.8907585144042969, "loss": 0.6649, "nll_loss": 0.6113199591636658, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02620469406247139, "rewards/margins": 0.018333233892917633, "rewards/rejected": -0.04453792795538902, "step": 440 }, { "epoch": 1.25, "grad_norm": 1.9765625, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 0.6340302228927612, "log_odds_ratio": -0.5088221430778503, "logits/chosen": 3.0994677543640137, "logits/rejected": 3.617880344390869, "logps/chosen": -0.6344050168991089, "logps/rejected": -0.9070758819580078, "loss": 0.6749, "nll_loss": 0.6057204604148865, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03172025829553604, "rewards/margins": 0.013633537106215954, "rewards/rejected": -0.04535379260778427, "step": 450 }, { "epoch": 1.27, "grad_norm": 2.375, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 0.713164746761322, "log_odds_ratio": -0.5116228461265564, "logits/chosen": 3.274913787841797, "logits/rejected": 3.9031119346618652, "logps/chosen": -0.7013699412345886, "logps/rejected": -0.9506866335868835, "loss": 0.689, "nll_loss": 0.6167888045310974, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03506850078701973, "rewards/margins": 0.012465836480259895, "rewards/rejected": -0.047534335404634476, "step": 460 }, { "epoch": 1.3, "grad_norm": 1.859375, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 0.276517391204834, "log_odds_ratio": -0.6000176668167114, "logits/chosen": 4.087462425231934, "logits/rejected": 4.489025592803955, "logps/chosen": -0.764947772026062, "logps/rejected": -0.9075348973274231, "loss": 0.7095, "nll_loss": 0.673345685005188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03824738413095474, "rewards/margins": 0.007129358593374491, "rewards/rejected": -0.045376747846603394, "step": 470 }, { "epoch": 1.33, "grad_norm": 2.03125, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 1.0662429332733154, "log_odds_ratio": -0.34629103541374207, "logits/chosen": 2.857253313064575, "logits/rejected": 4.2227373123168945, "logps/chosen": -0.6380993127822876, "logps/rejected": -1.1461809873580933, "loss": 0.6967, "nll_loss": 0.6230236291885376, "rewards/accuracies": 1.0, "rewards/chosen": -0.03190496563911438, "rewards/margins": 0.02540409006178379, "rewards/rejected": -0.05730905383825302, "step": 480 }, { "epoch": 1.36, "grad_norm": 1.8515625, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 0.05213805288076401, "log_odds_ratio": -0.7282466888427734, "logits/chosen": 3.712087631225586, "logits/rejected": 3.26226806640625, "logps/chosen": -0.8317926526069641, "logps/rejected": -0.8364424705505371, "loss": 0.6909, "nll_loss": 0.7362145185470581, "rewards/accuracies": 0.5, "rewards/chosen": -0.041589636355638504, "rewards/margins": 0.00023248679644893855, "rewards/rejected": -0.041822124272584915, "step": 490 }, { "epoch": 1.39, "grad_norm": 1.9921875, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 0.6301153302192688, "log_odds_ratio": -0.4543212354183197, "logits/chosen": 3.4883971214294434, "logits/rejected": 4.507528781890869, "logps/chosen": -0.8292129635810852, "logps/rejected": -1.210298776626587, "loss": 0.6785, "nll_loss": 0.6745945811271667, "rewards/accuracies": 1.0, "rewards/chosen": -0.04146064817905426, "rewards/margins": 0.019054297357797623, "rewards/rejected": -0.06051494926214218, "step": 500 }, { "epoch": 1.41, "grad_norm": 1.75, "learning_rate": 2.2140372138502386e-06, "log_odds_chosen": -0.06262560188770294, "log_odds_ratio": -0.8653733134269714, "logits/chosen": 3.5743117332458496, "logits/rejected": 3.30938720703125, "logps/chosen": -0.7883298993110657, "logps/rejected": -0.8334089517593384, "loss": 0.6784, "nll_loss": 0.7287472486495972, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03941649571061134, "rewards/margins": 0.002253949176520109, "rewards/rejected": -0.04167044535279274, "step": 510 }, { "epoch": 1.44, "grad_norm": 2.453125, "learning_rate": 2.1926450482675734e-06, "log_odds_chosen": 0.7889357209205627, "log_odds_ratio": -0.5113512277603149, "logits/chosen": 3.6222686767578125, "logits/rejected": 4.588388919830322, "logps/chosen": -0.6055595278739929, "logps/rejected": -0.8102161288261414, "loss": 0.6911, "nll_loss": 0.7365372180938721, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.030277978628873825, "rewards/margins": 0.01023282390087843, "rewards/rejected": -0.04051080346107483, "step": 520 }, { "epoch": 1.47, "grad_norm": 1.671875, "learning_rate": 2.1718612138153473e-06, "log_odds_chosen": 0.4299934506416321, "log_odds_ratio": -0.5557712316513062, "logits/chosen": 4.014830112457275, "logits/rejected": 5.017902374267578, "logps/chosen": -0.7874509692192078, "logps/rejected": -1.018026351928711, "loss": 0.667, "nll_loss": 0.6355669498443604, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03937254846096039, "rewards/margins": 0.011528769508004189, "rewards/rejected": -0.05090131610631943, "step": 530 }, { "epoch": 1.5, "grad_norm": 2.296875, "learning_rate": 2.151657414559676e-06, "log_odds_chosen": 0.2829347550868988, "log_odds_ratio": -0.6682294607162476, "logits/chosen": 3.943626880645752, "logits/rejected": 4.587673187255859, "logps/chosen": -0.6128166913986206, "logps/rejected": -0.7399159073829651, "loss": 0.6921, "nll_loss": 0.5586395263671875, "rewards/accuracies": 0.5, "rewards/chosen": -0.03064083494246006, "rewards/margins": 0.006354962475597858, "rewards/rejected": -0.03699579834938049, "step": 540 }, { "epoch": 1.52, "grad_norm": 2.34375, "learning_rate": 2.132007163556104e-06, "log_odds_chosen": 0.12430952489376068, "log_odds_ratio": -0.691026508808136, "logits/chosen": 4.236235618591309, "logits/rejected": 4.258731842041016, "logps/chosen": -0.8816810846328735, "logps/rejected": -0.8798414468765259, "loss": 0.6621, "nll_loss": 0.7372727394104004, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.044084057211875916, "rewards/margins": -9.19781596167013e-05, "rewards/rejected": -0.04399207606911659, "step": 550 }, { "epoch": 1.55, "grad_norm": 2.0625, "learning_rate": 2.1128856368212917e-06, "log_odds_chosen": 0.45417746901512146, "log_odds_ratio": -0.6252912878990173, "logits/chosen": 3.6231613159179688, "logits/rejected": 4.052757263183594, "logps/chosen": -0.8618866801261902, "logps/rejected": -1.0115950107574463, "loss": 0.6866, "nll_loss": 0.7204962372779846, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04309433326125145, "rewards/margins": 0.007485417183488607, "rewards/rejected": -0.05057975649833679, "step": 560 }, { "epoch": 1.58, "grad_norm": 1.5078125, "learning_rate": 2.0942695414584777e-06, "log_odds_chosen": 0.2981715500354767, "log_odds_ratio": -0.6889346837997437, "logits/chosen": 4.225995063781738, "logits/rejected": 5.418367862701416, "logps/chosen": -0.8141951560974121, "logps/rejected": -0.9664665460586548, "loss": 0.682, "nll_loss": 0.7680364847183228, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.040709760040044785, "rewards/margins": 0.007613567169755697, "rewards/rejected": -0.04832332953810692, "step": 570 }, { "epoch": 1.61, "grad_norm": 2.078125, "learning_rate": 2.0761369963434992e-06, "log_odds_chosen": 0.09669248759746552, "log_odds_ratio": -0.6933160424232483, "logits/chosen": 4.034695625305176, "logits/rejected": 3.9514598846435547, "logps/chosen": -0.7859554886817932, "logps/rejected": -0.8040157556533813, "loss": 0.6702, "nll_loss": 0.6275065541267395, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03929777070879936, "rewards/margins": 0.0009030139190144837, "rewards/rejected": -0.04020078480243683, "step": 580 }, { "epoch": 1.63, "grad_norm": 2.046875, "learning_rate": 2.058467423981546e-06, "log_odds_chosen": 0.7078760266304016, "log_odds_ratio": -0.41859808564186096, "logits/chosen": 4.595616817474365, "logits/rejected": 5.63283634185791, "logps/chosen": -0.7666529417037964, "logps/rejected": -1.1932477951049805, "loss": 0.6744, "nll_loss": 0.6559887528419495, "rewards/accuracies": 1.0, "rewards/chosen": -0.03833264485001564, "rewards/margins": 0.021329741925001144, "rewards/rejected": -0.059662383049726486, "step": 590 }, { "epoch": 1.66, "grad_norm": 1.984375, "learning_rate": 2.0412414523193154e-06, "log_odds_chosen": 0.46980589628219604, "log_odds_ratio": -0.507270336151123, "logits/chosen": 4.159191131591797, "logits/rejected": 4.8198137283325195, "logps/chosen": -0.6551539897918701, "logps/rejected": -0.8997269868850708, "loss": 0.6999, "nll_loss": 0.5972849726676941, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.032757699489593506, "rewards/margins": 0.012228650972247124, "rewards/rejected": -0.04498635232448578, "step": 600 }, { "epoch": 1.69, "grad_norm": 1.828125, "learning_rate": 2.0244408254472904e-06, "log_odds_chosen": 0.8159920573234558, "log_odds_ratio": -0.44673436880111694, "logits/chosen": 1.4881597757339478, "logits/rejected": 3.2742371559143066, "logps/chosen": -0.5382428169250488, "logps/rejected": -0.8298252820968628, "loss": 0.6778, "nll_loss": 0.47871965169906616, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0269121415913105, "rewards/margins": 0.014579126611351967, "rewards/rejected": -0.04149126634001732, "step": 610 }, { "epoch": 1.72, "grad_norm": 2.0625, "learning_rate": 2.0080483222562476e-06, "log_odds_chosen": 0.4831489026546478, "log_odds_ratio": -0.6106674075126648, "logits/chosen": 2.6716971397399902, "logits/rejected": 4.190335273742676, "logps/chosen": -0.7028285264968872, "logps/rejected": -0.8107720613479614, "loss": 0.7, "nll_loss": 0.6031611561775208, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03514142706990242, "rewards/margins": 0.005397176835685968, "rewards/rejected": -0.04053860157728195, "step": 620 }, { "epoch": 1.75, "grad_norm": 2.390625, "learning_rate": 1.9920476822239895e-06, "log_odds_chosen": 1.1681692600250244, "log_odds_ratio": -0.42384225130081177, "logits/chosen": 1.9299933910369873, "logits/rejected": 4.495340824127197, "logps/chosen": -0.6006202697753906, "logps/rejected": -0.8768091201782227, "loss": 0.6826, "nll_loss": 0.5617047548294067, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03003101423382759, "rewards/margins": 0.013809445314109325, "rewards/rejected": -0.04384046047925949, "step": 630 }, { "epoch": 1.77, "grad_norm": 1.9921875, "learning_rate": 1.976423537605237e-06, "log_odds_chosen": 0.20114317536354065, "log_odds_ratio": -0.6850827932357788, "logits/chosen": 3.9372570514678955, "logits/rejected": 4.620089530944824, "logps/chosen": -0.7602877616882324, "logps/rejected": -0.798611581325531, "loss": 0.693, "nll_loss": 0.6903423070907593, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03801439329981804, "rewards/margins": 0.0019161909585818648, "rewards/rejected": -0.03993058204650879, "step": 640 }, { "epoch": 1.8, "grad_norm": 2.15625, "learning_rate": 1.961161351381841e-06, "log_odds_chosen": 0.7238461971282959, "log_odds_ratio": -0.49284863471984863, "logits/chosen": 3.1412081718444824, "logits/rejected": 4.667212963104248, "logps/chosen": -0.7989972829818726, "logps/rejected": -1.1198698282241821, "loss": 0.6811, "nll_loss": 0.6139580607414246, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03994986042380333, "rewards/margins": 0.01604362577199936, "rewards/rejected": -0.05599348992109299, "step": 650 }, { "epoch": 1.83, "grad_norm": 2.0625, "learning_rate": 1.9462473604038077e-06, "log_odds_chosen": 0.8220060467720032, "log_odds_ratio": -0.3908359408378601, "logits/chosen": 3.9316563606262207, "logits/rejected": 5.108726501464844, "logps/chosen": -0.6555695533752441, "logps/rejected": -1.0596764087677002, "loss": 0.6864, "nll_loss": 0.6063144207000732, "rewards/accuracies": 1.0, "rewards/chosen": -0.032778479158878326, "rewards/margins": 0.020205341279506683, "rewards/rejected": -0.05298382043838501, "step": 660 }, { "epoch": 1.86, "grad_norm": 2.015625, "learning_rate": 1.9316685232156397e-06, "log_odds_chosen": 0.2806245982646942, "log_odds_ratio": -0.6073696613311768, "logits/chosen": 3.3105571269989014, "logits/rejected": 3.892564058303833, "logps/chosen": -0.7684590816497803, "logps/rejected": -0.9330514073371887, "loss": 0.6861, "nll_loss": 0.6917561888694763, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.038422949612140656, "rewards/margins": 0.008229617960751057, "rewards/rejected": -0.046652574092149734, "step": 670 }, { "epoch": 1.88, "grad_norm": 2.234375, "learning_rate": 1.917412472118426e-06, "log_odds_chosen": 0.7242671251296997, "log_odds_ratio": -0.5751112699508667, "logits/chosen": 3.4888782501220703, "logits/rejected": 3.981926679611206, "logps/chosen": -0.7546848654747009, "logps/rejected": -0.9648480415344238, "loss": 0.708, "nll_loss": 0.7166061401367188, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03773424029350281, "rewards/margins": 0.010508162900805473, "rewards/rejected": -0.04824240505695343, "step": 680 }, { "epoch": 1.91, "grad_norm": 1.7265625, "learning_rate": 1.9034674690672024e-06, "log_odds_chosen": 0.007441395428031683, "log_odds_ratio": -0.8017139434814453, "logits/chosen": 4.016451358795166, "logits/rejected": 4.500775337219238, "logps/chosen": -0.6954795718193054, "logps/rejected": -0.7557869553565979, "loss": 0.6758, "nll_loss": 0.5994603633880615, "rewards/accuracies": 0.5, "rewards/chosen": -0.03477397933602333, "rewards/margins": 0.0030153680127114058, "rewards/rejected": -0.037789348512887955, "step": 690 }, { "epoch": 1.94, "grad_norm": 2.109375, "learning_rate": 1.8898223650461362e-06, "log_odds_chosen": 0.8856722712516785, "log_odds_ratio": -0.3986029624938965, "logits/chosen": 1.7587060928344727, "logits/rejected": 4.666166305541992, "logps/chosen": -0.550857424736023, "logps/rejected": -0.8934911489486694, "loss": 0.6606, "nll_loss": 0.4851749539375305, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027542870491743088, "rewards/margins": 0.017131686210632324, "rewards/rejected": -0.044674552977085114, "step": 700 }, { "epoch": 1.97, "grad_norm": 1.7265625, "learning_rate": 1.876466562602004e-06, "log_odds_chosen": 0.24165010452270508, "log_odds_ratio": -0.6184555292129517, "logits/chosen": 2.516463279724121, "logits/rejected": 3.698896884918213, "logps/chosen": -0.5866281390190125, "logps/rejected": -0.6856867074966431, "loss": 0.7084, "nll_loss": 0.6121121644973755, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.029331404715776443, "rewards/margins": 0.004952927120029926, "rewards/rejected": -0.034284330904483795, "step": 710 }, { "epoch": 1.99, "grad_norm": 2.03125, "learning_rate": 1.863389981249825e-06, "log_odds_chosen": 0.913441002368927, "log_odds_ratio": -0.47331008315086365, "logits/chosen": 2.567636251449585, "logits/rejected": 4.045866012573242, "logps/chosen": -0.5542882084846497, "logps/rejected": -0.8390460014343262, "loss": 0.6859, "nll_loss": 0.4496838450431824, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027714407071471214, "rewards/margins": 0.014237893745303154, "rewards/rejected": -0.04195230454206467, "step": 720 }, { "epoch": 2.02, "grad_norm": 2.0, "learning_rate": 1.8505830254940132e-06, "log_odds_chosen": 0.7949598431587219, "log_odds_ratio": -0.45839613676071167, "logits/chosen": 3.325202226638794, "logits/rejected": 4.260237216949463, "logps/chosen": -0.7130070924758911, "logps/rejected": -1.053581953048706, "loss": 0.6691, "nll_loss": 0.6019625663757324, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.035650357604026794, "rewards/margins": 0.01702873967587948, "rewards/rejected": -0.05267909914255142, "step": 730 }, { "epoch": 2.05, "grad_norm": 1.96875, "learning_rate": 1.8380365552345197e-06, "log_odds_chosen": 0.5770705938339233, "log_odds_ratio": -0.5122253894805908, "logits/chosen": 3.619288682937622, "logits/rejected": 4.384647369384766, "logps/chosen": -0.8298980593681335, "logps/rejected": -1.167079210281372, "loss": 0.6766, "nll_loss": 0.704396665096283, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.041494905948638916, "rewards/margins": 0.016859063878655434, "rewards/rejected": -0.0583539679646492, "step": 740 }, { "epoch": 2.08, "grad_norm": 2.0625, "learning_rate": 1.8257418583505536e-06, "log_odds_chosen": 0.6968498826026917, "log_odds_ratio": -0.4758426547050476, "logits/chosen": 3.848654270172119, "logits/rejected": 2.3218348026275635, "logps/chosen": -0.67643803358078, "logps/rejected": -0.8964241743087769, "loss": 0.7021, "nll_loss": 0.6553528904914856, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03382190316915512, "rewards/margins": 0.010999304242432117, "rewards/rejected": -0.04482121020555496, "step": 750 }, { "epoch": 2.11, "grad_norm": 1.9296875, "learning_rate": 1.8136906252750293e-06, "log_odds_chosen": 0.2059679478406906, "log_odds_ratio": -0.6639232039451599, "logits/chosen": 4.111894130706787, "logits/rejected": 4.707150459289551, "logps/chosen": -0.7494447231292725, "logps/rejected": -0.8692378997802734, "loss": 0.6765, "nll_loss": 0.6483981013298035, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03747224062681198, "rewards/margins": 0.005989660043269396, "rewards/rejected": -0.04346190020442009, "step": 760 }, { "epoch": 2.13, "grad_norm": 1.8203125, "learning_rate": 1.801874925391118e-06, "log_odds_chosen": 0.5316376686096191, "log_odds_ratio": -0.5046476721763611, "logits/chosen": 2.8869481086730957, "logits/rejected": 2.6999757289886475, "logps/chosen": -0.5816842317581177, "logps/rejected": -0.7666364908218384, "loss": 0.6631, "nll_loss": 0.6736694574356079, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029084209352731705, "rewards/margins": 0.009247615933418274, "rewards/rejected": -0.03833182901144028, "step": 770 }, { "epoch": 2.16, "grad_norm": 2.03125, "learning_rate": 1.7902871850985824e-06, "log_odds_chosen": 0.37722089886665344, "log_odds_ratio": -0.5441932082176208, "logits/chosen": 3.213557720184326, "logits/rejected": 4.203583240509033, "logps/chosen": -0.7685849070549011, "logps/rejected": -1.0165854692459106, "loss": 0.6767, "nll_loss": 0.6813093423843384, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.038429249078035355, "rewards/margins": 0.012400026433169842, "rewards/rejected": -0.05082928016781807, "step": 780 }, { "epoch": 2.19, "grad_norm": 2.0625, "learning_rate": 1.7789201674120502e-06, "log_odds_chosen": 1.2822726964950562, "log_odds_ratio": -0.43989676237106323, "logits/chosen": 2.3955116271972656, "logits/rejected": 4.735221862792969, "logps/chosen": -0.7316259145736694, "logps/rejected": -1.1169257164001465, "loss": 0.6817, "nll_loss": 0.5958443880081177, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03658129647374153, "rewards/margins": 0.019264988601207733, "rewards/rejected": -0.055846281349658966, "step": 790 }, { "epoch": 2.22, "grad_norm": 2.296875, "learning_rate": 1.7677669529663689e-06, "log_odds_chosen": 0.651089072227478, "log_odds_ratio": -0.44289833307266235, "logits/chosen": 3.480353593826294, "logits/rejected": 4.546199798583984, "logps/chosen": -0.6184307932853699, "logps/rejected": -0.884985625743866, "loss": 0.6483, "nll_loss": 0.5650686025619507, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.030921539291739464, "rewards/margins": 0.01332774292677641, "rewards/rejected": -0.0442492850124836, "step": 800 }, { "epoch": 2.24, "grad_norm": 1.9765625, "learning_rate": 1.7568209223157664e-06, "log_odds_chosen": 0.0874546617269516, "log_odds_ratio": -0.6833248138427734, "logits/chosen": 3.4896228313446045, "logits/rejected": 4.298434734344482, "logps/chosen": -0.7224100828170776, "logps/rejected": -0.7926791906356812, "loss": 0.6619, "nll_loss": 0.5892444849014282, "rewards/accuracies": 0.5, "rewards/chosen": -0.03612050786614418, "rewards/margins": 0.0035134553909301758, "rewards/rejected": -0.039633963257074356, "step": 810 }, { "epoch": 2.27, "grad_norm": 1.953125, "learning_rate": 1.7460757394239458e-06, "log_odds_chosen": 0.7751902341842651, "log_odds_ratio": -0.5393909215927124, "logits/chosen": 4.474148750305176, "logits/rejected": 5.107138156890869, "logps/chosen": -0.8380101919174194, "logps/rejected": -1.0609526634216309, "loss": 0.685, "nll_loss": 0.7294074296951294, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04190050810575485, "rewards/margins": 0.01114712655544281, "rewards/rejected": -0.05304763838648796, "step": 820 }, { "epoch": 2.3, "grad_norm": 1.9765625, "learning_rate": 1.7355253362515584e-06, "log_odds_chosen": 0.5033993721008301, "log_odds_ratio": -0.5074852108955383, "logits/chosen": 2.511612892150879, "logits/rejected": 3.992374897003174, "logps/chosen": -0.5894848704338074, "logps/rejected": -0.8681418299674988, "loss": 0.6802, "nll_loss": 0.5586022138595581, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029474247246980667, "rewards/margins": 0.013932851143181324, "rewards/rejected": -0.043407101184129715, "step": 830 }, { "epoch": 2.33, "grad_norm": 2.046875, "learning_rate": 1.7251638983558855e-06, "log_odds_chosen": 0.6550665497779846, "log_odds_ratio": -0.49995166063308716, "logits/chosen": 3.1267733573913574, "logits/rejected": 3.973337173461914, "logps/chosen": -0.65223228931427, "logps/rejected": -0.904323399066925, "loss": 0.6808, "nll_loss": 0.6052195429801941, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03261161595582962, "rewards/margins": 0.012604555115103722, "rewards/rejected": -0.04521617293357849, "step": 840 }, { "epoch": 2.35, "grad_norm": 1.6796875, "learning_rate": 1.7149858514250883e-06, "log_odds_chosen": 0.28233373165130615, "log_odds_ratio": -0.6105042695999146, "logits/chosen": 4.673365116119385, "logits/rejected": 4.622339248657227, "logps/chosen": -0.839856743812561, "logps/rejected": -0.9960880279541016, "loss": 0.7006, "nll_loss": 0.7127640247344971, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.041992831975221634, "rewards/margins": 0.0078115700744092464, "rewards/rejected": -0.04980440065264702, "step": 850 }, { "epoch": 2.38, "grad_norm": 2.140625, "learning_rate": 1.704985848676184e-06, "log_odds_chosen": 0.3673107922077179, "log_odds_ratio": -0.6019551753997803, "logits/chosen": 4.395766258239746, "logits/rejected": 4.125451564788818, "logps/chosen": -0.7563544511795044, "logps/rejected": -0.9725953936576843, "loss": 0.6872, "nll_loss": 0.6850851774215698, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03781772404909134, "rewards/margins": 0.01081205252557993, "rewards/rejected": -0.048629771918058395, "step": 860 }, { "epoch": 2.41, "grad_norm": 1.90625, "learning_rate": 1.6951587590520263e-06, "log_odds_chosen": 0.316839337348938, "log_odds_ratio": -0.5757532119750977, "logits/chosen": 3.451542615890503, "logits/rejected": 4.92800760269165, "logps/chosen": -0.7138621807098389, "logps/rejected": -0.900250256061554, "loss": 0.6756, "nll_loss": 0.6208301782608032, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03569310903549194, "rewards/margins": 0.009319400414824486, "rewards/rejected": -0.04501251131296158, "step": 870 }, { "epoch": 2.44, "grad_norm": 2.171875, "learning_rate": 1.6854996561581053e-06, "log_odds_chosen": 0.6411941051483154, "log_odds_ratio": -0.4790283739566803, "logits/chosen": 3.6001486778259277, "logits/rejected": 4.4620466232299805, "logps/chosen": -0.7167496085166931, "logps/rejected": -1.0959062576293945, "loss": 0.666, "nll_loss": 0.5813517570495605, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.035837478935718536, "rewards/margins": 0.01895783841609955, "rewards/rejected": -0.05479532480239868, "step": 880 }, { "epoch": 2.47, "grad_norm": 2.125, "learning_rate": 1.6760038078849776e-06, "log_odds_chosen": 0.29189780354499817, "log_odds_ratio": -0.5954831838607788, "logits/chosen": 3.923008680343628, "logits/rejected": 3.9894745349884033, "logps/chosen": -0.7887361645698547, "logps/rejected": -0.9427839517593384, "loss": 0.6461, "nll_loss": 0.669800877571106, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.039436809718608856, "rewards/margins": 0.007702393922954798, "rewards/rejected": -0.04713919758796692, "step": 890 }, { "epoch": 2.49, "grad_norm": 2.0625, "learning_rate": 1.6666666666666667e-06, "log_odds_chosen": 0.47135716676712036, "log_odds_ratio": -0.5324128270149231, "logits/chosen": 3.2858338356018066, "logits/rejected": 4.336097240447998, "logps/chosen": -0.6921708583831787, "logps/rejected": -0.9751580357551575, "loss": 0.6725, "nll_loss": 0.5913586020469666, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.034608542919158936, "rewards/margins": 0.014149360358715057, "rewards/rejected": -0.048757899552583694, "step": 900 }, { "epoch": 2.52, "grad_norm": 2.203125, "learning_rate": 1.6574838603294898e-06, "log_odds_chosen": 0.3408917784690857, "log_odds_ratio": -0.6049419641494751, "logits/chosen": 4.834186553955078, "logits/rejected": 4.803133964538574, "logps/chosen": -0.9150100946426392, "logps/rejected": -1.1368293762207031, "loss": 0.6901, "nll_loss": 0.7697237730026245, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04575050249695778, "rewards/margins": 0.011090965941548347, "rewards/rejected": -0.056841470301151276, "step": 910 }, { "epoch": 2.55, "grad_norm": 1.9296875, "learning_rate": 1.648451183489468e-06, "log_odds_chosen": 0.5196985602378845, "log_odds_ratio": -0.48116031289100647, "logits/chosen": 4.574516773223877, "logits/rejected": 4.646053314208984, "logps/chosen": -0.754222571849823, "logps/rejected": -1.0455068349838257, "loss": 0.6679, "nll_loss": 0.6747040748596191, "rewards/accuracies": 1.0, "rewards/chosen": -0.03771113231778145, "rewards/margins": 0.0145642114803195, "rewards/rejected": -0.05227534845471382, "step": 920 }, { "epoch": 2.58, "grad_norm": 1.6171875, "learning_rate": 1.6395645894598825e-06, "log_odds_chosen": 0.6613763570785522, "log_odds_ratio": -0.44903701543807983, "logits/chosen": 4.178925514221191, "logits/rejected": 4.050136566162109, "logps/chosen": -0.6592333912849426, "logps/rejected": -0.9929401278495789, "loss": 0.6283, "nll_loss": 0.648184061050415, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03296167403459549, "rewards/margins": 0.016685333102941513, "rewards/rejected": -0.049647007137537, "step": 930 }, { "epoch": 2.6, "grad_norm": 2.359375, "learning_rate": 1.6308201826336057e-06, "log_odds_chosen": 0.609188437461853, "log_odds_ratio": -0.46795234084129333, "logits/chosen": 2.6147894859313965, "logits/rejected": 3.9961628913879395, "logps/chosen": -0.6610501408576965, "logps/rejected": -1.0103614330291748, "loss": 0.6824, "nll_loss": 0.5963402390480042, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.033052511513233185, "rewards/margins": 0.017465557903051376, "rewards/rejected": -0.05051806569099426, "step": 940 }, { "epoch": 2.63, "grad_norm": 2.21875, "learning_rate": 1.6222142113076255e-06, "log_odds_chosen": 0.3838690519332886, "log_odds_ratio": -0.6736805438995361, "logits/chosen": 4.249535083770752, "logits/rejected": 4.508255958557129, "logps/chosen": -0.7882817983627319, "logps/rejected": -0.894466757774353, "loss": 0.6759, "nll_loss": 0.6239164471626282, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.03941408917307854, "rewards/margins": 0.005309252999722958, "rewards/rejected": -0.04472333937883377, "step": 950 }, { "epoch": 2.66, "grad_norm": 2.03125, "learning_rate": 1.6137430609197571e-06, "log_odds_chosen": 0.7146129012107849, "log_odds_ratio": -0.4838590621948242, "logits/chosen": 3.506737470626831, "logits/rejected": 4.8492279052734375, "logps/chosen": -0.7767097353935242, "logps/rejected": -1.2055903673171997, "loss": 0.6529, "nll_loss": 0.6371511220932007, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03883548825979233, "rewards/margins": 0.02144402638077736, "rewards/rejected": -0.060279518365859985, "step": 960 }, { "epoch": 2.69, "grad_norm": 1.671875, "learning_rate": 1.605403247669839e-06, "log_odds_chosen": 0.9047861099243164, "log_odds_ratio": -0.46373167634010315, "logits/chosen": 3.7943649291992188, "logits/rejected": 4.658207416534424, "logps/chosen": -0.644945502281189, "logps/rejected": -0.9372779726982117, "loss": 0.6799, "nll_loss": 0.6254302263259888, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03224727883934975, "rewards/margins": 0.014616618864238262, "rewards/rejected": -0.046863894909620285, "step": 970 }, { "epoch": 2.71, "grad_norm": 1.984375, "learning_rate": 1.59719141249985e-06, "log_odds_chosen": 1.218353509902954, "log_odds_ratio": -0.5012712478637695, "logits/chosen": 3.276193141937256, "logits/rejected": 3.3000216484069824, "logps/chosen": -0.660887598991394, "logps/rejected": -1.0681812763214111, "loss": 0.6796, "nll_loss": 0.7227128148078918, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03304437920451164, "rewards/margins": 0.020364681258797646, "rewards/rejected": -0.05340906232595444, "step": 980 }, { "epoch": 2.74, "grad_norm": 2.1875, "learning_rate": 1.5891043154093205e-06, "log_odds_chosen": 0.7260122299194336, "log_odds_ratio": -0.4358360171318054, "logits/chosen": 2.1915860176086426, "logits/rejected": 5.122056007385254, "logps/chosen": -0.6457678079605103, "logps/rejected": -1.0166635513305664, "loss": 0.6739, "nll_loss": 0.5569924712181091, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03228839114308357, "rewards/margins": 0.018544789403676987, "rewards/rejected": -0.05083317309617996, "step": 990 }, { "epoch": 2.77, "grad_norm": 1.859375, "learning_rate": 1.5811388300841898e-06, "log_odds_chosen": 0.6579316854476929, "log_odds_ratio": -0.5773683190345764, "logits/chosen": 3.0676589012145996, "logits/rejected": 3.216492176055908, "logps/chosen": -0.7558283805847168, "logps/rejected": -0.9228881597518921, "loss": 0.6639, "nll_loss": 0.7178717255592346, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0377914197742939, "rewards/margins": 0.008352992124855518, "rewards/rejected": -0.046144407242536545, "step": 1000 }, { "epoch": 2.8, "grad_norm": 2.0625, "learning_rate": 1.5732919388188816e-06, "log_odds_chosen": 0.13750185072422028, "log_odds_ratio": -0.6798086166381836, "logits/chosen": 4.563652515411377, "logits/rejected": 4.581662654876709, "logps/chosen": -0.8638485670089722, "logps/rejected": -0.9687344431877136, "loss": 0.6914, "nll_loss": 0.8300592303276062, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.04319242760539055, "rewards/margins": 0.005244298838078976, "rewards/rejected": -0.0484367236495018, "step": 1010 }, { "epoch": 2.83, "grad_norm": 2.0, "learning_rate": 1.565560727712874e-06, "log_odds_chosen": 0.2685840129852295, "log_odds_ratio": -0.594863772392273, "logits/chosen": 3.9020438194274902, "logits/rejected": 3.4271602630615234, "logps/chosen": -0.849018394947052, "logps/rejected": -0.9738754034042358, "loss": 0.6944, "nll_loss": 0.799597978591919, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.0424509234726429, "rewards/margins": 0.006242851726710796, "rewards/rejected": -0.04869377613067627, "step": 1020 }, { "epoch": 2.85, "grad_norm": 2.234375, "learning_rate": 1.5579423821243897e-06, "log_odds_chosen": 0.6145926117897034, "log_odds_ratio": -0.5494765043258667, "logits/chosen": 3.713395357131958, "logits/rejected": 3.5231990814208984, "logps/chosen": -0.7322524785995483, "logps/rejected": -0.8949598073959351, "loss": 0.6596, "nll_loss": 0.676146388053894, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0366126224398613, "rewards/margins": 0.008135369047522545, "rewards/rejected": -0.044747985899448395, "step": 1030 }, { "epoch": 2.88, "grad_norm": 2.296875, "learning_rate": 1.5504341823651056e-06, "log_odds_chosen": 0.3906387686729431, "log_odds_ratio": -0.5653647780418396, "logits/chosen": 2.943089485168457, "logits/rejected": 4.423019886016846, "logps/chosen": -0.6953160166740417, "logps/rejected": -0.895278811454773, "loss": 0.6726, "nll_loss": 0.6234105229377747, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03476579859852791, "rewards/margins": 0.009998142719268799, "rewards/rejected": -0.044763945043087006, "step": 1040 }, { "epoch": 2.91, "grad_norm": 1.8515625, "learning_rate": 1.5430334996209192e-06, "log_odds_chosen": 0.8320558667182922, "log_odds_ratio": -0.4963892102241516, "logits/chosen": 3.3935484886169434, "logits/rejected": 3.5727221965789795, "logps/chosen": -0.6379098296165466, "logps/rejected": -0.9367613792419434, "loss": 0.6757, "nll_loss": 0.6095770597457886, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03189549595117569, "rewards/margins": 0.014942578971385956, "rewards/rejected": -0.04683807119727135, "step": 1050 }, { "epoch": 2.94, "grad_norm": 1.625, "learning_rate": 1.5357377920848783e-06, "log_odds_chosen": -0.04924366623163223, "log_odds_ratio": -0.7443690299987793, "logits/chosen": 4.197890281677246, "logits/rejected": 3.850818157196045, "logps/chosen": -0.8803885579109192, "logps/rejected": -0.8369588851928711, "loss": 0.6633, "nll_loss": 0.7210748791694641, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.0440194308757782, "rewards/margins": -0.0021714833565056324, "rewards/rejected": -0.04184794798493385, "step": 1060 }, { "epoch": 2.96, "grad_norm": 1.9921875, "learning_rate": 1.5285446012893579e-06, "log_odds_chosen": 0.3693309426307678, "log_odds_ratio": -0.5573875904083252, "logits/chosen": 3.786158800125122, "logits/rejected": 3.326815366744995, "logps/chosen": -0.6857789158821106, "logps/rejected": -0.9004032015800476, "loss": 0.6727, "nll_loss": 0.7129669189453125, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03428894653916359, "rewards/margins": 0.010731215588748455, "rewards/rejected": -0.04502015933394432, "step": 1070 }, { "epoch": 2.99, "grad_norm": 2.171875, "learning_rate": 1.5214515486254614e-06, "log_odds_chosen": 0.3639599680900574, "log_odds_ratio": -0.5332378149032593, "logits/chosen": 3.4862148761749268, "logits/rejected": 3.6384239196777344, "logps/chosen": -0.776229202747345, "logps/rejected": -0.9858113527297974, "loss": 0.6785, "nll_loss": 0.6706089973449707, "rewards/accuracies": 1.0, "rewards/chosen": -0.03881146013736725, "rewards/margins": 0.010479103773832321, "rewards/rejected": -0.04929056018590927, "step": 1080 }, { "epoch": 3.0, "step": 1083, "total_flos": 0.0, "train_loss": 0.7946326331969004, "train_runtime": 13847.9433, "train_samples_per_second": 2.498, "train_steps_per_second": 0.078 } ], "logging_steps": 10, "max_steps": 1083, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }