diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,70590 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 7000, + "global_step": 50310, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 9.938382031405287e-10, + "logits/chosen": -3.0218403339385986, + "logits/rejected": -2.940047025680542, + "logps/chosen": -73.02317810058594, + "logps/rejected": -48.23734664916992, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 9.938382031405287e-09, + "logits/chosen": -2.9618678092956543, + "logits/rejected": -2.9563493728637695, + "logps/chosen": -62.71024703979492, + "logps/rejected": -44.89240264892578, + "loss": 0.693, + "rewards/accuracies": 0.4722222089767456, + "rewards/chosen": 0.00024960198788903654, + "rewards/margins": 0.00026416988112032413, + "rewards/rejected": -1.4567897778761107e-05, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.9876764062810574e-08, + "logits/chosen": -2.9604990482330322, + "logits/rejected": -2.953871250152588, + "logps/chosen": -75.63623809814453, + "logps/rejected": -44.453895568847656, + "loss": 0.6934, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": -0.00019119930220767856, + "rewards/margins": -0.0006826258031651378, + "rewards/rejected": 0.0004914264427497983, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.9815146094215865e-08, + "logits/chosen": -2.938742160797119, + "logits/rejected": -2.9424524307250977, + "logps/chosen": -70.04289245605469, + "logps/rejected": -44.03239059448242, + "loss": 0.6931, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": 3.4008000966423424e-06, + "rewards/margins": -6.249712168937549e-05, + "rewards/rejected": 6.589795520994812e-05, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 3.975352812562115e-08, + "logits/chosen": -2.9838123321533203, + "logits/rejected": -2.947876453399658, + "logps/chosen": -69.44886779785156, + "logps/rejected": -42.50042724609375, + "loss": 0.6932, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.0002481898700352758, + "rewards/margins": 0.00010414600546937436, + "rewards/rejected": 0.00014404390822164714, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.969191015702644e-08, + "logits/chosen": -2.931032657623291, + "logits/rejected": -2.9111180305480957, + "logps/chosen": -68.45838928222656, + "logps/rejected": -44.28104782104492, + "loss": 0.6932, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00015665341925341636, + "rewards/margins": 0.00015643119695596397, + "rewards/rejected": -0.00031308463076129556, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 5.963029218843173e-08, + "logits/chosen": -3.0057373046875, + "logits/rejected": -2.988908052444458, + "logps/chosen": -70.67280578613281, + "logps/rejected": -45.87385559082031, + "loss": 0.6931, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.00039666169323027134, + "rewards/margins": 0.00012486171908676624, + "rewards/rejected": 0.000271800032351166, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 6.956867421983701e-08, + "logits/chosen": -2.9854843616485596, + "logits/rejected": -2.94712233543396, + "logps/chosen": -72.19477844238281, + "logps/rejected": -45.64137268066406, + "loss": 0.693, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 2.2792815798311494e-05, + "rewards/margins": 0.00045540143037214875, + "rewards/rejected": -0.0004326086491346359, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 7.95070562512423e-08, + "logits/chosen": -2.9712636470794678, + "logits/rejected": -2.9385437965393066, + "logps/chosen": -72.05076599121094, + "logps/rejected": -44.82624435424805, + "loss": 0.6929, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 7.38982780603692e-05, + "rewards/margins": 0.0005020436947233975, + "rewards/rejected": -0.0004281453730072826, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 8.94454382826476e-08, + "logits/chosen": -2.9702322483062744, + "logits/rejected": -2.9738237857818604, + "logps/chosen": -75.1077880859375, + "logps/rejected": -44.23411178588867, + "loss": 0.6929, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.00030066302861087024, + "rewards/margins": -1.1079793694079854e-05, + "rewards/rejected": -0.0002895832003559917, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 9.938382031405288e-08, + "logits/chosen": -2.989254951477051, + "logits/rejected": -3.0034003257751465, + "logps/chosen": -71.69474792480469, + "logps/rejected": -45.18828201293945, + "loss": 0.6929, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.00046932976692914963, + "rewards/margins": 0.0008053073543123901, + "rewards/rejected": -0.00033597755827941, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.0932220234545817e-07, + "logits/chosen": -2.9632771015167236, + "logits/rejected": -2.9279801845550537, + "logps/chosen": -69.83442687988281, + "logps/rejected": -44.24370574951172, + "loss": 0.6927, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.00013900376507081091, + "rewards/margins": 0.0008278178866021335, + "rewards/rejected": -0.0006888141506351531, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.1926058437686346e-07, + "logits/chosen": -2.9743335247039795, + "logits/rejected": -2.957536220550537, + "logps/chosen": -67.26787567138672, + "logps/rejected": -45.17921829223633, + "loss": 0.6928, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.00032205486786551774, + "rewards/margins": 0.0009219359490089118, + "rewards/rejected": -0.0005998812266625464, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.2919896640826874e-07, + "logits/chosen": -2.995117664337158, + "logits/rejected": -2.9725120067596436, + "logps/chosen": -64.87049102783203, + "logps/rejected": -43.63898468017578, + "loss": 0.6926, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0004153624176979065, + "rewards/margins": 0.0008974180673249066, + "rewards/rejected": -0.0004820556496270001, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.3913734843967403e-07, + "logits/chosen": -2.966625928878784, + "logits/rejected": -2.937664747238159, + "logps/chosen": -71.7284164428711, + "logps/rejected": -44.03948211669922, + "loss": 0.6923, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.0004335699195507914, + "rewards/margins": 0.0012579860631376505, + "rewards/rejected": -0.0008244161726906896, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.490757304710793e-07, + "logits/chosen": -2.98844051361084, + "logits/rejected": -2.948321580886841, + "logps/chosen": -73.96751403808594, + "logps/rejected": -43.87926483154297, + "loss": 0.6921, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.0013482751091942191, + "rewards/margins": 0.0024665442761033773, + "rewards/rejected": -0.0011182688176631927, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.590141125024846e-07, + "logits/chosen": -2.9966416358947754, + "logits/rejected": -2.972294330596924, + "logps/chosen": -68.28688049316406, + "logps/rejected": -43.788970947265625, + "loss": 0.6919, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.0008087358437478542, + "rewards/margins": 0.002199420938268304, + "rewards/rejected": -0.0013906850945204496, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.6895249453388988e-07, + "logits/chosen": -2.9536423683166504, + "logits/rejected": -2.9364702701568604, + "logps/chosen": -75.17198181152344, + "logps/rejected": -44.039005279541016, + "loss": 0.6916, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.0014083824353292584, + "rewards/margins": 0.002807198790833354, + "rewards/rejected": -0.00139881600625813, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.788908765652952e-07, + "logits/chosen": -2.9649224281311035, + "logits/rejected": -2.938742160797119, + "logps/chosen": -72.81172943115234, + "logps/rejected": -44.81022262573242, + "loss": 0.6915, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.0007405938813462853, + "rewards/margins": 0.003570768516510725, + "rewards/rejected": -0.002830174285918474, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.8882925859670047e-07, + "logits/chosen": -3.021819829940796, + "logits/rejected": -2.967698574066162, + "logps/chosen": -77.29383087158203, + "logps/rejected": -46.151893615722656, + "loss": 0.691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.001676453510299325, + "rewards/margins": 0.004718971438705921, + "rewards/rejected": -0.0030425176955759525, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 1.9876764062810576e-07, + "logits/chosen": -2.9397988319396973, + "logits/rejected": -2.917863368988037, + "logps/chosen": -71.49901580810547, + "logps/rejected": -43.490272521972656, + "loss": 0.6908, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.0021435001399368048, + "rewards/margins": 0.004856492858380079, + "rewards/rejected": -0.0027129927184432745, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.0870602265951104e-07, + "logits/chosen": -2.983961343765259, + "logits/rejected": -2.9618401527404785, + "logps/chosen": -66.7228012084961, + "logps/rejected": -43.64258575439453, + "loss": 0.6902, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0024440973065793514, + "rewards/margins": 0.005612206645309925, + "rewards/rejected": -0.0031681086402386427, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 2.1864440469091635e-07, + "logits/chosen": -2.9875996112823486, + "logits/rejected": -2.9680562019348145, + "logps/chosen": -78.15232849121094, + "logps/rejected": -45.131385803222656, + "loss": 0.6893, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002925829729065299, + "rewards/margins": 0.00734885036945343, + "rewards/rejected": -0.0044230199418962, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 2.2858278672232163e-07, + "logits/chosen": -2.950333833694458, + "logits/rejected": -2.919938802719116, + "logps/chosen": -75.40995788574219, + "logps/rejected": -45.04998779296875, + "loss": 0.6893, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.0028421056922525167, + "rewards/margins": 0.008465655148029327, + "rewards/rejected": -0.005623549222946167, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 2.385211687537269e-07, + "logits/chosen": -2.988229274749756, + "logits/rejected": -2.988311767578125, + "logps/chosen": -71.42940521240234, + "logps/rejected": -44.344566345214844, + "loss": 0.6883, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.0028978486079722643, + "rewards/margins": 0.009432690218091011, + "rewards/rejected": -0.006534843239933252, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 2.484595507851322e-07, + "logits/chosen": -2.9741768836975098, + "logits/rejected": -2.965543270111084, + "logps/chosen": -71.6085433959961, + "logps/rejected": -46.04959487915039, + "loss": 0.6879, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.002663231687620282, + "rewards/margins": 0.01022954098880291, + "rewards/rejected": -0.007566309534013271, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 2.583979328165375e-07, + "logits/chosen": -2.997169256210327, + "logits/rejected": -2.9719414710998535, + "logps/chosen": -73.43926239013672, + "logps/rejected": -45.43397903442383, + "loss": 0.6873, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0042081596329808235, + "rewards/margins": 0.011326530016958714, + "rewards/rejected": -0.00711837038397789, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 2.6833631484794277e-07, + "logits/chosen": -2.973280429840088, + "logits/rejected": -2.968266725540161, + "logps/chosen": -71.72904968261719, + "logps/rejected": -44.16044235229492, + "loss": 0.6859, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006081936415284872, + "rewards/margins": 0.01435314305126667, + "rewards/rejected": -0.008271204307675362, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 2.7827469687934805e-07, + "logits/chosen": -2.969820499420166, + "logits/rejected": -2.962028980255127, + "logps/chosen": -76.09542846679688, + "logps/rejected": -44.35374069213867, + "loss": 0.6858, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.006933757569640875, + "rewards/margins": 0.01638907380402088, + "rewards/rejected": -0.009455314837396145, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 2.8821307891075334e-07, + "logits/chosen": -2.984722137451172, + "logits/rejected": -2.9411227703094482, + "logps/chosen": -62.493873596191406, + "logps/rejected": -45.33516311645508, + "loss": 0.6855, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.003541975049301982, + "rewards/margins": 0.014006761834025383, + "rewards/rejected": -0.010464785620570183, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 2.981514609421586e-07, + "logits/chosen": -2.9810471534729004, + "logits/rejected": -2.956744909286499, + "logps/chosen": -74.27899169921875, + "logps/rejected": -45.19349670410156, + "loss": 0.6839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00896482914686203, + "rewards/margins": 0.019126158207654953, + "rewards/rejected": -0.010161329992115498, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 3.0808984297356396e-07, + "logits/chosen": -2.987964153289795, + "logits/rejected": -2.9734349250793457, + "logps/chosen": -71.14620208740234, + "logps/rejected": -45.47577667236328, + "loss": 0.6825, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.008326919749379158, + "rewards/margins": 0.021482601761817932, + "rewards/rejected": -0.013155683875083923, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 3.180282250049692e-07, + "logits/chosen": -2.97816801071167, + "logits/rejected": -2.948195695877075, + "logps/chosen": -65.21907043457031, + "logps/rejected": -45.496604919433594, + "loss": 0.6818, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00784609280526638, + "rewards/margins": 0.020932724699378014, + "rewards/rejected": -0.013086630031466484, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 3.2796660703637447e-07, + "logits/chosen": -3.012901544570923, + "logits/rejected": -2.9814612865448, + "logps/chosen": -70.08870697021484, + "logps/rejected": -47.361114501953125, + "loss": 0.6799, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.009475327096879482, + "rewards/margins": 0.026397859677672386, + "rewards/rejected": -0.01692252978682518, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 3.3790498906777976e-07, + "logits/chosen": -2.9749598503112793, + "logits/rejected": -2.968273162841797, + "logps/chosen": -71.00715637207031, + "logps/rejected": -45.43170166015625, + "loss": 0.6787, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.012844504788517952, + "rewards/margins": 0.030695322901010513, + "rewards/rejected": -0.01785081811249256, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 3.4784337109918504e-07, + "logits/chosen": -2.9603538513183594, + "logits/rejected": -2.945807933807373, + "logps/chosen": -70.25607299804688, + "logps/rejected": -46.45256042480469, + "loss": 0.6775, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.00953252986073494, + "rewards/margins": 0.030728604644536972, + "rewards/rejected": -0.02119607664644718, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 3.577817531305904e-07, + "logits/chosen": -2.9706406593322754, + "logits/rejected": -2.9509472846984863, + "logps/chosen": -71.34107971191406, + "logps/rejected": -47.943389892578125, + "loss": 0.676, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.009725799784064293, + "rewards/margins": 0.033471547067165375, + "rewards/rejected": -0.023745745420455933, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 3.6772013516199566e-07, + "logits/chosen": -2.989041805267334, + "logits/rejected": -2.948477268218994, + "logps/chosen": -68.99105834960938, + "logps/rejected": -47.76191711425781, + "loss": 0.6748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017860647290945053, + "rewards/margins": 0.03811539337038994, + "rewards/rejected": -0.020254749804735184, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 3.7765851719340094e-07, + "logits/chosen": -2.978956937789917, + "logits/rejected": -2.9634833335876465, + "logps/chosen": -77.84246826171875, + "logps/rejected": -48.146484375, + "loss": 0.6737, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.01408182829618454, + "rewards/margins": 0.039258528500795364, + "rewards/rejected": -0.025176703929901123, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 3.8759689922480623e-07, + "logits/chosen": -2.966442108154297, + "logits/rejected": -2.9499220848083496, + "logps/chosen": -65.14691925048828, + "logps/rejected": -48.054229736328125, + "loss": 0.6714, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.011923792771995068, + "rewards/margins": 0.04394835978746414, + "rewards/rejected": -0.0320245660841465, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 3.975352812562115e-07, + "logits/chosen": -2.9605329036712646, + "logits/rejected": -2.953305244445801, + "logps/chosen": -68.85978698730469, + "logps/rejected": -47.85978317260742, + "loss": 0.6701, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.017412390559911728, + "rewards/margins": 0.04696084186434746, + "rewards/rejected": -0.02954845502972603, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.074736632876168e-07, + "logits/chosen": -2.9576849937438965, + "logits/rejected": -2.967395305633545, + "logps/chosen": -68.44776916503906, + "logps/rejected": -47.39374542236328, + "loss": 0.6685, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.016429241746664047, + "rewards/margins": 0.051134128123521805, + "rewards/rejected": -0.03470488637685776, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.174120453190221e-07, + "logits/chosen": -2.9821112155914307, + "logits/rejected": -2.940983295440674, + "logps/chosen": -68.70052337646484, + "logps/rejected": -47.69420623779297, + "loss": 0.6646, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02265460416674614, + "rewards/margins": 0.059500087052583694, + "rewards/rejected": -0.036845482885837555, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.273504273504274e-07, + "logits/chosen": -3.0058252811431885, + "logits/rejected": -2.967651605606079, + "logps/chosen": -63.493621826171875, + "logps/rejected": -50.01903533935547, + "loss": 0.6639, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.013338056392967701, + "rewards/margins": 0.05726746469736099, + "rewards/rejected": -0.04392940551042557, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.372888093818327e-07, + "logits/chosen": -2.9821419715881348, + "logits/rejected": -2.9721193313598633, + "logps/chosen": -68.68019104003906, + "logps/rejected": -49.001522064208984, + "loss": 0.6605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.028216874226927757, + "rewards/margins": 0.07037781924009323, + "rewards/rejected": -0.04216094687581062, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.47227191413238e-07, + "logits/chosen": -2.9543004035949707, + "logits/rejected": -2.9492108821868896, + "logps/chosen": -63.76306915283203, + "logps/rejected": -49.251956939697266, + "loss": 0.661, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.021893437951803207, + "rewards/margins": 0.06285889446735382, + "rewards/rejected": -0.04096546024084091, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.5716557344464327e-07, + "logits/chosen": -3.0126965045928955, + "logits/rejected": -2.9949119091033936, + "logps/chosen": -64.54737091064453, + "logps/rejected": -50.091007232666016, + "loss": 0.6539, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.027567267417907715, + "rewards/margins": 0.08035286515951157, + "rewards/rejected": -0.05278560519218445, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.6710395547604855e-07, + "logits/chosen": -2.958397626876831, + "logits/rejected": -2.927009344100952, + "logps/chosen": -63.194427490234375, + "logps/rejected": -47.510719299316406, + "loss": 0.654, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02065487764775753, + "rewards/margins": 0.07059844583272934, + "rewards/rejected": -0.04994357004761696, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.770423375074538e-07, + "logits/chosen": -2.9852523803710938, + "logits/rejected": -2.971374034881592, + "logps/chosen": -65.12055969238281, + "logps/rejected": -49.480201721191406, + "loss": 0.6494, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.023866429924964905, + "rewards/margins": 0.08510489761829376, + "rewards/rejected": -0.061238475143909454, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.869807195388592e-07, + "logits/chosen": -2.9966256618499756, + "logits/rejected": -2.949772357940674, + "logps/chosen": -65.80155944824219, + "logps/rejected": -49.00783157348633, + "loss": 0.6466, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.037832971662282944, + "rewards/margins": 0.09684345871210098, + "rewards/rejected": -0.05901048332452774, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 4.969191015702644e-07, + "logits/chosen": -2.9895145893096924, + "logits/rejected": -2.989184617996216, + "logps/chosen": -74.5199203491211, + "logps/rejected": -50.57038497924805, + "loss": 0.638, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0440087765455246, + "rewards/margins": 0.10899189859628677, + "rewards/rejected": -0.06498311460018158, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 5.068574836016696e-07, + "logits/chosen": -2.9883549213409424, + "logits/rejected": -2.9589343070983887, + "logps/chosen": -67.21178436279297, + "logps/rejected": -53.14472198486328, + "loss": 0.6411, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03456006571650505, + "rewards/margins": 0.11196110397577286, + "rewards/rejected": -0.07740103453397751, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 5.16795865633075e-07, + "logits/chosen": -2.9538991451263428, + "logits/rejected": -2.953151226043701, + "logps/chosen": -67.80831909179688, + "logps/rejected": -50.541709899902344, + "loss": 0.6381, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03886599838733673, + "rewards/margins": 0.10856328159570694, + "rewards/rejected": -0.0696972906589508, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 5.267342476644802e-07, + "logits/chosen": -2.987692356109619, + "logits/rejected": -2.989316940307617, + "logps/chosen": -67.93988800048828, + "logps/rejected": -52.96715545654297, + "loss": 0.6334, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.03496728092432022, + "rewards/margins": 0.12306027114391327, + "rewards/rejected": -0.08809299767017365, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 5.366726296958855e-07, + "logits/chosen": -2.9974300861358643, + "logits/rejected": -2.981309175491333, + "logps/chosen": -70.23783874511719, + "logps/rejected": -51.838661193847656, + "loss": 0.6317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04535972699522972, + "rewards/margins": 0.13089272379875183, + "rewards/rejected": -0.08553299307823181, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 5.466110117272909e-07, + "logits/chosen": -2.9830803871154785, + "logits/rejected": -2.9559905529022217, + "logps/chosen": -65.56177520751953, + "logps/rejected": -53.15424346923828, + "loss": 0.6273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.044293973594903946, + "rewards/margins": 0.14079733192920685, + "rewards/rejected": -0.096503347158432, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 5.565493937586961e-07, + "logits/chosen": -2.9877357482910156, + "logits/rejected": -3.011868715286255, + "logps/chosen": -63.41651153564453, + "logps/rejected": -55.3110237121582, + "loss": 0.6219, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.04214970022439957, + "rewards/margins": 0.14666105806827545, + "rewards/rejected": -0.10451134294271469, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 5.664877757901014e-07, + "logits/chosen": -3.0175890922546387, + "logits/rejected": -3.004194498062134, + "logps/chosen": -68.44681549072266, + "logps/rejected": -52.69337844848633, + "loss": 0.6137, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0691230297088623, + "rewards/margins": 0.16195707023143768, + "rewards/rejected": -0.09283401817083359, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 5.764261578215067e-07, + "logits/chosen": -2.9806010723114014, + "logits/rejected": -2.9613728523254395, + "logps/chosen": -60.87751007080078, + "logps/rejected": -56.103599548339844, + "loss": 0.6053, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05534251779317856, + "rewards/margins": 0.17521242797374725, + "rewards/rejected": -0.1198699101805687, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 5.86364539852912e-07, + "logits/chosen": -2.9623851776123047, + "logits/rejected": -2.9473297595977783, + "logps/chosen": -63.61543655395508, + "logps/rejected": -57.820289611816406, + "loss": 0.6044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.061884354799985886, + "rewards/margins": 0.18909001350402832, + "rewards/rejected": -0.12720565497875214, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 5.963029218843172e-07, + "logits/chosen": -2.959725856781006, + "logits/rejected": -2.9625604152679443, + "logps/chosen": -62.54412078857422, + "logps/rejected": -57.79389190673828, + "loss": 0.5999, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.06699486076831818, + "rewards/margins": 0.18586081266403198, + "rewards/rejected": -0.1188659518957138, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 6.062413039157226e-07, + "logits/chosen": -2.987450361251831, + "logits/rejected": -2.979149341583252, + "logps/chosen": -59.422454833984375, + "logps/rejected": -56.3871955871582, + "loss": 0.5892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08414442837238312, + "rewards/margins": 0.21681544184684753, + "rewards/rejected": -0.13267099857330322, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 6.161796859471279e-07, + "logits/chosen": -3.0046346187591553, + "logits/rejected": -2.965883731842041, + "logps/chosen": -56.12904739379883, + "logps/rejected": -59.7369499206543, + "loss": 0.5809, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09052859991788864, + "rewards/margins": 0.23301804065704346, + "rewards/rejected": -0.14248943328857422, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 6.26118067978533e-07, + "logits/chosen": -2.9555015563964844, + "logits/rejected": -2.942434310913086, + "logps/chosen": -62.20178985595703, + "logps/rejected": -61.55986785888672, + "loss": 0.5709, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09668537974357605, + "rewards/margins": 0.2660496234893799, + "rewards/rejected": -0.16936424374580383, + "step": 630 + }, + { + "epoch": 0.04, + "learning_rate": 6.360564500099384e-07, + "logits/chosen": -2.997354507446289, + "logits/rejected": -2.997156858444214, + "logps/chosen": -54.53004837036133, + "logps/rejected": -60.6444206237793, + "loss": 0.5697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.09886881709098816, + "rewards/margins": 0.2593911290168762, + "rewards/rejected": -0.16052231192588806, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 6.459948320413437e-07, + "logits/chosen": -2.980217456817627, + "logits/rejected": -2.990428924560547, + "logps/chosen": -58.79888153076172, + "logps/rejected": -61.6186637878418, + "loss": 0.5596, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11014284193515778, + "rewards/margins": 0.27415168285369873, + "rewards/rejected": -0.16400885581970215, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 6.559332140727489e-07, + "logits/chosen": -2.9917922019958496, + "logits/rejected": -2.9781980514526367, + "logps/chosen": -57.63948440551758, + "logps/rejected": -63.41618728637695, + "loss": 0.5565, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10788984596729279, + "rewards/margins": 0.2862575650215149, + "rewards/rejected": -0.1783677190542221, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 6.658715961041543e-07, + "logits/chosen": -2.9772791862487793, + "logits/rejected": -2.9862213134765625, + "logps/chosen": -57.66252899169922, + "logps/rejected": -64.41493225097656, + "loss": 0.5469, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12357179820537567, + "rewards/margins": 0.31052836775779724, + "rewards/rejected": -0.18695658445358276, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 6.758099781355595e-07, + "logits/chosen": -3.0169644355773926, + "logits/rejected": -2.993161678314209, + "logps/chosen": -56.02703857421875, + "logps/rejected": -65.37163543701172, + "loss": 0.5342, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1470944881439209, + "rewards/margins": 0.3548891842365265, + "rewards/rejected": -0.20779471099376678, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 6.857483601669648e-07, + "logits/chosen": -3.004271984100342, + "logits/rejected": -2.9846913814544678, + "logps/chosen": -56.86962890625, + "logps/rejected": -66.35066986083984, + "loss": 0.5281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.14802919328212738, + "rewards/margins": 0.3678736984729767, + "rewards/rejected": -0.2198444902896881, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 6.956867421983701e-07, + "logits/chosen": -2.9689719676971436, + "logits/rejected": -2.978224754333496, + "logps/chosen": -59.26942825317383, + "logps/rejected": -66.37487030029297, + "loss": 0.5215, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1530422568321228, + "rewards/margins": 0.37646859884262085, + "rewards/rejected": -0.22342631220817566, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 7.056251242297754e-07, + "logits/chosen": -2.975046396255493, + "logits/rejected": -2.9638750553131104, + "logps/chosen": -56.147735595703125, + "logps/rejected": -69.89857482910156, + "loss": 0.5144, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1424897015094757, + "rewards/margins": 0.4067215323448181, + "rewards/rejected": -0.2642318904399872, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 7.155635062611808e-07, + "logits/chosen": -2.9593985080718994, + "logits/rejected": -2.9972317218780518, + "logps/chosen": -51.78497314453125, + "logps/rejected": -68.33277893066406, + "loss": 0.5092, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1566731184720993, + "rewards/margins": 0.4000648558139801, + "rewards/rejected": -0.243391752243042, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 7.25501888292586e-07, + "logits/chosen": -2.9621880054473877, + "logits/rejected": -2.9895238876342773, + "logps/chosen": -54.41298294067383, + "logps/rejected": -72.35991668701172, + "loss": 0.5048, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.17735710740089417, + "rewards/margins": 0.4531319737434387, + "rewards/rejected": -0.27577486634254456, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 7.354402703239913e-07, + "logits/chosen": -2.976144790649414, + "logits/rejected": -2.984219551086426, + "logps/chosen": -53.3802375793457, + "logps/rejected": -70.50772857666016, + "loss": 0.4992, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16987410187721252, + "rewards/margins": 0.4365244507789612, + "rewards/rejected": -0.26665031909942627, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 7.453786523553966e-07, + "logits/chosen": -2.9507055282592773, + "logits/rejected": -2.9569320678710938, + "logps/chosen": -50.86923599243164, + "logps/rejected": -73.02778625488281, + "loss": 0.4886, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1691243201494217, + "rewards/margins": 0.4565269947052002, + "rewards/rejected": -0.2874026596546173, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 7.553170343868019e-07, + "logits/chosen": -2.9611563682556152, + "logits/rejected": -2.9940297603607178, + "logps/chosen": -49.2674560546875, + "logps/rejected": -75.46057891845703, + "loss": 0.4712, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19806522130966187, + "rewards/margins": 0.5186591148376465, + "rewards/rejected": -0.3205938935279846, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 7.652554164182071e-07, + "logits/chosen": -2.9476170539855957, + "logits/rejected": -2.9274778366088867, + "logps/chosen": -53.78412628173828, + "logps/rejected": -73.97071838378906, + "loss": 0.4823, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19257666170597076, + "rewards/margins": 0.4919704496860504, + "rewards/rejected": -0.29939383268356323, + "step": 770 + }, + { + "epoch": 0.05, + "learning_rate": 7.751937984496125e-07, + "logits/chosen": -2.962630033493042, + "logits/rejected": -2.9868996143341064, + "logps/chosen": -50.600746154785156, + "logps/rejected": -77.97933959960938, + "loss": 0.4689, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18909578025341034, + "rewards/margins": 0.5196400880813599, + "rewards/rejected": -0.33054429292678833, + "step": 780 + }, + { + "epoch": 0.05, + "learning_rate": 7.851321804810178e-07, + "logits/chosen": -2.962425470352173, + "logits/rejected": -2.942141532897949, + "logps/chosen": -48.71178436279297, + "logps/rejected": -78.54088592529297, + "loss": 0.457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19333525002002716, + "rewards/margins": 0.5478498339653015, + "rewards/rejected": -0.35451453924179077, + "step": 790 + }, + { + "epoch": 0.05, + "learning_rate": 7.95070562512423e-07, + "logits/chosen": -2.9782376289367676, + "logits/rejected": -2.964660882949829, + "logps/chosen": -53.38645553588867, + "logps/rejected": -82.45108032226562, + "loss": 0.4534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20937713980674744, + "rewards/margins": 0.5798531770706177, + "rewards/rejected": -0.37047603726387024, + "step": 800 + }, + { + "epoch": 0.05, + "learning_rate": 8.050089445438284e-07, + "logits/chosen": -2.9950320720672607, + "logits/rejected": -2.972954273223877, + "logps/chosen": -51.9129524230957, + "logps/rejected": -78.4679946899414, + "loss": 0.4583, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1914788782596588, + "rewards/margins": 0.5305585861206055, + "rewards/rejected": -0.33907973766326904, + "step": 810 + }, + { + "epoch": 0.05, + "learning_rate": 8.149473265752336e-07, + "logits/chosen": -3.0063626766204834, + "logits/rejected": -2.9628567695617676, + "logps/chosen": -53.054283142089844, + "logps/rejected": -83.8841323852539, + "loss": 0.4448, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2074771225452423, + "rewards/margins": 0.5962440967559814, + "rewards/rejected": -0.38876691460609436, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 8.248857086066389e-07, + "logits/chosen": -2.9732773303985596, + "logits/rejected": -2.9658243656158447, + "logps/chosen": -51.035011291503906, + "logps/rejected": -83.63090515136719, + "loss": 0.4384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19818072021007538, + "rewards/margins": 0.6051009297370911, + "rewards/rejected": -0.4069201946258545, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 8.348240906380442e-07, + "logits/chosen": -2.9848427772521973, + "logits/rejected": -2.9683117866516113, + "logps/chosen": -47.78346633911133, + "logps/rejected": -87.0458984375, + "loss": 0.4281, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21166002750396729, + "rewards/margins": 0.6392298936843872, + "rewards/rejected": -0.4275698661804199, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 8.447624726694495e-07, + "logits/chosen": -2.995678424835205, + "logits/rejected": -3.0024828910827637, + "logps/chosen": -54.467124938964844, + "logps/rejected": -86.72532653808594, + "loss": 0.4354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19690290093421936, + "rewards/margins": 0.6241633892059326, + "rewards/rejected": -0.42726054787635803, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 8.547008547008548e-07, + "logits/chosen": -2.9429619312286377, + "logits/rejected": -2.950666904449463, + "logps/chosen": -49.288841247558594, + "logps/rejected": -85.37086486816406, + "loss": 0.4273, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2182169407606125, + "rewards/margins": 0.6259053945541382, + "rewards/rejected": -0.4076884388923645, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 8.646392367322601e-07, + "logits/chosen": -2.98612642288208, + "logits/rejected": -2.9788975715637207, + "logps/chosen": -44.57892990112305, + "logps/rejected": -87.64762878417969, + "loss": 0.4191, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2114826738834381, + "rewards/margins": 0.6498143076896667, + "rewards/rejected": -0.4383315443992615, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 8.745776187636654e-07, + "logits/chosen": -3.0083725452423096, + "logits/rejected": -2.9872288703918457, + "logps/chosen": -54.485618591308594, + "logps/rejected": -88.82099914550781, + "loss": 0.4126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24242253601551056, + "rewards/margins": 0.6851190328598022, + "rewards/rejected": -0.4426964819431305, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 8.845160007950706e-07, + "logits/chosen": -3.002410888671875, + "logits/rejected": -2.9717094898223877, + "logps/chosen": -50.73870086669922, + "logps/rejected": -89.54593658447266, + "loss": 0.4084, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21918468177318573, + "rewards/margins": 0.6681510210037231, + "rewards/rejected": -0.44896626472473145, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 8.94454382826476e-07, + "logits/chosen": -2.9618163108825684, + "logits/rejected": -2.966247081756592, + "logps/chosen": -47.223793029785156, + "logps/rejected": -89.93421936035156, + "loss": 0.4139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22395136952400208, + "rewards/margins": 0.6739547848701477, + "rewards/rejected": -0.450003445148468, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 9.043927648578812e-07, + "logits/chosen": -2.9758992195129395, + "logits/rejected": -2.955958843231201, + "logps/chosen": -49.3495979309082, + "logps/rejected": -90.64035034179688, + "loss": 0.4134, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22812247276306152, + "rewards/margins": 0.7028956413269043, + "rewards/rejected": -0.4747731685638428, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 9.143311468892865e-07, + "logits/chosen": -2.9744534492492676, + "logits/rejected": -3.012028455734253, + "logps/chosen": -50.84080505371094, + "logps/rejected": -94.28749084472656, + "loss": 0.3953, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2396964281797409, + "rewards/margins": 0.7375591993331909, + "rewards/rejected": -0.4978628158569336, + "step": 920 + }, + { + "epoch": 0.06, + "learning_rate": 9.242695289206919e-07, + "logits/chosen": -2.947758197784424, + "logits/rejected": -2.9638397693634033, + "logps/chosen": -50.96099090576172, + "logps/rejected": -89.93829345703125, + "loss": 0.4071, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23924145102500916, + "rewards/margins": 0.6867080926895142, + "rewards/rejected": -0.4474666714668274, + "step": 930 + }, + { + "epoch": 0.06, + "learning_rate": 9.342079109520971e-07, + "logits/chosen": -2.952387571334839, + "logits/rejected": -2.965949773788452, + "logps/chosen": -47.782405853271484, + "logps/rejected": -94.4518814086914, + "loss": 0.389, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23538902401924133, + "rewards/margins": 0.7464532852172852, + "rewards/rejected": -0.5110644102096558, + "step": 940 + }, + { + "epoch": 0.06, + "learning_rate": 9.441462929835024e-07, + "logits/chosen": -2.9822800159454346, + "logits/rejected": -2.975546360015869, + "logps/chosen": -40.84980773925781, + "logps/rejected": -92.09322357177734, + "loss": 0.398, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22563132643699646, + "rewards/margins": 0.7102110981941223, + "rewards/rejected": -0.48457974195480347, + "step": 950 + }, + { + "epoch": 0.06, + "learning_rate": 9.540846750149077e-07, + "logits/chosen": -2.969069004058838, + "logits/rejected": -2.962684154510498, + "logps/chosen": -47.630165100097656, + "logps/rejected": -97.15986633300781, + "loss": 0.3899, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23567290604114532, + "rewards/margins": 0.7591179609298706, + "rewards/rejected": -0.5234450697898865, + "step": 960 + }, + { + "epoch": 0.06, + "learning_rate": 9.64023057046313e-07, + "logits/chosen": -2.9731202125549316, + "logits/rejected": -2.9434409141540527, + "logps/chosen": -42.85427474975586, + "logps/rejected": -93.60237121582031, + "loss": 0.3852, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24563130736351013, + "rewards/margins": 0.7461942434310913, + "rewards/rejected": -0.5005629062652588, + "step": 970 + }, + { + "epoch": 0.06, + "learning_rate": 9.739614390777183e-07, + "logits/chosen": -2.9469094276428223, + "logits/rejected": -2.98797345161438, + "logps/chosen": -48.71785354614258, + "logps/rejected": -94.83198547363281, + "loss": 0.3875, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24081316590309143, + "rewards/margins": 0.7389912009239197, + "rewards/rejected": -0.49817800521850586, + "step": 980 + }, + { + "epoch": 0.06, + "learning_rate": 9.838998211091236e-07, + "logits/chosen": -2.95385479927063, + "logits/rejected": -2.9593377113342285, + "logps/chosen": -45.76028060913086, + "logps/rejected": -96.6683578491211, + "loss": 0.3801, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24966220557689667, + "rewards/margins": 0.765512228012085, + "rewards/rejected": -0.5158500671386719, + "step": 990 + }, + { + "epoch": 0.06, + "learning_rate": 9.938382031405288e-07, + "logits/chosen": -2.9899439811706543, + "logits/rejected": -2.990107297897339, + "logps/chosen": -46.114768981933594, + "logps/rejected": -99.82188415527344, + "loss": 0.3742, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24618954956531525, + "rewards/margins": 0.786666750907898, + "rewards/rejected": -0.5404771566390991, + "step": 1000 + }, + { + "epoch": 0.06, + "learning_rate": 1.0037765851719342e-06, + "logits/chosen": -2.9668798446655273, + "logits/rejected": -2.9723763465881348, + "logps/chosen": -44.59617233276367, + "logps/rejected": -97.80751037597656, + "loss": 0.3802, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24568989872932434, + "rewards/margins": 0.7936664819717407, + "rewards/rejected": -0.5479766130447388, + "step": 1010 + }, + { + "epoch": 0.06, + "learning_rate": 1.0137149672033393e-06, + "logits/chosen": -2.961395740509033, + "logits/rejected": -2.951077699661255, + "logps/chosen": -44.144432067871094, + "logps/rejected": -99.34906768798828, + "loss": 0.3704, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2572305500507355, + "rewards/margins": 0.811815083026886, + "rewards/rejected": -0.5545845031738281, + "step": 1020 + }, + { + "epoch": 0.06, + "learning_rate": 1.0236533492347447e-06, + "logits/chosen": -2.9847769737243652, + "logits/rejected": -2.9409685134887695, + "logps/chosen": -42.09632873535156, + "logps/rejected": -99.50137329101562, + "loss": 0.3703, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25040099024772644, + "rewards/margins": 0.7920783758163452, + "rewards/rejected": -0.5416773557662964, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 1.03359173126615e-06, + "logits/chosen": -2.9414637088775635, + "logits/rejected": -2.908144474029541, + "logps/chosen": -46.711570739746094, + "logps/rejected": -98.54896545410156, + "loss": 0.3675, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2561383545398712, + "rewards/margins": 0.8060194849967957, + "rewards/rejected": -0.549881100654602, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 1.0435301132975552e-06, + "logits/chosen": -2.983294725418091, + "logits/rejected": -2.95699143409729, + "logps/chosen": -46.20642852783203, + "logps/rejected": -102.4758529663086, + "loss": 0.3605, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2601773738861084, + "rewards/margins": 0.8497975468635559, + "rewards/rejected": -0.5896202325820923, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 1.0534684953289604e-06, + "logits/chosen": -2.954195976257324, + "logits/rejected": -2.9083893299102783, + "logps/chosen": -45.27033233642578, + "logps/rejected": -104.21604919433594, + "loss": 0.3598, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23958437144756317, + "rewards/margins": 0.8385517001152039, + "rewards/rejected": -0.5989673137664795, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 1.0634068773603658e-06, + "logits/chosen": -2.946291923522949, + "logits/rejected": -2.9133691787719727, + "logps/chosen": -43.65072250366211, + "logps/rejected": -102.16130828857422, + "loss": 0.3612, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2648928165435791, + "rewards/margins": 0.8436846733093262, + "rewards/rejected": -0.5787917971611023, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 1.073345259391771e-06, + "logits/chosen": -2.942638874053955, + "logits/rejected": -2.9335644245147705, + "logps/chosen": -48.75079345703125, + "logps/rejected": -103.7171859741211, + "loss": 0.3528, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26143261790275574, + "rewards/margins": 0.8594480752944946, + "rewards/rejected": -0.5980154275894165, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 1.0832836414231763e-06, + "logits/chosen": -2.9323132038116455, + "logits/rejected": -2.913806676864624, + "logps/chosen": -48.25647735595703, + "logps/rejected": -101.50083923339844, + "loss": 0.3559, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25861674547195435, + "rewards/margins": 0.8367189168930054, + "rewards/rejected": -0.5781022310256958, + "step": 1090 + }, + { + "epoch": 0.07, + "learning_rate": 1.0932220234545817e-06, + "logits/chosen": -2.9763708114624023, + "logits/rejected": -2.9589896202087402, + "logps/chosen": -41.532142639160156, + "logps/rejected": -105.98603820800781, + "loss": 0.3544, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26403117179870605, + "rewards/margins": 0.8827972412109375, + "rewards/rejected": -0.6187661290168762, + "step": 1100 + }, + { + "epoch": 0.07, + "learning_rate": 1.103160405485987e-06, + "logits/chosen": -2.9788546562194824, + "logits/rejected": -2.9246952533721924, + "logps/chosen": -44.79595184326172, + "logps/rejected": -105.32807922363281, + "loss": 0.355, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.267597496509552, + "rewards/margins": 0.8784884214401245, + "rewards/rejected": -0.6108909845352173, + "step": 1110 + }, + { + "epoch": 0.07, + "learning_rate": 1.1130987875173922e-06, + "logits/chosen": -2.8998022079467773, + "logits/rejected": -2.891361713409424, + "logps/chosen": -43.60729217529297, + "logps/rejected": -106.59000396728516, + "loss": 0.3453, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26675909757614136, + "rewards/margins": 0.8832080960273743, + "rewards/rejected": -0.6164489984512329, + "step": 1120 + }, + { + "epoch": 0.07, + "learning_rate": 1.1230371695487974e-06, + "logits/chosen": -2.9689712524414062, + "logits/rejected": -2.9361870288848877, + "logps/chosen": -41.960838317871094, + "logps/rejected": -109.47734069824219, + "loss": 0.3413, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2581261098384857, + "rewards/margins": 0.9055348634719849, + "rewards/rejected": -0.6474087238311768, + "step": 1130 + }, + { + "epoch": 0.07, + "learning_rate": 1.1329755515802029e-06, + "logits/chosen": -2.957937717437744, + "logits/rejected": -2.9321389198303223, + "logps/chosen": -37.86954879760742, + "logps/rejected": -104.15303039550781, + "loss": 0.3457, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28091493248939514, + "rewards/margins": 0.8806669116020203, + "rewards/rejected": -0.5997520685195923, + "step": 1140 + }, + { + "epoch": 0.07, + "learning_rate": 1.1429139336116081e-06, + "logits/chosen": -2.982106924057007, + "logits/rejected": -2.9367918968200684, + "logps/chosen": -43.054290771484375, + "logps/rejected": -110.64491271972656, + "loss": 0.3317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30060526728630066, + "rewards/margins": 0.958831787109375, + "rewards/rejected": -0.6582265496253967, + "step": 1150 + }, + { + "epoch": 0.07, + "learning_rate": 1.1528523156430133e-06, + "logits/chosen": -2.9483280181884766, + "logits/rejected": -2.916172742843628, + "logps/chosen": -44.473876953125, + "logps/rejected": -112.30595397949219, + "loss": 0.3354, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27052047848701477, + "rewards/margins": 0.9437543153762817, + "rewards/rejected": -0.6732339262962341, + "step": 1160 + }, + { + "epoch": 0.07, + "learning_rate": 1.1627906976744188e-06, + "logits/chosen": -2.9228413105010986, + "logits/rejected": -2.892449140548706, + "logps/chosen": -45.81367874145508, + "logps/rejected": -110.29158020019531, + "loss": 0.3305, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2775477170944214, + "rewards/margins": 0.9275795817375183, + "rewards/rejected": -0.6500318646430969, + "step": 1170 + }, + { + "epoch": 0.07, + "learning_rate": 1.172729079705824e-06, + "logits/chosen": -2.916384220123291, + "logits/rejected": -2.8964712619781494, + "logps/chosen": -48.085121154785156, + "logps/rejected": -111.40904235839844, + "loss": 0.3252, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2755643427371979, + "rewards/margins": 0.9488896131515503, + "rewards/rejected": -0.6733254194259644, + "step": 1180 + }, + { + "epoch": 0.07, + "learning_rate": 1.1826674617372293e-06, + "logits/chosen": -2.96451997756958, + "logits/rejected": -2.929577112197876, + "logps/chosen": -42.83641815185547, + "logps/rejected": -112.46778869628906, + "loss": 0.3209, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29049235582351685, + "rewards/margins": 0.96845543384552, + "rewards/rejected": -0.677963137626648, + "step": 1190 + }, + { + "epoch": 0.07, + "learning_rate": 1.1926058437686345e-06, + "logits/chosen": -2.9245667457580566, + "logits/rejected": -2.912393093109131, + "logps/chosen": -42.31415557861328, + "logps/rejected": -114.44093322753906, + "loss": 0.3216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.276333749294281, + "rewards/margins": 0.9857515096664429, + "rewards/rejected": -0.7094178795814514, + "step": 1200 + }, + { + "epoch": 0.07, + "learning_rate": 1.20254422580004e-06, + "logits/chosen": -2.9792442321777344, + "logits/rejected": -2.9402084350585938, + "logps/chosen": -45.81867218017578, + "logps/rejected": -118.02745056152344, + "loss": 0.3081, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2988472878932953, + "rewards/margins": 1.038311243057251, + "rewards/rejected": -0.7394639849662781, + "step": 1210 + }, + { + "epoch": 0.07, + "learning_rate": 1.2124826078314452e-06, + "logits/chosen": -2.9441699981689453, + "logits/rejected": -2.9214982986450195, + "logps/chosen": -46.35295867919922, + "logps/rejected": -116.0353775024414, + "loss": 0.3119, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.27399203181266785, + "rewards/margins": 0.9982835650444031, + "rewards/rejected": -0.7242916226387024, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 1.2224209898628504e-06, + "logits/chosen": -2.9310688972473145, + "logits/rejected": -2.8821468353271484, + "logps/chosen": -44.355873107910156, + "logps/rejected": -117.6589126586914, + "loss": 0.3222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2562260329723358, + "rewards/margins": 0.9979751706123352, + "rewards/rejected": -0.741749107837677, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 1.2323593718942558e-06, + "logits/chosen": -2.9453577995300293, + "logits/rejected": -2.906303882598877, + "logps/chosen": -44.794151306152344, + "logps/rejected": -120.19110107421875, + "loss": 0.311, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28007739782333374, + "rewards/margins": 1.0430974960327148, + "rewards/rejected": -0.7630199790000916, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 1.242297753925661e-06, + "logits/chosen": -2.924051523208618, + "logits/rejected": -2.8995883464813232, + "logps/chosen": -36.76924133300781, + "logps/rejected": -119.93827056884766, + "loss": 0.2994, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29065531492233276, + "rewards/margins": 1.0389933586120605, + "rewards/rejected": -0.7483380436897278, + "step": 1250 + }, + { + "epoch": 0.08, + "learning_rate": 1.252236135957066e-06, + "logits/chosen": -2.9887847900390625, + "logits/rejected": -2.9285659790039062, + "logps/chosen": -42.90056228637695, + "logps/rejected": -123.27613830566406, + "loss": 0.2959, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29128286242485046, + "rewards/margins": 1.0795278549194336, + "rewards/rejected": -0.7882449626922607, + "step": 1260 + }, + { + "epoch": 0.08, + "learning_rate": 1.2621745179884715e-06, + "logits/chosen": -2.964524269104004, + "logits/rejected": -2.8739566802978516, + "logps/chosen": -44.307228088378906, + "logps/rejected": -124.32806396484375, + "loss": 0.2895, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.307515412569046, + "rewards/margins": 1.120300054550171, + "rewards/rejected": -0.8127846717834473, + "step": 1270 + }, + { + "epoch": 0.08, + "learning_rate": 1.2721129000198768e-06, + "logits/chosen": -2.9385740756988525, + "logits/rejected": -2.864626884460449, + "logps/chosen": -45.3248291015625, + "logps/rejected": -124.88838195800781, + "loss": 0.2951, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29406994581222534, + "rewards/margins": 1.110584020614624, + "rewards/rejected": -0.8165140151977539, + "step": 1280 + }, + { + "epoch": 0.08, + "learning_rate": 1.282051282051282e-06, + "logits/chosen": -2.929049015045166, + "logits/rejected": -2.887411594390869, + "logps/chosen": -40.715126037597656, + "logps/rejected": -127.87248229980469, + "loss": 0.2861, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3066486716270447, + "rewards/margins": 1.1390666961669922, + "rewards/rejected": -0.8324179649353027, + "step": 1290 + }, + { + "epoch": 0.08, + "learning_rate": 1.2919896640826874e-06, + "logits/chosen": -2.9244682788848877, + "logits/rejected": -2.9236178398132324, + "logps/chosen": -39.14201736450195, + "logps/rejected": -129.1346435546875, + "loss": 0.2815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2926309108734131, + "rewards/margins": 1.1283031702041626, + "rewards/rejected": -0.8356720805168152, + "step": 1300 + }, + { + "epoch": 0.08, + "learning_rate": 1.3019280461140927e-06, + "logits/chosen": -2.970465660095215, + "logits/rejected": -2.9091796875, + "logps/chosen": -45.896629333496094, + "logps/rejected": -129.3776092529297, + "loss": 0.2784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30494052171707153, + "rewards/margins": 1.162015438079834, + "rewards/rejected": -0.8570748567581177, + "step": 1310 + }, + { + "epoch": 0.08, + "learning_rate": 1.3118664281454979e-06, + "logits/chosen": -2.9044699668884277, + "logits/rejected": -2.849128007888794, + "logps/chosen": -40.792999267578125, + "logps/rejected": -131.70736694335938, + "loss": 0.2736, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2985122501850128, + "rewards/margins": 1.1631443500518799, + "rewards/rejected": -0.8646320104598999, + "step": 1320 + }, + { + "epoch": 0.08, + "learning_rate": 1.3218048101769031e-06, + "logits/chosen": -2.9164700508117676, + "logits/rejected": -2.8974251747131348, + "logps/chosen": -46.31407928466797, + "logps/rejected": -134.1363525390625, + "loss": 0.2672, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2929149270057678, + "rewards/margins": 1.184870958328247, + "rewards/rejected": -0.8919559717178345, + "step": 1330 + }, + { + "epoch": 0.08, + "learning_rate": 1.3317431922083086e-06, + "logits/chosen": -2.9394633769989014, + "logits/rejected": -2.9111828804016113, + "logps/chosen": -36.217926025390625, + "logps/rejected": -130.59249877929688, + "loss": 0.2695, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3006562888622284, + "rewards/margins": 1.1630761623382568, + "rewards/rejected": -0.8624197840690613, + "step": 1340 + }, + { + "epoch": 0.08, + "learning_rate": 1.3416815742397138e-06, + "logits/chosen": -2.9409735202789307, + "logits/rejected": -2.8888020515441895, + "logps/chosen": -40.7383918762207, + "logps/rejected": -137.83474731445312, + "loss": 0.2595, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.298238068819046, + "rewards/margins": 1.2299134731292725, + "rewards/rejected": -0.9316754341125488, + "step": 1350 + }, + { + "epoch": 0.08, + "learning_rate": 1.351619956271119e-06, + "logits/chosen": -2.9436936378479004, + "logits/rejected": -2.8949098587036133, + "logps/chosen": -45.67042541503906, + "logps/rejected": -140.38766479492188, + "loss": 0.2507, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3088319003582001, + "rewards/margins": 1.2657891511917114, + "rewards/rejected": -0.956957221031189, + "step": 1360 + }, + { + "epoch": 0.08, + "learning_rate": 1.3615583383025245e-06, + "logits/chosen": -2.9387409687042236, + "logits/rejected": -2.8832027912139893, + "logps/chosen": -34.867549896240234, + "logps/rejected": -140.9546661376953, + "loss": 0.2558, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29934054613113403, + "rewards/margins": 1.2633939981460571, + "rewards/rejected": -0.9640534520149231, + "step": 1370 + }, + { + "epoch": 0.08, + "learning_rate": 1.3714967203339297e-06, + "logits/chosen": -2.9165754318237305, + "logits/rejected": -2.908573865890503, + "logps/chosen": -38.23236083984375, + "logps/rejected": -140.98251342773438, + "loss": 0.2546, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3026297688484192, + "rewards/margins": 1.2641355991363525, + "rewards/rejected": -0.9615060091018677, + "step": 1380 + }, + { + "epoch": 0.08, + "learning_rate": 1.381435102365335e-06, + "logits/chosen": -2.925297498703003, + "logits/rejected": -2.878129720687866, + "logps/chosen": -39.85452651977539, + "logps/rejected": -141.86251831054688, + "loss": 0.2534, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.304016649723053, + "rewards/margins": 1.2717875242233276, + "rewards/rejected": -0.9677708745002747, + "step": 1390 + }, + { + "epoch": 0.08, + "learning_rate": 1.3913734843967402e-06, + "logits/chosen": -2.924449920654297, + "logits/rejected": -2.904493808746338, + "logps/chosen": -37.08269500732422, + "logps/rejected": -140.50038146972656, + "loss": 0.2436, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30948275327682495, + "rewards/margins": 1.2736009359359741, + "rewards/rejected": -0.9641181826591492, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 1.4013118664281456e-06, + "logits/chosen": -2.967745542526245, + "logits/rejected": -2.896939754486084, + "logps/chosen": -37.66357421875, + "logps/rejected": -142.93434143066406, + "loss": 0.2496, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2874212861061096, + "rewards/margins": 1.2640149593353271, + "rewards/rejected": -0.9765936732292175, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 1.4112502484595508e-06, + "logits/chosen": -2.951911687850952, + "logits/rejected": -2.8962619304656982, + "logps/chosen": -40.64483642578125, + "logps/rejected": -145.44163513183594, + "loss": 0.2384, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31696954369544983, + "rewards/margins": 1.3312621116638184, + "rewards/rejected": -1.0142927169799805, + "step": 1420 + }, + { + "epoch": 0.09, + "learning_rate": 1.421188630490956e-06, + "logits/chosen": -2.9361109733581543, + "logits/rejected": -2.8758249282836914, + "logps/chosen": -38.79964065551758, + "logps/rejected": -146.67324829101562, + "loss": 0.2394, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29289090633392334, + "rewards/margins": 1.3078114986419678, + "rewards/rejected": -1.0149204730987549, + "step": 1430 + }, + { + "epoch": 0.09, + "learning_rate": 1.4311270125223615e-06, + "logits/chosen": -2.9488155841827393, + "logits/rejected": -2.9059524536132812, + "logps/chosen": -46.274288177490234, + "logps/rejected": -147.9485626220703, + "loss": 0.2328, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30536288022994995, + "rewards/margins": 1.331538200378418, + "rewards/rejected": -1.0261752605438232, + "step": 1440 + }, + { + "epoch": 0.09, + "learning_rate": 1.4410653945537667e-06, + "logits/chosen": -2.9230549335479736, + "logits/rejected": -2.878357172012329, + "logps/chosen": -47.830711364746094, + "logps/rejected": -143.77590942382812, + "loss": 0.2377, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2835736870765686, + "rewards/margins": 1.2721548080444336, + "rewards/rejected": -0.9885808825492859, + "step": 1450 + }, + { + "epoch": 0.09, + "learning_rate": 1.451003776585172e-06, + "logits/chosen": -2.956698179244995, + "logits/rejected": -2.8707072734832764, + "logps/chosen": -38.53097152709961, + "logps/rejected": -148.80825805664062, + "loss": 0.2253, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3125065863132477, + "rewards/margins": 1.376301646232605, + "rewards/rejected": -1.0637949705123901, + "step": 1460 + }, + { + "epoch": 0.09, + "learning_rate": 1.4609421586165772e-06, + "logits/chosen": -2.9724314212799072, + "logits/rejected": -2.8991143703460693, + "logps/chosen": -42.491878509521484, + "logps/rejected": -151.6505584716797, + "loss": 0.2232, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.31689000129699707, + "rewards/margins": 1.3956716060638428, + "rewards/rejected": -1.0787817239761353, + "step": 1470 + }, + { + "epoch": 0.09, + "learning_rate": 1.4708805406479826e-06, + "logits/chosen": -2.9285428524017334, + "logits/rejected": -2.8855035305023193, + "logps/chosen": -39.49323272705078, + "logps/rejected": -149.090087890625, + "loss": 0.2295, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28731364011764526, + "rewards/margins": 1.3422085046768188, + "rewards/rejected": -1.054895043373108, + "step": 1480 + }, + { + "epoch": 0.09, + "learning_rate": 1.4808189226793879e-06, + "logits/chosen": -2.927196741104126, + "logits/rejected": -2.874084234237671, + "logps/chosen": -44.55095291137695, + "logps/rejected": -156.89939880371094, + "loss": 0.2285, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2680080533027649, + "rewards/margins": 1.3889793157577515, + "rewards/rejected": -1.1209713220596313, + "step": 1490 + }, + { + "epoch": 0.09, + "learning_rate": 1.490757304710793e-06, + "logits/chosen": -2.9545087814331055, + "logits/rejected": -2.923438787460327, + "logps/chosen": -44.786529541015625, + "logps/rejected": -153.52267456054688, + "loss": 0.2244, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28377148509025574, + "rewards/margins": 1.3718507289886475, + "rewards/rejected": -1.0880792140960693, + "step": 1500 + }, + { + "epoch": 0.09, + "learning_rate": 1.5006956867421985e-06, + "logits/chosen": -2.9451799392700195, + "logits/rejected": -2.8646039962768555, + "logps/chosen": -41.443214416503906, + "logps/rejected": -160.72991943359375, + "loss": 0.2126, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30741560459136963, + "rewards/margins": 1.4733822345733643, + "rewards/rejected": -1.1659666299819946, + "step": 1510 + }, + { + "epoch": 0.09, + "learning_rate": 1.5106340687736038e-06, + "logits/chosen": -2.917659282684326, + "logits/rejected": -2.8530144691467285, + "logps/chosen": -41.021400451660156, + "logps/rejected": -160.82330322265625, + "loss": 0.2154, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.30029305815696716, + "rewards/margins": 1.468364953994751, + "rewards/rejected": -1.168071985244751, + "step": 1520 + }, + { + "epoch": 0.09, + "learning_rate": 1.520572450805009e-06, + "logits/chosen": -2.9592232704162598, + "logits/rejected": -2.890079975128174, + "logps/chosen": -43.687782287597656, + "logps/rejected": -158.5913543701172, + "loss": 0.216, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.3011111617088318, + "rewards/margins": 1.4519691467285156, + "rewards/rejected": -1.1508580446243286, + "step": 1530 + }, + { + "epoch": 0.09, + "learning_rate": 1.5305108328364142e-06, + "logits/chosen": -2.949289083480835, + "logits/rejected": -2.909923791885376, + "logps/chosen": -42.339317321777344, + "logps/rejected": -158.1796875, + "loss": 0.2175, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2839142978191376, + "rewards/margins": 1.438750982284546, + "rewards/rejected": -1.154836654663086, + "step": 1540 + }, + { + "epoch": 0.09, + "learning_rate": 1.5404492148678197e-06, + "logits/chosen": -2.947479724884033, + "logits/rejected": -2.9036335945129395, + "logps/chosen": -35.69739532470703, + "logps/rejected": -160.46084594726562, + "loss": 0.2151, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2824109196662903, + "rewards/margins": 1.453467845916748, + "rewards/rejected": -1.1710569858551025, + "step": 1550 + }, + { + "epoch": 0.09, + "learning_rate": 1.550387596899225e-06, + "logits/chosen": -2.907451629638672, + "logits/rejected": -2.8666439056396484, + "logps/chosen": -36.6124267578125, + "logps/rejected": -167.05599975585938, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.29000118374824524, + "rewards/margins": 1.519871473312378, + "rewards/rejected": -1.229870319366455, + "step": 1560 + }, + { + "epoch": 0.09, + "learning_rate": 1.5603259789306301e-06, + "logits/chosen": -2.939171314239502, + "logits/rejected": -2.8935093879699707, + "logps/chosen": -45.197120666503906, + "logps/rejected": -165.12063598632812, + "loss": 0.2068, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2768077552318573, + "rewards/margins": 1.4892899990081787, + "rewards/rejected": -1.2124820947647095, + "step": 1570 + }, + { + "epoch": 0.09, + "learning_rate": 1.5702643609620356e-06, + "logits/chosen": -2.931025505065918, + "logits/rejected": -2.89253306388855, + "logps/chosen": -41.273170471191406, + "logps/rejected": -166.5309295654297, + "loss": 0.1976, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28405338525772095, + "rewards/margins": 1.5089828968048096, + "rewards/rejected": -1.2249294519424438, + "step": 1580 + }, + { + "epoch": 0.09, + "learning_rate": 1.5802027429934408e-06, + "logits/chosen": -2.936065435409546, + "logits/rejected": -2.8526697158813477, + "logps/chosen": -47.48405075073242, + "logps/rejected": -169.52169799804688, + "loss": 0.2043, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2552075982093811, + "rewards/margins": 1.5022130012512207, + "rewards/rejected": -1.2470054626464844, + "step": 1590 + }, + { + "epoch": 0.1, + "learning_rate": 1.590141125024846e-06, + "logits/chosen": -2.9385826587677, + "logits/rejected": -2.869152784347534, + "logps/chosen": -40.14501953125, + "logps/rejected": -171.1200714111328, + "loss": 0.1936, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2854040861129761, + "rewards/margins": 1.5473027229309082, + "rewards/rejected": -1.2618986368179321, + "step": 1600 + }, + { + "epoch": 0.1, + "learning_rate": 1.6000795070562513e-06, + "logits/chosen": -2.950686454772949, + "logits/rejected": -2.8969125747680664, + "logps/chosen": -48.180946350097656, + "logps/rejected": -168.5851593017578, + "loss": 0.1932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2918631434440613, + "rewards/margins": 1.531005620956421, + "rewards/rejected": -1.2391424179077148, + "step": 1610 + }, + { + "epoch": 0.1, + "learning_rate": 1.6100178890876567e-06, + "logits/chosen": -2.9565420150756836, + "logits/rejected": -2.8605005741119385, + "logps/chosen": -42.319358825683594, + "logps/rejected": -166.67636108398438, + "loss": 0.2047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2758972942829132, + "rewards/margins": 1.5064362287521362, + "rewards/rejected": -1.2305389642715454, + "step": 1620 + }, + { + "epoch": 0.1, + "learning_rate": 1.619956271119062e-06, + "logits/chosen": -2.914909601211548, + "logits/rejected": -2.831927537918091, + "logps/chosen": -42.313499450683594, + "logps/rejected": -167.9279327392578, + "loss": 0.2067, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2684255540370941, + "rewards/margins": 1.5069520473480225, + "rewards/rejected": -1.238526463508606, + "step": 1630 + }, + { + "epoch": 0.1, + "learning_rate": 1.6298946531504672e-06, + "logits/chosen": -2.955915927886963, + "logits/rejected": -2.9154694080352783, + "logps/chosen": -42.53447341918945, + "logps/rejected": -173.2788848876953, + "loss": 0.2007, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2627255320549011, + "rewards/margins": 1.5596225261688232, + "rewards/rejected": -1.2968969345092773, + "step": 1640 + }, + { + "epoch": 0.1, + "learning_rate": 1.6398330351818726e-06, + "logits/chosen": -2.938546657562256, + "logits/rejected": -2.864445924758911, + "logps/chosen": -46.61555480957031, + "logps/rejected": -171.0252227783203, + "loss": 0.1991, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23432859778404236, + "rewards/margins": 1.502878189086914, + "rewards/rejected": -1.2685496807098389, + "step": 1650 + }, + { + "epoch": 0.1, + "learning_rate": 1.6497714172132779e-06, + "logits/chosen": -2.9387500286102295, + "logits/rejected": -2.8690807819366455, + "logps/chosen": -45.87975311279297, + "logps/rejected": -176.9271240234375, + "loss": 0.186, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2607995867729187, + "rewards/margins": 1.595625400543213, + "rewards/rejected": -1.3348257541656494, + "step": 1660 + }, + { + "epoch": 0.1, + "learning_rate": 1.659709799244683e-06, + "logits/chosen": -2.9080569744110107, + "logits/rejected": -2.8466291427612305, + "logps/chosen": -41.54750442504883, + "logps/rejected": -173.8631134033203, + "loss": 0.1985, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25337880849838257, + "rewards/margins": 1.550815224647522, + "rewards/rejected": -1.2974361181259155, + "step": 1670 + }, + { + "epoch": 0.1, + "learning_rate": 1.6696481812760883e-06, + "logits/chosen": -2.9300200939178467, + "logits/rejected": -2.913851261138916, + "logps/chosen": -44.24148178100586, + "logps/rejected": -178.66310119628906, + "loss": 0.1957, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.26822495460510254, + "rewards/margins": 1.6096464395523071, + "rewards/rejected": -1.341421365737915, + "step": 1680 + }, + { + "epoch": 0.1, + "learning_rate": 1.6795865633074938e-06, + "logits/chosen": -2.9271740913391113, + "logits/rejected": -2.8629698753356934, + "logps/chosen": -44.854530334472656, + "logps/rejected": -178.25804138183594, + "loss": 0.1806, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2493271380662918, + "rewards/margins": 1.594040036201477, + "rewards/rejected": -1.3447128534317017, + "step": 1690 + }, + { + "epoch": 0.1, + "learning_rate": 1.689524945338899e-06, + "logits/chosen": -2.9085564613342285, + "logits/rejected": -2.8497085571289062, + "logps/chosen": -46.561439514160156, + "logps/rejected": -178.12074279785156, + "loss": 0.1932, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23066172003746033, + "rewards/margins": 1.5638407468795776, + "rewards/rejected": -1.333178997039795, + "step": 1700 + }, + { + "epoch": 0.1, + "learning_rate": 1.6994633273703042e-06, + "logits/chosen": -2.912637233734131, + "logits/rejected": -2.8519294261932373, + "logps/chosen": -38.837989807128906, + "logps/rejected": -178.4434051513672, + "loss": 0.1892, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2649097144603729, + "rewards/margins": 1.5986964702606201, + "rewards/rejected": -1.3337867259979248, + "step": 1710 + }, + { + "epoch": 0.1, + "learning_rate": 1.7094017094017097e-06, + "logits/chosen": -2.9310035705566406, + "logits/rejected": -2.864668607711792, + "logps/chosen": -47.33824920654297, + "logps/rejected": -184.09056091308594, + "loss": 0.1854, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24403885006904602, + "rewards/margins": 1.633710265159607, + "rewards/rejected": -1.3896714448928833, + "step": 1720 + }, + { + "epoch": 0.1, + "learning_rate": 1.7193400914331149e-06, + "logits/chosen": -2.944894552230835, + "logits/rejected": -2.866276979446411, + "logps/chosen": -45.86943054199219, + "logps/rejected": -182.3990478515625, + "loss": 0.1771, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24304766952991486, + "rewards/margins": 1.6313107013702393, + "rewards/rejected": -1.3882629871368408, + "step": 1730 + }, + { + "epoch": 0.1, + "learning_rate": 1.7292784734645201e-06, + "logits/chosen": -2.9287362098693848, + "logits/rejected": -2.8648197650909424, + "logps/chosen": -48.32317352294922, + "logps/rejected": -179.72482299804688, + "loss": 0.1848, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2619437873363495, + "rewards/margins": 1.6352773904800415, + "rewards/rejected": -1.37333345413208, + "step": 1740 + }, + { + "epoch": 0.1, + "learning_rate": 1.7392168554959254e-06, + "logits/chosen": -2.954458713531494, + "logits/rejected": -2.8825223445892334, + "logps/chosen": -45.188865661621094, + "logps/rejected": -190.28541564941406, + "loss": 0.1748, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.25864511728286743, + "rewards/margins": 1.7096599340438843, + "rewards/rejected": -1.451014757156372, + "step": 1750 + }, + { + "epoch": 0.1, + "learning_rate": 1.7491552375273308e-06, + "logits/chosen": -2.9159703254699707, + "logits/rejected": -2.8699893951416016, + "logps/chosen": -43.1900634765625, + "logps/rejected": -183.1000213623047, + "loss": 0.1829, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22863885760307312, + "rewards/margins": 1.6202843189239502, + "rewards/rejected": -1.3916454315185547, + "step": 1760 + }, + { + "epoch": 0.11, + "learning_rate": 1.759093619558736e-06, + "logits/chosen": -2.8955366611480713, + "logits/rejected": -2.841960906982422, + "logps/chosen": -42.93968963623047, + "logps/rejected": -184.12289428710938, + "loss": 0.1796, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2416425496339798, + "rewards/margins": 1.6491963863372803, + "rewards/rejected": -1.4075539112091064, + "step": 1770 + }, + { + "epoch": 0.11, + "learning_rate": 1.7690320015901413e-06, + "logits/chosen": -2.8926377296447754, + "logits/rejected": -2.8444771766662598, + "logps/chosen": -51.476524353027344, + "logps/rejected": -185.59422302246094, + "loss": 0.182, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2534574270248413, + "rewards/margins": 1.6707864999771118, + "rewards/rejected": -1.4173290729522705, + "step": 1780 + }, + { + "epoch": 0.11, + "learning_rate": 1.7789703836215467e-06, + "logits/chosen": -2.9222676753997803, + "logits/rejected": -2.876471757888794, + "logps/chosen": -45.28787612915039, + "logps/rejected": -189.94509887695312, + "loss": 0.1769, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2418483942747116, + "rewards/margins": 1.6985985040664673, + "rewards/rejected": -1.4567501544952393, + "step": 1790 + }, + { + "epoch": 0.11, + "learning_rate": 1.788908765652952e-06, + "logits/chosen": -2.9510467052459717, + "logits/rejected": -2.9071922302246094, + "logps/chosen": -46.791744232177734, + "logps/rejected": -184.75167846679688, + "loss": 0.1758, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21823222935199738, + "rewards/margins": 1.617605209350586, + "rewards/rejected": -1.3993730545043945, + "step": 1800 + }, + { + "epoch": 0.11, + "learning_rate": 1.7988471476843572e-06, + "logits/chosen": -2.9110751152038574, + "logits/rejected": -2.8870439529418945, + "logps/chosen": -40.97353744506836, + "logps/rejected": -189.69935607910156, + "loss": 0.1784, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2392907589673996, + "rewards/margins": 1.689972162246704, + "rewards/rejected": -1.450681447982788, + "step": 1810 + }, + { + "epoch": 0.11, + "learning_rate": 1.8087855297157624e-06, + "logits/chosen": -2.9231131076812744, + "logits/rejected": -2.879042148590088, + "logps/chosen": -45.35505676269531, + "logps/rejected": -187.88198852539062, + "loss": 0.1745, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23739032447338104, + "rewards/margins": 1.6829437017440796, + "rewards/rejected": -1.4455534219741821, + "step": 1820 + }, + { + "epoch": 0.11, + "learning_rate": 1.8187239117471678e-06, + "logits/chosen": -2.904010534286499, + "logits/rejected": -2.819209337234497, + "logps/chosen": -49.072837829589844, + "logps/rejected": -188.5721435546875, + "loss": 0.1739, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23199772834777832, + "rewards/margins": 1.6778055429458618, + "rewards/rejected": -1.4458078145980835, + "step": 1830 + }, + { + "epoch": 0.11, + "learning_rate": 1.828662293778573e-06, + "logits/chosen": -2.9317567348480225, + "logits/rejected": -2.8722825050354004, + "logps/chosen": -46.78784942626953, + "logps/rejected": -186.72462463378906, + "loss": 0.1717, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2583271861076355, + "rewards/margins": 1.6835315227508545, + "rewards/rejected": -1.4252045154571533, + "step": 1840 + }, + { + "epoch": 0.11, + "learning_rate": 1.8386006758099783e-06, + "logits/chosen": -2.8970112800598145, + "logits/rejected": -2.8344664573669434, + "logps/chosen": -44.3566780090332, + "logps/rejected": -192.00375366210938, + "loss": 0.1761, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2229662388563156, + "rewards/margins": 1.6996742486953735, + "rewards/rejected": -1.476707935333252, + "step": 1850 + }, + { + "epoch": 0.11, + "learning_rate": 1.8485390578413837e-06, + "logits/chosen": -2.9342124462127686, + "logits/rejected": -2.855379581451416, + "logps/chosen": -47.38892364501953, + "logps/rejected": -183.08070373535156, + "loss": 0.1815, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22573116421699524, + "rewards/margins": 1.594970464706421, + "rewards/rejected": -1.369239091873169, + "step": 1860 + }, + { + "epoch": 0.11, + "learning_rate": 1.858477439872789e-06, + "logits/chosen": -2.90877366065979, + "logits/rejected": -2.8199291229248047, + "logps/chosen": -49.482688903808594, + "logps/rejected": -198.30319213867188, + "loss": 0.1682, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23475375771522522, + "rewards/margins": 1.7726142406463623, + "rewards/rejected": -1.537860631942749, + "step": 1870 + }, + { + "epoch": 0.11, + "learning_rate": 1.8684158219041942e-06, + "logits/chosen": -2.9375572204589844, + "logits/rejected": -2.851771593093872, + "logps/chosen": -49.955467224121094, + "logps/rejected": -192.17507934570312, + "loss": 0.1719, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2292042225599289, + "rewards/margins": 1.707363486289978, + "rewards/rejected": -1.4781593084335327, + "step": 1880 + }, + { + "epoch": 0.11, + "learning_rate": 1.8783542039355994e-06, + "logits/chosen": -2.9321413040161133, + "logits/rejected": -2.86686110496521, + "logps/chosen": -47.53290939331055, + "logps/rejected": -197.1437530517578, + "loss": 0.1696, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24333615601062775, + "rewards/margins": 1.7653614282608032, + "rewards/rejected": -1.5220253467559814, + "step": 1890 + }, + { + "epoch": 0.11, + "learning_rate": 1.8882925859670049e-06, + "logits/chosen": -2.8830559253692627, + "logits/rejected": -2.8106460571289062, + "logps/chosen": -52.13166427612305, + "logps/rejected": -190.29136657714844, + "loss": 0.1716, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19944444298744202, + "rewards/margins": 1.6554276943206787, + "rewards/rejected": -1.455983281135559, + "step": 1900 + }, + { + "epoch": 0.11, + "learning_rate": 1.8982309679984101e-06, + "logits/chosen": -2.893970489501953, + "logits/rejected": -2.827990770339966, + "logps/chosen": -50.79450225830078, + "logps/rejected": -196.33302307128906, + "loss": 0.1691, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1848539412021637, + "rewards/margins": 1.7052139043807983, + "rewards/rejected": -1.5203601121902466, + "step": 1910 + }, + { + "epoch": 0.11, + "learning_rate": 1.9081693500298153e-06, + "logits/chosen": -2.912644624710083, + "logits/rejected": -2.8506178855895996, + "logps/chosen": -45.973182678222656, + "logps/rejected": -193.03225708007812, + "loss": 0.1636, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23503684997558594, + "rewards/margins": 1.7213551998138428, + "rewards/rejected": -1.4863183498382568, + "step": 1920 + }, + { + "epoch": 0.12, + "learning_rate": 1.9181077320612206e-06, + "logits/chosen": -2.8933639526367188, + "logits/rejected": -2.804399013519287, + "logps/chosen": -48.881996154785156, + "logps/rejected": -185.63601684570312, + "loss": 0.1768, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23119862377643585, + "rewards/margins": 1.6478252410888672, + "rewards/rejected": -1.4166269302368164, + "step": 1930 + }, + { + "epoch": 0.12, + "learning_rate": 1.928046114092626e-06, + "logits/chosen": -2.9234893321990967, + "logits/rejected": -2.851243495941162, + "logps/chosen": -45.63372039794922, + "logps/rejected": -201.4655303955078, + "loss": 0.161, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22551283240318298, + "rewards/margins": 1.7911382913589478, + "rewards/rejected": -1.565625548362732, + "step": 1940 + }, + { + "epoch": 0.12, + "learning_rate": 1.9379844961240315e-06, + "logits/chosen": -2.9253287315368652, + "logits/rejected": -2.87669038772583, + "logps/chosen": -46.300086975097656, + "logps/rejected": -201.2342987060547, + "loss": 0.1611, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2001142054796219, + "rewards/margins": 1.7722715139389038, + "rewards/rejected": -1.5721571445465088, + "step": 1950 + }, + { + "epoch": 0.12, + "learning_rate": 1.9479228781554367e-06, + "logits/chosen": -2.9038681983947754, + "logits/rejected": -2.8421547412872314, + "logps/chosen": -47.702606201171875, + "logps/rejected": -202.36703491210938, + "loss": 0.1588, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.23584742844104767, + "rewards/margins": 1.8061392307281494, + "rewards/rejected": -1.5702917575836182, + "step": 1960 + }, + { + "epoch": 0.12, + "learning_rate": 1.957861260186842e-06, + "logits/chosen": -2.898216724395752, + "logits/rejected": -2.8557543754577637, + "logps/chosen": -49.49296951293945, + "logps/rejected": -203.859619140625, + "loss": 0.1514, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2412387579679489, + "rewards/margins": 1.8397296667099, + "rewards/rejected": -1.5984910726547241, + "step": 1970 + }, + { + "epoch": 0.12, + "learning_rate": 1.967799642218247e-06, + "logits/chosen": -2.9186441898345947, + "logits/rejected": -2.866548538208008, + "logps/chosen": -43.513694763183594, + "logps/rejected": -201.56503295898438, + "loss": 0.156, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24759168922901154, + "rewards/margins": 1.8169513940811157, + "rewards/rejected": -1.569359540939331, + "step": 1980 + }, + { + "epoch": 0.12, + "learning_rate": 1.9777380242496524e-06, + "logits/chosen": -2.8971500396728516, + "logits/rejected": -2.8362698554992676, + "logps/chosen": -54.737281799316406, + "logps/rejected": -197.1339111328125, + "loss": 0.1658, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21952727437019348, + "rewards/margins": 1.7384130954742432, + "rewards/rejected": -1.5188857316970825, + "step": 1990 + }, + { + "epoch": 0.12, + "learning_rate": 1.9876764062810576e-06, + "logits/chosen": -2.885488271713257, + "logits/rejected": -2.8172786235809326, + "logps/chosen": -48.67802810668945, + "logps/rejected": -203.86416625976562, + "loss": 0.1573, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21016936004161835, + "rewards/margins": 1.8105642795562744, + "rewards/rejected": -1.60039484500885, + "step": 2000 + }, + { + "epoch": 0.12, + "learning_rate": 1.997614788312463e-06, + "logits/chosen": -2.903860569000244, + "logits/rejected": -2.8115737438201904, + "logps/chosen": -51.11986541748047, + "logps/rejected": -208.486083984375, + "loss": 0.1548, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21518366038799286, + "rewards/margins": 1.8591692447662354, + "rewards/rejected": -1.6439855098724365, + "step": 2010 + }, + { + "epoch": 0.12, + "learning_rate": 2.0075531703438685e-06, + "logits/chosen": -2.8930842876434326, + "logits/rejected": -2.829664945602417, + "logps/chosen": -51.193660736083984, + "logps/rejected": -211.62576293945312, + "loss": 0.1614, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19436848163604736, + "rewards/margins": 1.8588836193084717, + "rewards/rejected": -1.6645152568817139, + "step": 2020 + }, + { + "epoch": 0.12, + "learning_rate": 2.0174915523752733e-06, + "logits/chosen": -2.8958897590637207, + "logits/rejected": -2.820620059967041, + "logps/chosen": -47.2879638671875, + "logps/rejected": -207.93789672851562, + "loss": 0.1486, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20446279644966125, + "rewards/margins": 1.85037362575531, + "rewards/rejected": -1.6459108591079712, + "step": 2030 + }, + { + "epoch": 0.12, + "learning_rate": 2.0274299344066785e-06, + "logits/chosen": -2.8643155097961426, + "logits/rejected": -2.786074161529541, + "logps/chosen": -49.969356536865234, + "logps/rejected": -212.8159942626953, + "loss": 0.1462, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.19357457756996155, + "rewards/margins": 1.869572639465332, + "rewards/rejected": -1.6759979724884033, + "step": 2040 + }, + { + "epoch": 0.12, + "learning_rate": 2.0373683164380838e-06, + "logits/chosen": -2.8460536003112793, + "logits/rejected": -2.8023734092712402, + "logps/chosen": -48.92486572265625, + "logps/rejected": -211.31655883789062, + "loss": 0.1529, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2194788008928299, + "rewards/margins": 1.9022010564804077, + "rewards/rejected": -1.6827223300933838, + "step": 2050 + }, + { + "epoch": 0.12, + "learning_rate": 2.0473066984694894e-06, + "logits/chosen": -2.898953914642334, + "logits/rejected": -2.8344292640686035, + "logps/chosen": -49.07508087158203, + "logps/rejected": -209.7917022705078, + "loss": 0.155, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18529078364372253, + "rewards/margins": 1.837156057357788, + "rewards/rejected": -1.6518652439117432, + "step": 2060 + }, + { + "epoch": 0.12, + "learning_rate": 2.0572450805008946e-06, + "logits/chosen": -2.888219118118286, + "logits/rejected": -2.8265671730041504, + "logps/chosen": -52.49650192260742, + "logps/rejected": -209.6946258544922, + "loss": 0.1426, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18174710869789124, + "rewards/margins": 1.831261396408081, + "rewards/rejected": -1.6495144367218018, + "step": 2070 + }, + { + "epoch": 0.12, + "learning_rate": 2.0671834625323e-06, + "logits/chosen": -2.911973237991333, + "logits/rejected": -2.838061809539795, + "logps/chosen": -49.2669563293457, + "logps/rejected": -214.31411743164062, + "loss": 0.1438, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1901896744966507, + "rewards/margins": 1.8881337642669678, + "rewards/rejected": -1.6979444026947021, + "step": 2080 + }, + { + "epoch": 0.12, + "learning_rate": 2.077121844563705e-06, + "logits/chosen": -2.90057635307312, + "logits/rejected": -2.8115038871765137, + "logps/chosen": -54.580650329589844, + "logps/rejected": -225.47021484375, + "loss": 0.1364, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18691764771938324, + "rewards/margins": 1.9921677112579346, + "rewards/rejected": -1.8052499294281006, + "step": 2090 + }, + { + "epoch": 0.13, + "learning_rate": 2.0870602265951103e-06, + "logits/chosen": -2.897571086883545, + "logits/rejected": -2.827625036239624, + "logps/chosen": -50.81390380859375, + "logps/rejected": -229.47134399414062, + "loss": 0.1338, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15122418105602264, + "rewards/margins": 1.9932104349136353, + "rewards/rejected": -1.8419862985610962, + "step": 2100 + }, + { + "epoch": 0.13, + "learning_rate": 2.0969986086265156e-06, + "logits/chosen": -2.8723649978637695, + "logits/rejected": -2.7884433269500732, + "logps/chosen": -53.060707092285156, + "logps/rejected": -231.3718719482422, + "loss": 0.1286, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16204611957073212, + "rewards/margins": 2.0296947956085205, + "rewards/rejected": -1.867648720741272, + "step": 2110 + }, + { + "epoch": 0.13, + "learning_rate": 2.106936990657921e-06, + "logits/chosen": -2.910182476043701, + "logits/rejected": -2.8322958946228027, + "logps/chosen": -50.7359733581543, + "logps/rejected": -243.06405639648438, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.15359267592430115, + "rewards/margins": 2.132904529571533, + "rewards/rejected": -1.9793117046356201, + "step": 2120 + }, + { + "epoch": 0.13, + "learning_rate": 2.1168753726893265e-06, + "logits/chosen": -2.8896210193634033, + "logits/rejected": -2.824690341949463, + "logps/chosen": -60.180206298828125, + "logps/rejected": -241.84912109375, + "loss": 0.1314, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.12059202045202255, + "rewards/margins": 2.0972983837127686, + "rewards/rejected": -1.9767062664031982, + "step": 2130 + }, + { + "epoch": 0.13, + "learning_rate": 2.1268137547207317e-06, + "logits/chosen": -2.8733506202697754, + "logits/rejected": -2.7782347202301025, + "logps/chosen": -57.960365295410156, + "logps/rejected": -243.59115600585938, + "loss": 0.1217, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.16324757039546967, + "rewards/margins": 2.1472976207733154, + "rewards/rejected": -1.9840500354766846, + "step": 2140 + }, + { + "epoch": 0.13, + "learning_rate": 2.136752136752137e-06, + "logits/chosen": -2.8736488819122314, + "logits/rejected": -2.777522325515747, + "logps/chosen": -51.34862518310547, + "logps/rejected": -242.35128784179688, + "loss": 0.1222, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11208371073007584, + "rewards/margins": 2.100311279296875, + "rewards/rejected": -1.9882274866104126, + "step": 2150 + }, + { + "epoch": 0.13, + "learning_rate": 2.146690518783542e-06, + "logits/chosen": -2.89473295211792, + "logits/rejected": -2.812666177749634, + "logps/chosen": -60.13788604736328, + "logps/rejected": -259.27557373046875, + "loss": 0.1146, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.11730917543172836, + "rewards/margins": 2.267714738845825, + "rewards/rejected": -2.1504056453704834, + "step": 2160 + }, + { + "epoch": 0.13, + "learning_rate": 2.1566289008149474e-06, + "logits/chosen": -2.8573780059814453, + "logits/rejected": -2.7930097579956055, + "logps/chosen": -64.19554138183594, + "logps/rejected": -255.5740509033203, + "loss": 0.1261, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.05539960786700249, + "rewards/margins": 2.1695351600646973, + "rewards/rejected": -2.1141357421875, + "step": 2170 + }, + { + "epoch": 0.13, + "learning_rate": 2.1665672828463526e-06, + "logits/chosen": -2.8643319606781006, + "logits/rejected": -2.7785964012145996, + "logps/chosen": -62.382057189941406, + "logps/rejected": -250.58633422851562, + "loss": 0.1085, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08105885982513428, + "rewards/margins": 2.133678436279297, + "rewards/rejected": -2.0526199340820312, + "step": 2180 + }, + { + "epoch": 0.13, + "learning_rate": 2.176505664877758e-06, + "logits/chosen": -2.882225751876831, + "logits/rejected": -2.7978339195251465, + "logps/chosen": -61.5095329284668, + "logps/rejected": -268.1742248535156, + "loss": 0.1054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.10081814229488373, + "rewards/margins": 2.347299098968506, + "rewards/rejected": -2.246480941772461, + "step": 2190 + }, + { + "epoch": 0.13, + "learning_rate": 2.1864440469091635e-06, + "logits/chosen": -2.8665823936462402, + "logits/rejected": -2.7472100257873535, + "logps/chosen": -62.08959197998047, + "logps/rejected": -299.62469482421875, + "loss": 0.0839, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0653982013463974, + "rewards/margins": 2.61576509475708, + "rewards/rejected": -2.5503668785095215, + "step": 2200 + }, + { + "epoch": 0.13, + "learning_rate": 2.1963824289405687e-06, + "logits/chosen": -2.862450122833252, + "logits/rejected": -2.7589011192321777, + "logps/chosen": -75.2565689086914, + "logps/rejected": -310.0911865234375, + "loss": 0.0892, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.04842941835522652, + "rewards/margins": 2.595315456390381, + "rewards/rejected": -2.643744945526123, + "step": 2210 + }, + { + "epoch": 0.13, + "learning_rate": 2.206320810971974e-06, + "logits/chosen": -2.882495880126953, + "logits/rejected": -2.7162063121795654, + "logps/chosen": -83.32658386230469, + "logps/rejected": -349.49810791015625, + "loss": 0.0886, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15759214758872986, + "rewards/margins": 2.885789632797241, + "rewards/rejected": -3.043381929397583, + "step": 2220 + }, + { + "epoch": 0.13, + "learning_rate": 2.216259193003379e-06, + "logits/chosen": -2.886505603790283, + "logits/rejected": -2.7478206157684326, + "logps/chosen": -80.19174194335938, + "logps/rejected": -354.93804931640625, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13000604510307312, + "rewards/margins": 2.965484857559204, + "rewards/rejected": -3.095491409301758, + "step": 2230 + }, + { + "epoch": 0.13, + "learning_rate": 2.2261975750347844e-06, + "logits/chosen": -2.8555188179016113, + "logits/rejected": -2.7492566108703613, + "logps/chosen": -86.01118469238281, + "logps/rejected": -381.38970947265625, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15803980827331543, + "rewards/margins": 3.2158145904541016, + "rewards/rejected": -3.373854160308838, + "step": 2240 + }, + { + "epoch": 0.13, + "learning_rate": 2.2361359570661897e-06, + "logits/chosen": -2.838379144668579, + "logits/rejected": -2.712153196334839, + "logps/chosen": -96.08528900146484, + "logps/rejected": -420.78515625, + "loss": 0.0607, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27243855595588684, + "rewards/margins": 3.4838435649871826, + "rewards/rejected": -3.756282091140747, + "step": 2250 + }, + { + "epoch": 0.13, + "learning_rate": 2.246074339097595e-06, + "logits/chosen": -2.8615758419036865, + "logits/rejected": -2.744259834289551, + "logps/chosen": -85.43272399902344, + "logps/rejected": -397.5384216308594, + "loss": 0.0695, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17542442679405212, + "rewards/margins": 3.3579750061035156, + "rewards/rejected": -3.5333995819091797, + "step": 2260 + }, + { + "epoch": 0.14, + "learning_rate": 2.2560127211290005e-06, + "logits/chosen": -2.849581003189087, + "logits/rejected": -2.7261905670166016, + "logps/chosen": -109.35972595214844, + "logps/rejected": -479.42596435546875, + "loss": 0.057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36785703897476196, + "rewards/margins": 3.985023021697998, + "rewards/rejected": -4.352880001068115, + "step": 2270 + }, + { + "epoch": 0.14, + "learning_rate": 2.2659511031604058e-06, + "logits/chosen": -2.876084566116333, + "logits/rejected": -2.732330799102783, + "logps/chosen": -86.38233947753906, + "logps/rejected": -427.0595703125, + "loss": 0.0534, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13335882127285004, + "rewards/margins": 3.692967176437378, + "rewards/rejected": -3.826326370239258, + "step": 2280 + }, + { + "epoch": 0.14, + "learning_rate": 2.275889485191811e-06, + "logits/chosen": -2.826002359390259, + "logits/rejected": -2.703542947769165, + "logps/chosen": -97.13479614257812, + "logps/rejected": -445.15826416015625, + "loss": 0.0774, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22232595086097717, + "rewards/margins": 3.7864315509796143, + "rewards/rejected": -4.008757591247559, + "step": 2290 + }, + { + "epoch": 0.14, + "learning_rate": 2.2858278672232162e-06, + "logits/chosen": -2.8458759784698486, + "logits/rejected": -2.73734712600708, + "logps/chosen": -103.75789642333984, + "logps/rejected": -457.8998107910156, + "loss": 0.0575, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33128562569618225, + "rewards/margins": 3.8148033618927, + "rewards/rejected": -4.14608907699585, + "step": 2300 + }, + { + "epoch": 0.14, + "learning_rate": 2.2957662492546215e-06, + "logits/chosen": -2.87620210647583, + "logits/rejected": -2.77702260017395, + "logps/chosen": -80.80003356933594, + "logps/rejected": -445.89324951171875, + "loss": 0.0514, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13084685802459717, + "rewards/margins": 3.88403582572937, + "rewards/rejected": -4.014882564544678, + "step": 2310 + }, + { + "epoch": 0.14, + "learning_rate": 2.3057046312860267e-06, + "logits/chosen": -2.828129768371582, + "logits/rejected": -2.7512614727020264, + "logps/chosen": -103.52816009521484, + "logps/rejected": -491.0267639160156, + "loss": 0.0546, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34766465425491333, + "rewards/margins": 4.119288444519043, + "rewards/rejected": -4.466953277587891, + "step": 2320 + }, + { + "epoch": 0.14, + "learning_rate": 2.315643013317432e-06, + "logits/chosen": -2.8489155769348145, + "logits/rejected": -2.7733635902404785, + "logps/chosen": -99.70780944824219, + "logps/rejected": -448.8748474121094, + "loss": 0.0572, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3031924068927765, + "rewards/margins": 3.743147373199463, + "rewards/rejected": -4.046339988708496, + "step": 2330 + }, + { + "epoch": 0.14, + "learning_rate": 2.3255813953488376e-06, + "logits/chosen": -2.886247158050537, + "logits/rejected": -2.7421345710754395, + "logps/chosen": -111.17787170410156, + "logps/rejected": -497.55242919921875, + "loss": 0.0566, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.437234491109848, + "rewards/margins": 4.10015344619751, + "rewards/rejected": -4.537387847900391, + "step": 2340 + }, + { + "epoch": 0.14, + "learning_rate": 2.335519777380243e-06, + "logits/chosen": -2.8593311309814453, + "logits/rejected": -2.742645025253296, + "logps/chosen": -115.872802734375, + "logps/rejected": -511.1111755371094, + "loss": 0.0559, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4879177212715149, + "rewards/margins": 4.190369129180908, + "rewards/rejected": -4.678286552429199, + "step": 2350 + }, + { + "epoch": 0.14, + "learning_rate": 2.345458159411648e-06, + "logits/chosen": -2.8421268463134766, + "logits/rejected": -2.7488067150115967, + "logps/chosen": -93.20408630371094, + "logps/rejected": -518.5665893554688, + "loss": 0.0387, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22678843140602112, + "rewards/margins": 4.5082502365112305, + "rewards/rejected": -4.735038757324219, + "step": 2360 + }, + { + "epoch": 0.14, + "learning_rate": 2.3553965414430533e-06, + "logits/chosen": -2.878983497619629, + "logits/rejected": -2.7681632041931152, + "logps/chosen": -97.37540435791016, + "logps/rejected": -484.1717834472656, + "loss": 0.0753, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.24301226437091827, + "rewards/margins": 4.169155597686768, + "rewards/rejected": -4.412167549133301, + "step": 2370 + }, + { + "epoch": 0.14, + "learning_rate": 2.3653349234744585e-06, + "logits/chosen": -2.845381259918213, + "logits/rejected": -2.718956470489502, + "logps/chosen": -92.97537231445312, + "logps/rejected": -524.410888671875, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.261413037776947, + "rewards/margins": 4.5325541496276855, + "rewards/rejected": -4.793966770172119, + "step": 2380 + }, + { + "epoch": 0.14, + "learning_rate": 2.3752733055058637e-06, + "logits/chosen": -2.8451075553894043, + "logits/rejected": -2.753882884979248, + "logps/chosen": -110.72901916503906, + "logps/rejected": -500.1316833496094, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43363088369369507, + "rewards/margins": 4.124763488769531, + "rewards/rejected": -4.558394908905029, + "step": 2390 + }, + { + "epoch": 0.14, + "learning_rate": 2.385211687537269e-06, + "logits/chosen": -2.8595046997070312, + "logits/rejected": -2.72416353225708, + "logps/chosen": -109.85948181152344, + "logps/rejected": -531.4679565429688, + "loss": 0.0435, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4186236262321472, + "rewards/margins": 4.457502841949463, + "rewards/rejected": -4.876126289367676, + "step": 2400 + }, + { + "epoch": 0.14, + "learning_rate": 2.3951500695686746e-06, + "logits/chosen": -2.8917999267578125, + "logits/rejected": -2.7836642265319824, + "logps/chosen": -95.10630798339844, + "logps/rejected": -539.7811889648438, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21226295828819275, + "rewards/margins": 4.741831302642822, + "rewards/rejected": -4.954094409942627, + "step": 2410 + }, + { + "epoch": 0.14, + "learning_rate": 2.40508845160008e-06, + "logits/chosen": -2.856287717819214, + "logits/rejected": -2.735699415206909, + "logps/chosen": -126.57493591308594, + "logps/rejected": -510.465576171875, + "loss": 0.0401, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5308834910392761, + "rewards/margins": 4.121683597564697, + "rewards/rejected": -4.652567386627197, + "step": 2420 + }, + { + "epoch": 0.14, + "learning_rate": 2.415026833631485e-06, + "logits/chosen": -2.869654893875122, + "logits/rejected": -2.7530159950256348, + "logps/chosen": -115.6380386352539, + "logps/rejected": -524.8341064453125, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4219719469547272, + "rewards/margins": 4.3757643699646, + "rewards/rejected": -4.797736167907715, + "step": 2430 + }, + { + "epoch": 0.15, + "learning_rate": 2.4249652156628903e-06, + "logits/chosen": -2.880510091781616, + "logits/rejected": -2.742283344268799, + "logps/chosen": -94.3664779663086, + "logps/rejected": -600.8681030273438, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22611363232135773, + "rewards/margins": 5.320172309875488, + "rewards/rejected": -5.546285629272461, + "step": 2440 + }, + { + "epoch": 0.15, + "learning_rate": 2.4349035976942955e-06, + "logits/chosen": -2.847015857696533, + "logits/rejected": -2.714139938354492, + "logps/chosen": -107.5555648803711, + "logps/rejected": -539.5269775390625, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35273414850234985, + "rewards/margins": 4.595271110534668, + "rewards/rejected": -4.948005676269531, + "step": 2450 + }, + { + "epoch": 0.15, + "learning_rate": 2.4448419797257008e-06, + "logits/chosen": -2.8563923835754395, + "logits/rejected": -2.7354495525360107, + "logps/chosen": -106.40681457519531, + "logps/rejected": -558.0792236328125, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33554622530937195, + "rewards/margins": 4.803497314453125, + "rewards/rejected": -5.13904333114624, + "step": 2460 + }, + { + "epoch": 0.15, + "learning_rate": 2.454780361757106e-06, + "logits/chosen": -2.8795835971832275, + "logits/rejected": -2.755885362625122, + "logps/chosen": -133.39938354492188, + "logps/rejected": -587.6358032226562, + "loss": 0.0386, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5809357762336731, + "rewards/margins": 4.847970962524414, + "rewards/rejected": -5.4289069175720215, + "step": 2470 + }, + { + "epoch": 0.15, + "learning_rate": 2.4647187437885117e-06, + "logits/chosen": -2.864941120147705, + "logits/rejected": -2.752467632293701, + "logps/chosen": -131.55223083496094, + "logps/rejected": -589.6901245117188, + "loss": 0.0397, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5569919943809509, + "rewards/margins": 4.885194778442383, + "rewards/rejected": -5.4421868324279785, + "step": 2480 + }, + { + "epoch": 0.15, + "learning_rate": 2.474657125819917e-06, + "logits/chosen": -2.910388946533203, + "logits/rejected": -2.761775016784668, + "logps/chosen": -116.4517593383789, + "logps/rejected": -622.0790405273438, + "loss": 0.0402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4571622312068939, + "rewards/margins": 5.318145275115967, + "rewards/rejected": -5.775307655334473, + "step": 2490 + }, + { + "epoch": 0.15, + "learning_rate": 2.484595507851322e-06, + "logits/chosen": -2.8604049682617188, + "logits/rejected": -2.723972797393799, + "logps/chosen": -102.49222564697266, + "logps/rejected": -567.1668701171875, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33957189321517944, + "rewards/margins": 4.883059501647949, + "rewards/rejected": -5.222630977630615, + "step": 2500 + }, + { + "epoch": 0.15, + "learning_rate": 2.4945338898827273e-06, + "logits/chosen": -2.8329379558563232, + "logits/rejected": -2.7282357215881348, + "logps/chosen": -118.11934661865234, + "logps/rejected": -696.2083740234375, + "loss": 0.0309, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5108665823936462, + "rewards/margins": 6.0030837059021, + "rewards/rejected": -6.513949394226074, + "step": 2510 + }, + { + "epoch": 0.15, + "learning_rate": 2.504472271914132e-06, + "logits/chosen": -2.8782997131347656, + "logits/rejected": -2.7737390995025635, + "logps/chosen": -112.9260482788086, + "logps/rejected": -571.0912475585938, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4593760371208191, + "rewards/margins": 4.808173656463623, + "rewards/rejected": -5.267550468444824, + "step": 2520 + }, + { + "epoch": 0.15, + "learning_rate": 2.514410653945538e-06, + "logits/chosen": -2.8624765872955322, + "logits/rejected": -2.7829763889312744, + "logps/chosen": -104.45611572265625, + "logps/rejected": -625.0031127929688, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3366129994392395, + "rewards/margins": 5.461874485015869, + "rewards/rejected": -5.798487663269043, + "step": 2530 + }, + { + "epoch": 0.15, + "learning_rate": 2.524349035976943e-06, + "logits/chosen": -2.8784902095794678, + "logits/rejected": -2.750974655151367, + "logps/chosen": -94.70683288574219, + "logps/rejected": -583.3682861328125, + "loss": 0.0407, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19562990963459015, + "rewards/margins": 5.201220512390137, + "rewards/rejected": -5.396849632263184, + "step": 2540 + }, + { + "epoch": 0.15, + "learning_rate": 2.5342874180083483e-06, + "logits/chosen": -2.8390884399414062, + "logits/rejected": -2.7711246013641357, + "logps/chosen": -124.96305847167969, + "logps/rejected": -607.1495971679688, + "loss": 0.0331, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5488590002059937, + "rewards/margins": 5.083286762237549, + "rewards/rejected": -5.632145404815674, + "step": 2550 + }, + { + "epoch": 0.15, + "learning_rate": 2.5442258000397535e-06, + "logits/chosen": -2.907749891281128, + "logits/rejected": -2.796351671218872, + "logps/chosen": -136.42730712890625, + "logps/rejected": -656.330810546875, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6309903264045715, + "rewards/margins": 5.493134021759033, + "rewards/rejected": -6.124124526977539, + "step": 2560 + }, + { + "epoch": 0.15, + "learning_rate": 2.5541641820711587e-06, + "logits/chosen": -2.879051923751831, + "logits/rejected": -2.7668848037719727, + "logps/chosen": -122.47438049316406, + "logps/rejected": -622.0551147460938, + "loss": 0.0402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4394451975822449, + "rewards/margins": 5.3370161056518555, + "rewards/rejected": -5.776461124420166, + "step": 2570 + }, + { + "epoch": 0.15, + "learning_rate": 2.564102564102564e-06, + "logits/chosen": -2.8612680435180664, + "logits/rejected": -2.7154414653778076, + "logps/chosen": -99.13762664794922, + "logps/rejected": -623.2130126953125, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2930333912372589, + "rewards/margins": 5.489495277404785, + "rewards/rejected": -5.782528877258301, + "step": 2580 + }, + { + "epoch": 0.15, + "learning_rate": 2.574040946133969e-06, + "logits/chosen": -2.8236520290374756, + "logits/rejected": -2.733917713165283, + "logps/chosen": -112.0412826538086, + "logps/rejected": -597.1922607421875, + "loss": 0.0412, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44201985001564026, + "rewards/margins": 5.092393398284912, + "rewards/rejected": -5.534413814544678, + "step": 2590 + }, + { + "epoch": 0.16, + "learning_rate": 2.583979328165375e-06, + "logits/chosen": -2.897037982940674, + "logits/rejected": -2.7984516620635986, + "logps/chosen": -127.2643814086914, + "logps/rejected": -681.1658325195312, + "loss": 0.0382, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5940750241279602, + "rewards/margins": 5.776637077331543, + "rewards/rejected": -6.3707122802734375, + "step": 2600 + }, + { + "epoch": 0.16, + "learning_rate": 2.59391771019678e-06, + "logits/chosen": -2.8619964122772217, + "logits/rejected": -2.6980233192443848, + "logps/chosen": -127.5995101928711, + "logps/rejected": -575.2250366210938, + "loss": 0.0421, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.567305862903595, + "rewards/margins": 4.744027137756348, + "rewards/rejected": -5.311333179473877, + "step": 2610 + }, + { + "epoch": 0.16, + "learning_rate": 2.6038560922281853e-06, + "logits/chosen": -2.9083852767944336, + "logits/rejected": -2.8188624382019043, + "logps/chosen": -102.30079650878906, + "logps/rejected": -613.44287109375, + "loss": 0.0661, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.32300594449043274, + "rewards/margins": 5.374509334564209, + "rewards/rejected": -5.697515487670898, + "step": 2620 + }, + { + "epoch": 0.16, + "learning_rate": 2.6137944742595905e-06, + "logits/chosen": -2.855656147003174, + "logits/rejected": -2.7884740829467773, + "logps/chosen": -104.42740631103516, + "logps/rejected": -658.5119018554688, + "loss": 0.052, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2905837893486023, + "rewards/margins": 5.853411674499512, + "rewards/rejected": -6.143995761871338, + "step": 2630 + }, + { + "epoch": 0.16, + "learning_rate": 2.6237328562909958e-06, + "logits/chosen": -2.8788671493530273, + "logits/rejected": -2.7701425552368164, + "logps/chosen": -86.54953002929688, + "logps/rejected": -652.6976318359375, + "loss": 0.0349, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16848286986351013, + "rewards/margins": 5.915610313415527, + "rewards/rejected": -6.08409309387207, + "step": 2640 + }, + { + "epoch": 0.16, + "learning_rate": 2.633671238322401e-06, + "logits/chosen": -2.8572185039520264, + "logits/rejected": -2.735884666442871, + "logps/chosen": -97.97199249267578, + "logps/rejected": -651.403564453125, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2499045878648758, + "rewards/margins": 5.807662010192871, + "rewards/rejected": -6.0575666427612305, + "step": 2650 + }, + { + "epoch": 0.16, + "learning_rate": 2.6436096203538062e-06, + "logits/chosen": -2.8611574172973633, + "logits/rejected": -2.7753987312316895, + "logps/chosen": -103.7802963256836, + "logps/rejected": -581.8047485351562, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36258944869041443, + "rewards/margins": 5.012294292449951, + "rewards/rejected": -5.374884605407715, + "step": 2660 + }, + { + "epoch": 0.16, + "learning_rate": 2.653548002385212e-06, + "logits/chosen": -2.8793888092041016, + "logits/rejected": -2.7534477710723877, + "logps/chosen": -108.2569808959961, + "logps/rejected": -730.9557495117188, + "loss": 0.0901, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3683033287525177, + "rewards/margins": 6.505792140960693, + "rewards/rejected": -6.8740949630737305, + "step": 2670 + }, + { + "epoch": 0.16, + "learning_rate": 2.663486384416617e-06, + "logits/chosen": -2.8663830757141113, + "logits/rejected": -2.792574405670166, + "logps/chosen": -108.5348892211914, + "logps/rejected": -733.4496459960938, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3757360279560089, + "rewards/margins": 6.515524864196777, + "rewards/rejected": -6.891261100769043, + "step": 2680 + }, + { + "epoch": 0.16, + "learning_rate": 2.6734247664480224e-06, + "logits/chosen": -2.8600144386291504, + "logits/rejected": -2.7582755088806152, + "logps/chosen": -102.83154296875, + "logps/rejected": -640.8211669921875, + "loss": 0.046, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.28494149446487427, + "rewards/margins": 5.686570644378662, + "rewards/rejected": -5.9715118408203125, + "step": 2690 + }, + { + "epoch": 0.16, + "learning_rate": 2.6833631484794276e-06, + "logits/chosen": -2.8962249755859375, + "logits/rejected": -2.781036615371704, + "logps/chosen": -94.19402313232422, + "logps/rejected": -587.6622924804688, + "loss": 0.0361, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2963286340236664, + "rewards/margins": 5.140114784240723, + "rewards/rejected": -5.43644380569458, + "step": 2700 + }, + { + "epoch": 0.16, + "learning_rate": 2.693301530510833e-06, + "logits/chosen": -2.853874683380127, + "logits/rejected": -2.7567248344421387, + "logps/chosen": -88.25701904296875, + "logps/rejected": -592.0921630859375, + "loss": 0.0395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20549853146076202, + "rewards/margins": 5.274724960327148, + "rewards/rejected": -5.480223655700684, + "step": 2710 + }, + { + "epoch": 0.16, + "learning_rate": 2.703239912542238e-06, + "logits/chosen": -2.869687080383301, + "logits/rejected": -2.745649814605713, + "logps/chosen": -131.42483520507812, + "logps/rejected": -611.7410278320312, + "loss": 0.0537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5350287556648254, + "rewards/margins": 5.121642112731934, + "rewards/rejected": -5.656671047210693, + "step": 2720 + }, + { + "epoch": 0.16, + "learning_rate": 2.7131782945736433e-06, + "logits/chosen": -2.862182140350342, + "logits/rejected": -2.7179338932037354, + "logps/chosen": -93.20243835449219, + "logps/rejected": -642.1387939453125, + "loss": 0.0434, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24539831280708313, + "rewards/margins": 5.739306449890137, + "rewards/rejected": -5.984704494476318, + "step": 2730 + }, + { + "epoch": 0.16, + "learning_rate": 2.723116676605049e-06, + "logits/chosen": -2.8901262283325195, + "logits/rejected": -2.797729969024658, + "logps/chosen": -84.1973876953125, + "logps/rejected": -688.47802734375, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1470116674900055, + "rewards/margins": 6.291937828063965, + "rewards/rejected": -6.4389495849609375, + "step": 2740 + }, + { + "epoch": 0.16, + "learning_rate": 2.733055058636454e-06, + "logits/chosen": -2.8824591636657715, + "logits/rejected": -2.7746615409851074, + "logps/chosen": -91.85446166992188, + "logps/rejected": -648.22021484375, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25233158469200134, + "rewards/margins": 5.790163993835449, + "rewards/rejected": -6.0424957275390625, + "step": 2750 + }, + { + "epoch": 0.16, + "learning_rate": 2.7429934406678594e-06, + "logits/chosen": -2.8513290882110596, + "logits/rejected": -2.7744479179382324, + "logps/chosen": -103.04087829589844, + "logps/rejected": -616.8130493164062, + "loss": 0.0402, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35844701528549194, + "rewards/margins": 5.371678352355957, + "rewards/rejected": -5.730125427246094, + "step": 2760 + }, + { + "epoch": 0.17, + "learning_rate": 2.7529318226992646e-06, + "logits/chosen": -2.894268035888672, + "logits/rejected": -2.794651746749878, + "logps/chosen": -145.3050994873047, + "logps/rejected": -715.2186279296875, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7133463025093079, + "rewards/margins": 5.989928245544434, + "rewards/rejected": -6.703274726867676, + "step": 2770 + }, + { + "epoch": 0.17, + "learning_rate": 2.76287020473067e-06, + "logits/chosen": -2.8498189449310303, + "logits/rejected": -2.7287516593933105, + "logps/chosen": -99.15318298339844, + "logps/rejected": -626.75244140625, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28488004207611084, + "rewards/margins": 5.550641059875488, + "rewards/rejected": -5.835521221160889, + "step": 2780 + }, + { + "epoch": 0.17, + "learning_rate": 2.772808586762075e-06, + "logits/chosen": -2.8795785903930664, + "logits/rejected": -2.744080066680908, + "logps/chosen": -97.97019958496094, + "logps/rejected": -655.9735107421875, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.263166606426239, + "rewards/margins": 5.860152244567871, + "rewards/rejected": -6.123318672180176, + "step": 2790 + }, + { + "epoch": 0.17, + "learning_rate": 2.7827469687934803e-06, + "logits/chosen": -2.894972562789917, + "logits/rejected": -2.808884859085083, + "logps/chosen": -133.23939514160156, + "logps/rejected": -672.1030883789062, + "loss": 0.0425, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6367225646972656, + "rewards/margins": 5.641109943389893, + "rewards/rejected": -6.277831554412842, + "step": 2800 + }, + { + "epoch": 0.17, + "learning_rate": 2.792685350824886e-06, + "logits/chosen": -2.8780696392059326, + "logits/rejected": -2.747467517852783, + "logps/chosen": -104.76585388183594, + "logps/rejected": -729.0770263671875, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34846994280815125, + "rewards/margins": 6.499715328216553, + "rewards/rejected": -6.848184108734131, + "step": 2810 + }, + { + "epoch": 0.17, + "learning_rate": 2.802623732856291e-06, + "logits/chosen": -2.8922054767608643, + "logits/rejected": -2.7903504371643066, + "logps/chosen": -93.2664794921875, + "logps/rejected": -633.392578125, + "loss": 0.0448, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23111975193023682, + "rewards/margins": 5.665144920349121, + "rewards/rejected": -5.896264553070068, + "step": 2820 + }, + { + "epoch": 0.17, + "learning_rate": 2.8125621148876964e-06, + "logits/chosen": -2.867053508758545, + "logits/rejected": -2.768561840057373, + "logps/chosen": -92.99760437011719, + "logps/rejected": -725.6866455078125, + "loss": 0.0389, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21966056525707245, + "rewards/margins": 6.586615085601807, + "rewards/rejected": -6.806275844573975, + "step": 2830 + }, + { + "epoch": 0.17, + "learning_rate": 2.8225004969191017e-06, + "logits/chosen": -2.8811697959899902, + "logits/rejected": -2.7710976600646973, + "logps/chosen": -117.5415267944336, + "logps/rejected": -709.8499145507812, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4142531752586365, + "rewards/margins": 6.226785659790039, + "rewards/rejected": -6.641039848327637, + "step": 2840 + }, + { + "epoch": 0.17, + "learning_rate": 2.832438878950507e-06, + "logits/chosen": -2.901637315750122, + "logits/rejected": -2.817617416381836, + "logps/chosen": -94.924072265625, + "logps/rejected": -769.3198852539062, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2212250977754593, + "rewards/margins": 7.029671669006348, + "rewards/rejected": -7.250896453857422, + "step": 2850 + }, + { + "epoch": 0.17, + "learning_rate": 2.842377260981912e-06, + "logits/chosen": -2.9103469848632812, + "logits/rejected": -2.801241874694824, + "logps/chosen": -115.74806213378906, + "logps/rejected": -647.172119140625, + "loss": 0.0429, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4390796720981598, + "rewards/margins": 5.587935447692871, + "rewards/rejected": -6.027014255523682, + "step": 2860 + }, + { + "epoch": 0.17, + "learning_rate": 2.8523156430133174e-06, + "logits/chosen": -2.8404757976531982, + "logits/rejected": -2.761259078979492, + "logps/chosen": -114.18792724609375, + "logps/rejected": -638.3275146484375, + "loss": 0.035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3874098062515259, + "rewards/margins": 5.547663688659668, + "rewards/rejected": -5.935073375701904, + "step": 2870 + }, + { + "epoch": 0.17, + "learning_rate": 2.862254025044723e-06, + "logits/chosen": -2.853609561920166, + "logits/rejected": -2.682980537414551, + "logps/chosen": -101.93614196777344, + "logps/rejected": -635.2669067382812, + "loss": 0.038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3145122230052948, + "rewards/margins": 5.5879926681518555, + "rewards/rejected": -5.902504920959473, + "step": 2880 + }, + { + "epoch": 0.17, + "learning_rate": 2.8721924070761282e-06, + "logits/chosen": -2.88828706741333, + "logits/rejected": -2.8116888999938965, + "logps/chosen": -112.49085998535156, + "logps/rejected": -717.3214111328125, + "loss": 0.0439, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4062039256095886, + "rewards/margins": 6.320347785949707, + "rewards/rejected": -6.726551055908203, + "step": 2890 + }, + { + "epoch": 0.17, + "learning_rate": 2.8821307891075335e-06, + "logits/chosen": -2.888155221939087, + "logits/rejected": -2.7694592475891113, + "logps/chosen": -117.2420883178711, + "logps/rejected": -724.3717651367188, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5135464668273926, + "rewards/margins": 6.289589881896973, + "rewards/rejected": -6.803136348724365, + "step": 2900 + }, + { + "epoch": 0.17, + "learning_rate": 2.8920691711389387e-06, + "logits/chosen": -2.893031597137451, + "logits/rejected": -2.7797048091888428, + "logps/chosen": -102.85868072509766, + "logps/rejected": -757.6156616210938, + "loss": 0.0364, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30193406343460083, + "rewards/margins": 6.821624755859375, + "rewards/rejected": -7.12355899810791, + "step": 2910 + }, + { + "epoch": 0.17, + "learning_rate": 2.902007553170344e-06, + "logits/chosen": -2.8646652698516846, + "logits/rejected": -2.7581627368927, + "logps/chosen": -102.48793029785156, + "logps/rejected": -704.0696411132812, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33571746945381165, + "rewards/margins": 6.269131660461426, + "rewards/rejected": -6.604849338531494, + "step": 2920 + }, + { + "epoch": 0.17, + "learning_rate": 2.911945935201749e-06, + "logits/chosen": -2.8784122467041016, + "logits/rejected": -2.744175434112549, + "logps/chosen": -112.38508605957031, + "logps/rejected": -682.3624877929688, + "loss": 0.0306, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.398209810256958, + "rewards/margins": 5.9899821281433105, + "rewards/rejected": -6.388192176818848, + "step": 2930 + }, + { + "epoch": 0.18, + "learning_rate": 2.9218843172331544e-06, + "logits/chosen": -2.856950283050537, + "logits/rejected": -2.740995168685913, + "logps/chosen": -115.57112884521484, + "logps/rejected": -736.8250732421875, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3931712210178375, + "rewards/margins": 6.53338098526001, + "rewards/rejected": -6.926552772521973, + "step": 2940 + }, + { + "epoch": 0.18, + "learning_rate": 2.93182269926456e-06, + "logits/chosen": -2.8637847900390625, + "logits/rejected": -2.78702974319458, + "logps/chosen": -96.43524169921875, + "logps/rejected": -756.9443359375, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2518252730369568, + "rewards/margins": 6.866809844970703, + "rewards/rejected": -7.118635654449463, + "step": 2950 + }, + { + "epoch": 0.18, + "learning_rate": 2.9417610812959653e-06, + "logits/chosen": -2.8927760124206543, + "logits/rejected": -2.756986618041992, + "logps/chosen": -100.75493621826172, + "logps/rejected": -650.0506591796875, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.28601452708244324, + "rewards/margins": 5.783627510070801, + "rewards/rejected": -6.069641590118408, + "step": 2960 + }, + { + "epoch": 0.18, + "learning_rate": 2.9516994633273705e-06, + "logits/chosen": -2.865492105484009, + "logits/rejected": -2.751094341278076, + "logps/chosen": -112.09814453125, + "logps/rejected": -729.6751708984375, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37113359570503235, + "rewards/margins": 6.464478492736816, + "rewards/rejected": -6.835613250732422, + "step": 2970 + }, + { + "epoch": 0.18, + "learning_rate": 2.9616378453587757e-06, + "logits/chosen": -2.8587982654571533, + "logits/rejected": -2.7710373401641846, + "logps/chosen": -90.50580596923828, + "logps/rejected": -711.6503295898438, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2031019926071167, + "rewards/margins": 6.457244873046875, + "rewards/rejected": -6.660346984863281, + "step": 2980 + }, + { + "epoch": 0.18, + "learning_rate": 2.971576227390181e-06, + "logits/chosen": -2.8674347400665283, + "logits/rejected": -2.769869089126587, + "logps/chosen": -125.6655044555664, + "logps/rejected": -752.1784057617188, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.580061674118042, + "rewards/margins": 6.492389678955078, + "rewards/rejected": -7.072451591491699, + "step": 2990 + }, + { + "epoch": 0.18, + "learning_rate": 2.981514609421586e-06, + "logits/chosen": -2.8679070472717285, + "logits/rejected": -2.773073673248291, + "logps/chosen": -132.1669921875, + "logps/rejected": -714.5845947265625, + "loss": 0.0475, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6375214457511902, + "rewards/margins": 6.075433731079102, + "rewards/rejected": -6.712954521179199, + "step": 3000 + }, + { + "epoch": 0.18, + "learning_rate": 2.9914529914529914e-06, + "logits/chosen": -2.8939924240112305, + "logits/rejected": -2.80106782913208, + "logps/chosen": -103.74288177490234, + "logps/rejected": -640.6436767578125, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2855769991874695, + "rewards/margins": 5.671343803405762, + "rewards/rejected": -5.956920623779297, + "step": 3010 + }, + { + "epoch": 0.18, + "learning_rate": 3.001391373484397e-06, + "logits/chosen": -2.8784306049346924, + "logits/rejected": -2.789215564727783, + "logps/chosen": -74.49238586425781, + "logps/rejected": -657.7066650390625, + "loss": 0.0328, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0775570496916771, + "rewards/margins": 6.071505546569824, + "rewards/rejected": -6.1490631103515625, + "step": 3020 + }, + { + "epoch": 0.18, + "learning_rate": 3.0113297555158023e-06, + "logits/chosen": -2.8953421115875244, + "logits/rejected": -2.7964260578155518, + "logps/chosen": -95.72850799560547, + "logps/rejected": -725.2684326171875, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23079900443553925, + "rewards/margins": 6.5758161544799805, + "rewards/rejected": -6.806616306304932, + "step": 3030 + }, + { + "epoch": 0.18, + "learning_rate": 3.0212681375472075e-06, + "logits/chosen": -2.885683536529541, + "logits/rejected": -2.788086414337158, + "logps/chosen": -100.4228515625, + "logps/rejected": -740.9483642578125, + "loss": 0.0424, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2992871403694153, + "rewards/margins": 6.679093837738037, + "rewards/rejected": -6.9783806800842285, + "step": 3040 + }, + { + "epoch": 0.18, + "learning_rate": 3.0312065195786128e-06, + "logits/chosen": -2.8492419719696045, + "logits/rejected": -2.7747936248779297, + "logps/chosen": -89.10025787353516, + "logps/rejected": -634.396728515625, + "loss": 0.0629, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.21653075516223907, + "rewards/margins": 5.674725532531738, + "rewards/rejected": -5.8912553787231445, + "step": 3050 + }, + { + "epoch": 0.18, + "learning_rate": 3.041144901610018e-06, + "logits/chosen": -2.8707456588745117, + "logits/rejected": -2.7658143043518066, + "logps/chosen": -87.15570068359375, + "logps/rejected": -694.0610961914062, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2077595740556717, + "rewards/margins": 6.288426399230957, + "rewards/rejected": -6.496186256408691, + "step": 3060 + }, + { + "epoch": 0.18, + "learning_rate": 3.0510832836414232e-06, + "logits/chosen": -2.9070446491241455, + "logits/rejected": -2.780196189880371, + "logps/chosen": -115.8943099975586, + "logps/rejected": -795.0138549804688, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5137068033218384, + "rewards/margins": 6.982461452484131, + "rewards/rejected": -7.496167182922363, + "step": 3070 + }, + { + "epoch": 0.18, + "learning_rate": 3.0610216656728285e-06, + "logits/chosen": -2.9139535427093506, + "logits/rejected": -2.8067004680633545, + "logps/chosen": -101.54532623291016, + "logps/rejected": -810.957275390625, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3749556541442871, + "rewards/margins": 7.290726661682129, + "rewards/rejected": -7.665682792663574, + "step": 3080 + }, + { + "epoch": 0.18, + "learning_rate": 3.070960047704234e-06, + "logits/chosen": -2.8890957832336426, + "logits/rejected": -2.8002617359161377, + "logps/chosen": -121.07295227050781, + "logps/rejected": -731.0221557617188, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4476683735847473, + "rewards/margins": 6.415041923522949, + "rewards/rejected": -6.862710475921631, + "step": 3090 + }, + { + "epoch": 0.18, + "learning_rate": 3.0808984297356394e-06, + "logits/chosen": -2.8968539237976074, + "logits/rejected": -2.8133416175842285, + "logps/chosen": -102.348388671875, + "logps/rejected": -726.0812377929688, + "loss": 0.0368, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29068514704704285, + "rewards/margins": 6.5149712562561035, + "rewards/rejected": -6.805656433105469, + "step": 3100 + }, + { + "epoch": 0.19, + "learning_rate": 3.0908368117670446e-06, + "logits/chosen": -2.9044034481048584, + "logits/rejected": -2.77488374710083, + "logps/chosen": -95.65206146240234, + "logps/rejected": -806.8651123046875, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2751914858818054, + "rewards/margins": 7.348446846008301, + "rewards/rejected": -7.623639106750488, + "step": 3110 + }, + { + "epoch": 0.19, + "learning_rate": 3.10077519379845e-06, + "logits/chosen": -2.899625778198242, + "logits/rejected": -2.8098533153533936, + "logps/chosen": -142.45399475097656, + "logps/rejected": -736.4090576171875, + "loss": 0.0668, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6934425830841064, + "rewards/margins": 6.211747646331787, + "rewards/rejected": -6.905190467834473, + "step": 3120 + }, + { + "epoch": 0.19, + "learning_rate": 3.110713575829855e-06, + "logits/chosen": -2.8865866661071777, + "logits/rejected": -2.812035083770752, + "logps/chosen": -99.13453674316406, + "logps/rejected": -742.4283447265625, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24457857012748718, + "rewards/margins": 6.7325944900512695, + "rewards/rejected": -6.9771728515625, + "step": 3130 + }, + { + "epoch": 0.19, + "learning_rate": 3.1206519578612603e-06, + "logits/chosen": -2.8337085247039795, + "logits/rejected": -2.742119789123535, + "logps/chosen": -96.2691879272461, + "logps/rejected": -757.0511474609375, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29792946577072144, + "rewards/margins": 6.821341037750244, + "rewards/rejected": -7.119271278381348, + "step": 3140 + }, + { + "epoch": 0.19, + "learning_rate": 3.1305903398926655e-06, + "logits/chosen": -2.8743724822998047, + "logits/rejected": -2.7733163833618164, + "logps/chosen": -135.0880889892578, + "logps/rejected": -809.2984008789062, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6021624803543091, + "rewards/margins": 7.044102668762207, + "rewards/rejected": -7.646264553070068, + "step": 3150 + }, + { + "epoch": 0.19, + "learning_rate": 3.140528721924071e-06, + "logits/chosen": -2.855893850326538, + "logits/rejected": -2.7896721363067627, + "logps/chosen": -103.2000503540039, + "logps/rejected": -722.926513671875, + "loss": 0.0394, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30653971433639526, + "rewards/margins": 6.48551082611084, + "rewards/rejected": -6.792050838470459, + "step": 3160 + }, + { + "epoch": 0.19, + "learning_rate": 3.1504671039554764e-06, + "logits/chosen": -2.895770311355591, + "logits/rejected": -2.795011520385742, + "logps/chosen": -100.98402404785156, + "logps/rejected": -718.298583984375, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3586081862449646, + "rewards/margins": 6.387120246887207, + "rewards/rejected": -6.745728492736816, + "step": 3170 + }, + { + "epoch": 0.19, + "learning_rate": 3.1604054859868816e-06, + "logits/chosen": -2.8831334114074707, + "logits/rejected": -2.772667407989502, + "logps/chosen": -98.63158416748047, + "logps/rejected": -748.1744384765625, + "loss": 0.0372, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2982601225376129, + "rewards/margins": 6.739428520202637, + "rewards/rejected": -7.037688255310059, + "step": 3180 + }, + { + "epoch": 0.19, + "learning_rate": 3.170343868018287e-06, + "logits/chosen": -2.909309148788452, + "logits/rejected": -2.823024272918701, + "logps/chosen": -123.8080825805664, + "logps/rejected": -781.291259765625, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4999138414859772, + "rewards/margins": 6.857600212097168, + "rewards/rejected": -7.35751485824585, + "step": 3190 + }, + { + "epoch": 0.19, + "learning_rate": 3.180282250049692e-06, + "logits/chosen": -2.8730711936950684, + "logits/rejected": -2.782789945602417, + "logps/chosen": -109.0028305053711, + "logps/rejected": -754.8485107421875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30224376916885376, + "rewards/margins": 6.802436828613281, + "rewards/rejected": -7.104680061340332, + "step": 3200 + }, + { + "epoch": 0.19, + "learning_rate": 3.1902206320810973e-06, + "logits/chosen": -2.870027542114258, + "logits/rejected": -2.7576565742492676, + "logps/chosen": -119.55351257324219, + "logps/rejected": -850.5467529296875, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5007976293563843, + "rewards/margins": 7.551764488220215, + "rewards/rejected": -8.052562713623047, + "step": 3210 + }, + { + "epoch": 0.19, + "learning_rate": 3.2001590141125026e-06, + "logits/chosen": -2.8452816009521484, + "logits/rejected": -2.772615909576416, + "logps/chosen": -128.43690490722656, + "logps/rejected": -804.0809326171875, + "loss": 0.0375, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6223068833351135, + "rewards/margins": 6.970645904541016, + "rewards/rejected": -7.592953681945801, + "step": 3220 + }, + { + "epoch": 0.19, + "learning_rate": 3.210097396143908e-06, + "logits/chosen": -2.8709464073181152, + "logits/rejected": -2.815075397491455, + "logps/chosen": -118.72892761230469, + "logps/rejected": -690.2423095703125, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5030520558357239, + "rewards/margins": 5.958141326904297, + "rewards/rejected": -6.461194038391113, + "step": 3230 + }, + { + "epoch": 0.19, + "learning_rate": 3.2200357781753134e-06, + "logits/chosen": -2.86689829826355, + "logits/rejected": -2.7892942428588867, + "logps/chosen": -109.1916732788086, + "logps/rejected": -629.1363525390625, + "loss": 0.0457, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3853798508644104, + "rewards/margins": 5.479775905609131, + "rewards/rejected": -5.865156173706055, + "step": 3240 + }, + { + "epoch": 0.19, + "learning_rate": 3.2299741602067187e-06, + "logits/chosen": -2.858123302459717, + "logits/rejected": -2.75048828125, + "logps/chosen": -85.06198120117188, + "logps/rejected": -805.44189453125, + "loss": 0.0265, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13197161257266998, + "rewards/margins": 7.476771354675293, + "rewards/rejected": -7.608744144439697, + "step": 3250 + }, + { + "epoch": 0.19, + "learning_rate": 3.239912542238124e-06, + "logits/chosen": -2.873476028442383, + "logits/rejected": -2.787125587463379, + "logps/chosen": -101.66516876220703, + "logps/rejected": -781.8282470703125, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2967422306537628, + "rewards/margins": 7.064032554626465, + "rewards/rejected": -7.360775947570801, + "step": 3260 + }, + { + "epoch": 0.19, + "learning_rate": 3.249850924269529e-06, + "logits/chosen": -2.8262104988098145, + "logits/rejected": -2.761509418487549, + "logps/chosen": -108.12437438964844, + "logps/rejected": -756.9655151367188, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3981439471244812, + "rewards/margins": 6.725272178649902, + "rewards/rejected": -7.123415946960449, + "step": 3270 + }, + { + "epoch": 0.2, + "learning_rate": 3.2597893063009344e-06, + "logits/chosen": -2.8656084537506104, + "logits/rejected": -2.734905481338501, + "logps/chosen": -104.09513092041016, + "logps/rejected": -784.6286010742188, + "loss": 0.0251, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2940555214881897, + "rewards/margins": 7.11794376373291, + "rewards/rejected": -7.411998748779297, + "step": 3280 + }, + { + "epoch": 0.2, + "learning_rate": 3.2697276883323396e-06, + "logits/chosen": -2.8959453105926514, + "logits/rejected": -2.8011155128479004, + "logps/chosen": -109.53990173339844, + "logps/rejected": -778.5452880859375, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42634859681129456, + "rewards/margins": 6.917084693908691, + "rewards/rejected": -7.343432426452637, + "step": 3290 + }, + { + "epoch": 0.2, + "learning_rate": 3.2796660703637452e-06, + "logits/chosen": -2.8623619079589844, + "logits/rejected": -2.7654006481170654, + "logps/chosen": -99.03923034667969, + "logps/rejected": -755.8483276367188, + "loss": 0.0369, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2554091215133667, + "rewards/margins": 6.852990627288818, + "rewards/rejected": -7.108399868011475, + "step": 3300 + }, + { + "epoch": 0.2, + "learning_rate": 3.2896044523951505e-06, + "logits/chosen": -2.8669679164886475, + "logits/rejected": -2.773939609527588, + "logps/chosen": -114.67442321777344, + "logps/rejected": -838.4226684570312, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43710631132125854, + "rewards/margins": 7.500557899475098, + "rewards/rejected": -7.937664985656738, + "step": 3310 + }, + { + "epoch": 0.2, + "learning_rate": 3.2995428344265557e-06, + "logits/chosen": -2.8922908306121826, + "logits/rejected": -2.831644058227539, + "logps/chosen": -128.57049560546875, + "logps/rejected": -792.2843017578125, + "loss": 0.0433, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6134609580039978, + "rewards/margins": 6.868658542633057, + "rewards/rejected": -7.482119560241699, + "step": 3320 + }, + { + "epoch": 0.2, + "learning_rate": 3.309481216457961e-06, + "logits/chosen": -2.895716905593872, + "logits/rejected": -2.8028347492218018, + "logps/chosen": -84.60770416259766, + "logps/rejected": -719.1607055664062, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12492235004901886, + "rewards/margins": 6.625997066497803, + "rewards/rejected": -6.750920295715332, + "step": 3330 + }, + { + "epoch": 0.2, + "learning_rate": 3.319419598489366e-06, + "logits/chosen": -2.871966600418091, + "logits/rejected": -2.804713249206543, + "logps/chosen": -84.00405883789062, + "logps/rejected": -698.4354248046875, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15816029906272888, + "rewards/margins": 6.397922515869141, + "rewards/rejected": -6.556082725524902, + "step": 3340 + }, + { + "epoch": 0.2, + "learning_rate": 3.3293579805207714e-06, + "logits/chosen": -2.847782611846924, + "logits/rejected": -2.7515032291412354, + "logps/chosen": -110.1525650024414, + "logps/rejected": -759.5135498046875, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4128446578979492, + "rewards/margins": 6.728204250335693, + "rewards/rejected": -7.141049385070801, + "step": 3350 + }, + { + "epoch": 0.2, + "learning_rate": 3.3392963625521766e-06, + "logits/chosen": -2.887519121170044, + "logits/rejected": -2.8123087882995605, + "logps/chosen": -101.87709045410156, + "logps/rejected": -814.0603637695312, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3294782042503357, + "rewards/margins": 7.369837760925293, + "rewards/rejected": -7.699315547943115, + "step": 3360 + }, + { + "epoch": 0.2, + "learning_rate": 3.3492347445835823e-06, + "logits/chosen": -2.9011664390563965, + "logits/rejected": -2.798304319381714, + "logps/chosen": -93.92784118652344, + "logps/rejected": -771.7990112304688, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22625771164894104, + "rewards/margins": 7.052165985107422, + "rewards/rejected": -7.2784247398376465, + "step": 3370 + }, + { + "epoch": 0.2, + "learning_rate": 3.3591731266149875e-06, + "logits/chosen": -2.88793683052063, + "logits/rejected": -2.7970547676086426, + "logps/chosen": -90.79705810546875, + "logps/rejected": -872.1009521484375, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21846094727516174, + "rewards/margins": 8.04808521270752, + "rewards/rejected": -8.266546249389648, + "step": 3380 + }, + { + "epoch": 0.2, + "learning_rate": 3.3691115086463927e-06, + "logits/chosen": -2.896589756011963, + "logits/rejected": -2.814263105392456, + "logps/chosen": -99.1661376953125, + "logps/rejected": -772.0339965820312, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30495157837867737, + "rewards/margins": 6.970147132873535, + "rewards/rejected": -7.275099277496338, + "step": 3390 + }, + { + "epoch": 0.2, + "learning_rate": 3.379049890677798e-06, + "logits/chosen": -2.8762378692626953, + "logits/rejected": -2.746377468109131, + "logps/chosen": -88.71979522705078, + "logps/rejected": -828.6023559570312, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1964990496635437, + "rewards/margins": 7.65085506439209, + "rewards/rejected": -7.847353935241699, + "step": 3400 + }, + { + "epoch": 0.2, + "learning_rate": 3.388988272709203e-06, + "logits/chosen": -2.868356227874756, + "logits/rejected": -2.7811756134033203, + "logps/chosen": -106.02751159667969, + "logps/rejected": -779.9949951171875, + "loss": 0.0325, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.386616051197052, + "rewards/margins": 6.966149806976318, + "rewards/rejected": -7.3527655601501465, + "step": 3410 + }, + { + "epoch": 0.2, + "learning_rate": 3.3989266547406084e-06, + "logits/chosen": -2.87636661529541, + "logits/rejected": -2.7978787422180176, + "logps/chosen": -101.76636505126953, + "logps/rejected": -801.4238891601562, + "loss": 0.0311, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2667098641395569, + "rewards/margins": 7.293368339538574, + "rewards/rejected": -7.5600786209106445, + "step": 3420 + }, + { + "epoch": 0.2, + "learning_rate": 3.4088650367720137e-06, + "logits/chosen": -2.8780012130737305, + "logits/rejected": -2.7871527671813965, + "logps/chosen": -105.42164611816406, + "logps/rejected": -681.2168579101562, + "loss": 0.0642, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.3864505887031555, + "rewards/margins": 5.998083591461182, + "rewards/rejected": -6.3845343589782715, + "step": 3430 + }, + { + "epoch": 0.21, + "learning_rate": 3.4188034188034193e-06, + "logits/chosen": -2.8682215213775635, + "logits/rejected": -2.7619152069091797, + "logps/chosen": -97.71653747558594, + "logps/rejected": -776.6664428710938, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2618548572063446, + "rewards/margins": 7.072068214416504, + "rewards/rejected": -7.33392333984375, + "step": 3440 + }, + { + "epoch": 0.21, + "learning_rate": 3.4287418008348246e-06, + "logits/chosen": -2.90720272064209, + "logits/rejected": -2.794860363006592, + "logps/chosen": -106.35716247558594, + "logps/rejected": -762.6890869140625, + "loss": 0.14, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.36655163764953613, + "rewards/margins": 6.828574180603027, + "rewards/rejected": -7.195125579833984, + "step": 3450 + }, + { + "epoch": 0.21, + "learning_rate": 3.4386801828662298e-06, + "logits/chosen": -2.8883166313171387, + "logits/rejected": -2.779663562774658, + "logps/chosen": -89.3853759765625, + "logps/rejected": -755.2701416015625, + "loss": 0.041, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19726444780826569, + "rewards/margins": 6.923166751861572, + "rewards/rejected": -7.120430946350098, + "step": 3460 + }, + { + "epoch": 0.21, + "learning_rate": 3.448618564897635e-06, + "logits/chosen": -2.882200002670288, + "logits/rejected": -2.7843189239501953, + "logps/chosen": -114.78018951416016, + "logps/rejected": -751.6347045898438, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4448794424533844, + "rewards/margins": 6.623892307281494, + "rewards/rejected": -7.0687713623046875, + "step": 3470 + }, + { + "epoch": 0.21, + "learning_rate": 3.4585569469290402e-06, + "logits/chosen": -2.891385555267334, + "logits/rejected": -2.831627368927002, + "logps/chosen": -97.48558044433594, + "logps/rejected": -726.0615234375, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2701844573020935, + "rewards/margins": 6.558047294616699, + "rewards/rejected": -6.828232765197754, + "step": 3480 + }, + { + "epoch": 0.21, + "learning_rate": 3.4684953289604455e-06, + "logits/chosen": -2.8722965717315674, + "logits/rejected": -2.790235996246338, + "logps/chosen": -123.46501159667969, + "logps/rejected": -749.0413208007812, + "loss": 0.0504, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5368173718452454, + "rewards/margins": 6.509427070617676, + "rewards/rejected": -7.046244144439697, + "step": 3490 + }, + { + "epoch": 0.21, + "learning_rate": 3.4784337109918507e-06, + "logits/chosen": -2.886448860168457, + "logits/rejected": -2.8091464042663574, + "logps/chosen": -101.61396789550781, + "logps/rejected": -792.6351318359375, + "loss": 0.0267, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32578447461128235, + "rewards/margins": 7.169427394866943, + "rewards/rejected": -7.495211601257324, + "step": 3500 + }, + { + "epoch": 0.21, + "learning_rate": 3.4883720930232564e-06, + "logits/chosen": -2.860628843307495, + "logits/rejected": -2.749011993408203, + "logps/chosen": -103.96290588378906, + "logps/rejected": -800.7093505859375, + "loss": 0.0396, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30552858114242554, + "rewards/margins": 7.246849060058594, + "rewards/rejected": -7.5523786544799805, + "step": 3510 + }, + { + "epoch": 0.21, + "learning_rate": 3.4983104750546616e-06, + "logits/chosen": -2.8787035942077637, + "logits/rejected": -2.7889010906219482, + "logps/chosen": -100.57218170166016, + "logps/rejected": -846.87548828125, + "loss": 0.0274, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25877290964126587, + "rewards/margins": 7.759941101074219, + "rewards/rejected": -8.018714904785156, + "step": 3520 + }, + { + "epoch": 0.21, + "learning_rate": 3.508248857086067e-06, + "logits/chosen": -2.8859002590179443, + "logits/rejected": -2.8225820064544678, + "logps/chosen": -79.11035919189453, + "logps/rejected": -674.9633178710938, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13291481137275696, + "rewards/margins": 6.167105674743652, + "rewards/rejected": -6.300020217895508, + "step": 3530 + }, + { + "epoch": 0.21, + "learning_rate": 3.518187239117472e-06, + "logits/chosen": -2.8860044479370117, + "logits/rejected": -2.7526087760925293, + "logps/chosen": -96.11659240722656, + "logps/rejected": -856.5525512695312, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2972283959388733, + "rewards/margins": 7.823326110839844, + "rewards/rejected": -8.120553970336914, + "step": 3540 + }, + { + "epoch": 0.21, + "learning_rate": 3.5281256211488773e-06, + "logits/chosen": -2.8699569702148438, + "logits/rejected": -2.767526388168335, + "logps/chosen": -124.4377670288086, + "logps/rejected": -745.9036254882812, + "loss": 0.0431, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5347416996955872, + "rewards/margins": 6.48624324798584, + "rewards/rejected": -7.020984649658203, + "step": 3550 + }, + { + "epoch": 0.21, + "learning_rate": 3.5380640031802825e-06, + "logits/chosen": -2.896300792694092, + "logits/rejected": -2.7928502559661865, + "logps/chosen": -118.41725158691406, + "logps/rejected": -837.6329956054688, + "loss": 0.0358, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.5057901740074158, + "rewards/margins": 7.4295806884765625, + "rewards/rejected": -7.935372352600098, + "step": 3560 + }, + { + "epoch": 0.21, + "learning_rate": 3.5480023852116878e-06, + "logits/chosen": -2.8776259422302246, + "logits/rejected": -2.789034128189087, + "logps/chosen": -102.14850616455078, + "logps/rejected": -792.4380493164062, + "loss": 0.0385, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33452343940734863, + "rewards/margins": 7.152226448059082, + "rewards/rejected": -7.48675012588501, + "step": 3570 + }, + { + "epoch": 0.21, + "learning_rate": 3.5579407672430934e-06, + "logits/chosen": -2.926281452178955, + "logits/rejected": -2.7721123695373535, + "logps/chosen": -71.54127502441406, + "logps/rejected": -763.0891723632812, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07757097482681274, + "rewards/margins": 7.1107587814331055, + "rewards/rejected": -7.188328742980957, + "step": 3580 + }, + { + "epoch": 0.21, + "learning_rate": 3.5678791492744986e-06, + "logits/chosen": -2.902859926223755, + "logits/rejected": -2.7921199798583984, + "logps/chosen": -78.5753402709961, + "logps/rejected": -803.6421508789062, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10415215790271759, + "rewards/margins": 7.490170955657959, + "rewards/rejected": -7.594323635101318, + "step": 3590 + }, + { + "epoch": 0.21, + "learning_rate": 3.577817531305904e-06, + "logits/chosen": -2.871922254562378, + "logits/rejected": -2.7558035850524902, + "logps/chosen": -105.42036437988281, + "logps/rejected": -836.2210693359375, + "loss": 0.03, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3553544580936432, + "rewards/margins": 7.554825782775879, + "rewards/rejected": -7.910179138183594, + "step": 3600 + }, + { + "epoch": 0.22, + "learning_rate": 3.587755913337309e-06, + "logits/chosen": -2.856423854827881, + "logits/rejected": -2.780958652496338, + "logps/chosen": -122.5890884399414, + "logps/rejected": -756.6810302734375, + "loss": 0.0533, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.5257158279418945, + "rewards/margins": 6.6078782081604, + "rewards/rejected": -7.133594512939453, + "step": 3610 + }, + { + "epoch": 0.22, + "learning_rate": 3.5976942953687143e-06, + "logits/chosen": -2.9001832008361816, + "logits/rejected": -2.803459644317627, + "logps/chosen": -94.45865631103516, + "logps/rejected": -795.5335693359375, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20842942595481873, + "rewards/margins": 7.285524845123291, + "rewards/rejected": -7.493954658508301, + "step": 3620 + }, + { + "epoch": 0.22, + "learning_rate": 3.6076326774001196e-06, + "logits/chosen": -2.881312847137451, + "logits/rejected": -2.7586350440979004, + "logps/chosen": -108.30732727050781, + "logps/rejected": -772.4247436523438, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3612791895866394, + "rewards/margins": 6.921075344085693, + "rewards/rejected": -7.282354831695557, + "step": 3630 + }, + { + "epoch": 0.22, + "learning_rate": 3.617571059431525e-06, + "logits/chosen": -2.8905980587005615, + "logits/rejected": -2.7974772453308105, + "logps/chosen": -136.20376586914062, + "logps/rejected": -755.3028564453125, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6881308555603027, + "rewards/margins": 6.412988185882568, + "rewards/rejected": -7.101118564605713, + "step": 3640 + }, + { + "epoch": 0.22, + "learning_rate": 3.6275094414629304e-06, + "logits/chosen": -2.8601372241973877, + "logits/rejected": -2.7957239151000977, + "logps/chosen": -102.56700134277344, + "logps/rejected": -792.2536010742188, + "loss": 0.0308, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3082342743873596, + "rewards/margins": 7.159157752990723, + "rewards/rejected": -7.4673919677734375, + "step": 3650 + }, + { + "epoch": 0.22, + "learning_rate": 3.6374478234943357e-06, + "logits/chosen": -2.8865582942962646, + "logits/rejected": -2.798985719680786, + "logps/chosen": -125.89750671386719, + "logps/rejected": -893.3277587890625, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6020795702934265, + "rewards/margins": 7.8711042404174805, + "rewards/rejected": -8.473184585571289, + "step": 3660 + }, + { + "epoch": 0.22, + "learning_rate": 3.647386205525741e-06, + "logits/chosen": -2.879674196243286, + "logits/rejected": -2.7857232093811035, + "logps/chosen": -83.89434051513672, + "logps/rejected": -848.2125244140625, + "loss": 0.0334, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10604560375213623, + "rewards/margins": 7.938258171081543, + "rewards/rejected": -8.044303894042969, + "step": 3670 + }, + { + "epoch": 0.22, + "learning_rate": 3.657324587557146e-06, + "logits/chosen": -2.915391445159912, + "logits/rejected": -2.8057541847229004, + "logps/chosen": -87.86251831054688, + "logps/rejected": -910.5408325195312, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13155916333198547, + "rewards/margins": 8.521614074707031, + "rewards/rejected": -8.653173446655273, + "step": 3680 + }, + { + "epoch": 0.22, + "learning_rate": 3.6672629695885514e-06, + "logits/chosen": -2.8931918144226074, + "logits/rejected": -2.7850656509399414, + "logps/chosen": -109.395751953125, + "logps/rejected": -890.0810546875, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3844446837902069, + "rewards/margins": 8.073087692260742, + "rewards/rejected": -8.45753288269043, + "step": 3690 + }, + { + "epoch": 0.22, + "learning_rate": 3.6772013516199566e-06, + "logits/chosen": -2.835175037384033, + "logits/rejected": -2.7215051651000977, + "logps/chosen": -101.25102233886719, + "logps/rejected": -792.8087158203125, + "loss": 0.0382, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3104367256164551, + "rewards/margins": 7.1722235679626465, + "rewards/rejected": -7.482659816741943, + "step": 3700 + }, + { + "epoch": 0.22, + "learning_rate": 3.687139733651362e-06, + "logits/chosen": -2.8781869411468506, + "logits/rejected": -2.78737211227417, + "logps/chosen": -100.94713592529297, + "logps/rejected": -836.3469848632812, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3204847276210785, + "rewards/margins": 7.596199989318848, + "rewards/rejected": -7.916684627532959, + "step": 3710 + }, + { + "epoch": 0.22, + "learning_rate": 3.6970781156827675e-06, + "logits/chosen": -2.888472080230713, + "logits/rejected": -2.7729315757751465, + "logps/chosen": -82.83673095703125, + "logps/rejected": -830.0226440429688, + "loss": 0.0398, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16177232563495636, + "rewards/margins": 7.697686195373535, + "rewards/rejected": -7.859457969665527, + "step": 3720 + }, + { + "epoch": 0.22, + "learning_rate": 3.7070164977141727e-06, + "logits/chosen": -2.896989107131958, + "logits/rejected": -2.809595823287964, + "logps/chosen": -73.58125305175781, + "logps/rejected": -815.8229370117188, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.03899950906634331, + "rewards/margins": 7.6676926612854, + "rewards/rejected": -7.706692695617676, + "step": 3730 + }, + { + "epoch": 0.22, + "learning_rate": 3.716954879745578e-06, + "logits/chosen": -2.8610644340515137, + "logits/rejected": -2.7803854942321777, + "logps/chosen": -93.3351821899414, + "logps/rejected": -812.0931396484375, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23351725935935974, + "rewards/margins": 7.439298152923584, + "rewards/rejected": -7.672814846038818, + "step": 3740 + }, + { + "epoch": 0.22, + "learning_rate": 3.726893261776983e-06, + "logits/chosen": -2.8699851036071777, + "logits/rejected": -2.8002772331237793, + "logps/chosen": -100.68340301513672, + "logps/rejected": -794.9088134765625, + "loss": 0.0478, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3278631567955017, + "rewards/margins": 7.1846208572387695, + "rewards/rejected": -7.512484073638916, + "step": 3750 + }, + { + "epoch": 0.22, + "learning_rate": 3.7368316438083884e-06, + "logits/chosen": -2.880840301513672, + "logits/rejected": -2.760206699371338, + "logps/chosen": -108.13330078125, + "logps/rejected": -873.52099609375, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.39847293496131897, + "rewards/margins": 7.872894287109375, + "rewards/rejected": -8.271367073059082, + "step": 3760 + }, + { + "epoch": 0.22, + "learning_rate": 3.7467700258397936e-06, + "logits/chosen": -2.879608154296875, + "logits/rejected": -2.8046677112579346, + "logps/chosen": -86.13502502441406, + "logps/rejected": -779.44921875, + "loss": 0.023, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19561168551445007, + "rewards/margins": 7.156313896179199, + "rewards/rejected": -7.351926326751709, + "step": 3770 + }, + { + "epoch": 0.23, + "learning_rate": 3.756708407871199e-06, + "logits/chosen": -2.867663860321045, + "logits/rejected": -2.7800052165985107, + "logps/chosen": -99.9644546508789, + "logps/rejected": -821.3148193359375, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29270297288894653, + "rewards/margins": 7.474002838134766, + "rewards/rejected": -7.766706943511963, + "step": 3780 + }, + { + "epoch": 0.23, + "learning_rate": 3.7666467899026045e-06, + "logits/chosen": -2.877105236053467, + "logits/rejected": -2.7769827842712402, + "logps/chosen": -92.62809753417969, + "logps/rejected": -827.7981567382812, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26205572485923767, + "rewards/margins": 7.566201686859131, + "rewards/rejected": -7.8282575607299805, + "step": 3790 + }, + { + "epoch": 0.23, + "learning_rate": 3.7765851719340098e-06, + "logits/chosen": -2.906179428100586, + "logits/rejected": -2.797752857208252, + "logps/chosen": -116.90071105957031, + "logps/rejected": -897.9083862304688, + "loss": 0.0199, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4460543096065521, + "rewards/margins": 8.078927040100098, + "rewards/rejected": -8.524980545043945, + "step": 3800 + }, + { + "epoch": 0.23, + "learning_rate": 3.786523553965415e-06, + "logits/chosen": -2.8837838172912598, + "logits/rejected": -2.7829415798187256, + "logps/chosen": -108.4169921875, + "logps/rejected": -881.8220825195312, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40168505907058716, + "rewards/margins": 7.981532096862793, + "rewards/rejected": -8.383216857910156, + "step": 3810 + }, + { + "epoch": 0.23, + "learning_rate": 3.7964619359968202e-06, + "logits/chosen": -2.858712673187256, + "logits/rejected": -2.7730491161346436, + "logps/chosen": -81.34638977050781, + "logps/rejected": -772.7445068359375, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14849510788917542, + "rewards/margins": 7.142930507659912, + "rewards/rejected": -7.291426181793213, + "step": 3820 + }, + { + "epoch": 0.23, + "learning_rate": 3.8064003180282254e-06, + "logits/chosen": -2.9057259559631348, + "logits/rejected": -2.831407070159912, + "logps/chosen": -76.30581665039062, + "logps/rejected": -745.2847900390625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05467040464282036, + "rewards/margins": 6.951416969299316, + "rewards/rejected": -7.006086826324463, + "step": 3830 + }, + { + "epoch": 0.23, + "learning_rate": 3.816338700059631e-06, + "logits/chosen": -2.8766393661499023, + "logits/rejected": -2.806602954864502, + "logps/chosen": -87.28684997558594, + "logps/rejected": -762.3047485351562, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14597368240356445, + "rewards/margins": 7.0367608070373535, + "rewards/rejected": -7.182734489440918, + "step": 3840 + }, + { + "epoch": 0.23, + "learning_rate": 3.826277082091036e-06, + "logits/chosen": -2.8573126792907715, + "logits/rejected": -2.762861490249634, + "logps/chosen": -102.32160949707031, + "logps/rejected": -865.3707275390625, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.338026762008667, + "rewards/margins": 7.863969326019287, + "rewards/rejected": -8.201996803283691, + "step": 3850 + }, + { + "epoch": 0.23, + "learning_rate": 3.836215464122441e-06, + "logits/chosen": -2.8573455810546875, + "logits/rejected": -2.7690844535827637, + "logps/chosen": -109.85398864746094, + "logps/rejected": -746.8798217773438, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3081837594509125, + "rewards/margins": 6.719918727874756, + "rewards/rejected": -7.028102874755859, + "step": 3860 + }, + { + "epoch": 0.23, + "learning_rate": 3.846153846153847e-06, + "logits/chosen": -2.8753738403320312, + "logits/rejected": -2.805210590362549, + "logps/chosen": -90.96142578125, + "logps/rejected": -845.4622192382812, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17259983718395233, + "rewards/margins": 7.8229851722717285, + "rewards/rejected": -7.995584011077881, + "step": 3870 + }, + { + "epoch": 0.23, + "learning_rate": 3.856092228185252e-06, + "logits/chosen": -2.8908207416534424, + "logits/rejected": -2.7796504497528076, + "logps/chosen": -74.8409194946289, + "logps/rejected": -792.25732421875, + "loss": 0.0438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10067535936832428, + "rewards/margins": 7.377572536468506, + "rewards/rejected": -7.47824764251709, + "step": 3880 + }, + { + "epoch": 0.23, + "learning_rate": 3.866030610216657e-06, + "logits/chosen": -2.864095687866211, + "logits/rejected": -2.782834768295288, + "logps/chosen": -90.51029968261719, + "logps/rejected": -771.181640625, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17342619597911835, + "rewards/margins": 7.100832939147949, + "rewards/rejected": -7.2742600440979, + "step": 3890 + }, + { + "epoch": 0.23, + "learning_rate": 3.875968992248063e-06, + "logits/chosen": -2.8606183528900146, + "logits/rejected": -2.7344233989715576, + "logps/chosen": -110.42926025390625, + "logps/rejected": -805.1851196289062, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36736783385276794, + "rewards/margins": 7.236051082611084, + "rewards/rejected": -7.603418827056885, + "step": 3900 + }, + { + "epoch": 0.23, + "learning_rate": 3.885907374279468e-06, + "logits/chosen": -2.889190196990967, + "logits/rejected": -2.76423978805542, + "logps/chosen": -81.89059448242188, + "logps/rejected": -892.5358276367188, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09410565346479416, + "rewards/margins": 8.393627166748047, + "rewards/rejected": -8.487733840942383, + "step": 3910 + }, + { + "epoch": 0.23, + "learning_rate": 3.895845756310873e-06, + "logits/chosen": -2.8750643730163574, + "logits/rejected": -2.7506401538848877, + "logps/chosen": -95.61688232421875, + "logps/rejected": -886.45361328125, + "loss": 0.0284, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19417890906333923, + "rewards/margins": 8.221367835998535, + "rewards/rejected": -8.415545463562012, + "step": 3920 + }, + { + "epoch": 0.23, + "learning_rate": 3.905784138342278e-06, + "logits/chosen": -2.9076006412506104, + "logits/rejected": -2.7855331897735596, + "logps/chosen": -81.91569519042969, + "logps/rejected": -764.2933349609375, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12308193743228912, + "rewards/margins": 7.089491844177246, + "rewards/rejected": -7.212574005126953, + "step": 3930 + }, + { + "epoch": 0.23, + "learning_rate": 3.915722520373684e-06, + "logits/chosen": -2.8755691051483154, + "logits/rejected": -2.806122064590454, + "logps/chosen": -107.7904052734375, + "logps/rejected": -838.9762573242188, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3728943467140198, + "rewards/margins": 7.574209690093994, + "rewards/rejected": -7.947103977203369, + "step": 3940 + }, + { + "epoch": 0.24, + "learning_rate": 3.925660902405089e-06, + "logits/chosen": -2.88643479347229, + "logits/rejected": -2.8059637546539307, + "logps/chosen": -113.22367858886719, + "logps/rejected": -866.9595947265625, + "loss": 0.0263, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4272967278957367, + "rewards/margins": 7.786580562591553, + "rewards/rejected": -8.21387767791748, + "step": 3950 + }, + { + "epoch": 0.24, + "learning_rate": 3.935599284436494e-06, + "logits/chosen": -2.876192569732666, + "logits/rejected": -2.7956976890563965, + "logps/chosen": -84.031982421875, + "logps/rejected": -840.7574462890625, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1334693431854248, + "rewards/margins": 7.81915283203125, + "rewards/rejected": -7.9526214599609375, + "step": 3960 + }, + { + "epoch": 0.24, + "learning_rate": 3.9455376664679e-06, + "logits/chosen": -2.87864089012146, + "logits/rejected": -2.789992332458496, + "logps/chosen": -99.45648193359375, + "logps/rejected": -884.9114990234375, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2999047636985779, + "rewards/margins": 8.109491348266602, + "rewards/rejected": -8.40939712524414, + "step": 3970 + }, + { + "epoch": 0.24, + "learning_rate": 3.955476048499305e-06, + "logits/chosen": -2.8718109130859375, + "logits/rejected": -2.7711336612701416, + "logps/chosen": -98.18769836425781, + "logps/rejected": -813.6920776367188, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3121017515659332, + "rewards/margins": 7.382171630859375, + "rewards/rejected": -7.694273471832275, + "step": 3980 + }, + { + "epoch": 0.24, + "learning_rate": 3.96541443053071e-06, + "logits/chosen": -2.9112489223480225, + "logits/rejected": -2.793928861618042, + "logps/chosen": -103.82417297363281, + "logps/rejected": -828.8631591796875, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22519716620445251, + "rewards/margins": 7.631234169006348, + "rewards/rejected": -7.856431007385254, + "step": 3990 + }, + { + "epoch": 0.24, + "learning_rate": 3.975352812562115e-06, + "logits/chosen": -2.9005661010742188, + "logits/rejected": -2.808104991912842, + "logps/chosen": -97.10401916503906, + "logps/rejected": -915.4630737304688, + "loss": 0.0356, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2212142050266266, + "rewards/margins": 8.492456436157227, + "rewards/rejected": -8.71367073059082, + "step": 4000 + }, + { + "epoch": 0.24, + "learning_rate": 3.985291194593521e-06, + "logits/chosen": -2.891897678375244, + "logits/rejected": -2.814427137374878, + "logps/chosen": -99.99630737304688, + "logps/rejected": -845.1765747070312, + "loss": 0.0332, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33133023977279663, + "rewards/margins": 7.669382572174072, + "rewards/rejected": -8.000712394714355, + "step": 4010 + }, + { + "epoch": 0.24, + "learning_rate": 3.995229576624926e-06, + "logits/chosen": -2.9120559692382812, + "logits/rejected": -2.80311918258667, + "logps/chosen": -89.0915298461914, + "logps/rejected": -895.9578247070312, + "loss": 0.0234, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1533184051513672, + "rewards/margins": 8.355627059936523, + "rewards/rejected": -8.50894546508789, + "step": 4020 + }, + { + "epoch": 0.24, + "learning_rate": 4.005167958656331e-06, + "logits/chosen": -2.8761541843414307, + "logits/rejected": -2.7823047637939453, + "logps/chosen": -94.7694091796875, + "logps/rejected": -935.4983520507812, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2034631222486496, + "rewards/margins": 8.69825553894043, + "rewards/rejected": -8.901717185974121, + "step": 4030 + }, + { + "epoch": 0.24, + "learning_rate": 4.015106340687737e-06, + "logits/chosen": -2.860116481781006, + "logits/rejected": -2.7542643547058105, + "logps/chosen": -94.5132827758789, + "logps/rejected": -921.6387939453125, + "loss": 0.0782, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21807973086833954, + "rewards/margins": 8.567270278930664, + "rewards/rejected": -8.785350799560547, + "step": 4040 + }, + { + "epoch": 0.24, + "learning_rate": 4.025044722719142e-06, + "logits/chosen": -2.8903229236602783, + "logits/rejected": -2.766087055206299, + "logps/chosen": -102.55953216552734, + "logps/rejected": -963.88818359375, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.282512366771698, + "rewards/margins": 8.909109115600586, + "rewards/rejected": -9.191620826721191, + "step": 4050 + }, + { + "epoch": 0.24, + "learning_rate": 4.034983104750547e-06, + "logits/chosen": -2.8861663341522217, + "logits/rejected": -2.800924777984619, + "logps/chosen": -72.39839172363281, + "logps/rejected": -914.64501953125, + "loss": 0.0165, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11409604549407959, + "rewards/margins": 8.589935302734375, + "rewards/rejected": -8.704030990600586, + "step": 4060 + }, + { + "epoch": 0.24, + "learning_rate": 4.044921486781952e-06, + "logits/chosen": -2.8806681632995605, + "logits/rejected": -2.8021364212036133, + "logps/chosen": -83.6947021484375, + "logps/rejected": -875.9315185546875, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1401219666004181, + "rewards/margins": 8.178439140319824, + "rewards/rejected": -8.318561553955078, + "step": 4070 + }, + { + "epoch": 0.24, + "learning_rate": 4.054859868813357e-06, + "logits/chosen": -2.873992443084717, + "logits/rejected": -2.77535343170166, + "logps/chosen": -113.083740234375, + "logps/rejected": -830.1140747070312, + "loss": 0.0388, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42511075735092163, + "rewards/margins": 7.4380059242248535, + "rewards/rejected": -7.863117218017578, + "step": 4080 + }, + { + "epoch": 0.24, + "learning_rate": 4.064798250844763e-06, + "logits/chosen": -2.851682186126709, + "logits/rejected": -2.7545952796936035, + "logps/chosen": -97.78514099121094, + "logps/rejected": -847.607421875, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19734561443328857, + "rewards/margins": 7.8329291343688965, + "rewards/rejected": -8.030275344848633, + "step": 4090 + }, + { + "epoch": 0.24, + "learning_rate": 4.0747366328761675e-06, + "logits/chosen": -2.888852596282959, + "logits/rejected": -2.79797625541687, + "logps/chosen": -108.75630187988281, + "logps/rejected": -886.4036865234375, + "loss": 0.0244, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3529484272003174, + "rewards/margins": 8.06701374053955, + "rewards/rejected": -8.419961929321289, + "step": 4100 + }, + { + "epoch": 0.25, + "learning_rate": 4.084675014907573e-06, + "logits/chosen": -2.8554725646972656, + "logits/rejected": -2.7662456035614014, + "logps/chosen": -98.52278137207031, + "logps/rejected": -867.1477661132812, + "loss": 0.0367, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.275216281414032, + "rewards/margins": 7.953598976135254, + "rewards/rejected": -8.228815078735352, + "step": 4110 + }, + { + "epoch": 0.25, + "learning_rate": 4.094613396938979e-06, + "logits/chosen": -2.8818554878234863, + "logits/rejected": -2.7957167625427246, + "logps/chosen": -87.67933654785156, + "logps/rejected": -914.9788818359375, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21106281876564026, + "rewards/margins": 8.486927032470703, + "rewards/rejected": -8.697988510131836, + "step": 4120 + }, + { + "epoch": 0.25, + "learning_rate": 4.104551778970384e-06, + "logits/chosen": -2.862668752670288, + "logits/rejected": -2.8188345432281494, + "logps/chosen": -105.35040283203125, + "logps/rejected": -805.2366333007812, + "loss": 0.0454, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32017913460731506, + "rewards/margins": 7.28512716293335, + "rewards/rejected": -7.605307102203369, + "step": 4130 + }, + { + "epoch": 0.25, + "learning_rate": 4.114490161001789e-06, + "logits/chosen": -2.8726305961608887, + "logits/rejected": -2.76788592338562, + "logps/chosen": -105.2334976196289, + "logps/rejected": -913.54541015625, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29013746976852417, + "rewards/margins": 8.40369701385498, + "rewards/rejected": -8.69383430480957, + "step": 4140 + }, + { + "epoch": 0.25, + "learning_rate": 4.124428543033194e-06, + "logits/chosen": -2.8909621238708496, + "logits/rejected": -2.775150775909424, + "logps/chosen": -88.94282531738281, + "logps/rejected": -806.9899291992188, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2068389356136322, + "rewards/margins": 7.422381401062012, + "rewards/rejected": -7.629220485687256, + "step": 4150 + }, + { + "epoch": 0.25, + "learning_rate": 4.1343669250646e-06, + "logits/chosen": -2.9240756034851074, + "logits/rejected": -2.8449745178222656, + "logps/chosen": -77.8985824584961, + "logps/rejected": -781.2958984375, + "loss": 0.0245, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07662646472454071, + "rewards/margins": 7.296517372131348, + "rewards/rejected": -7.373143672943115, + "step": 4160 + }, + { + "epoch": 0.25, + "learning_rate": 4.1443053070960046e-06, + "logits/chosen": -2.9118590354919434, + "logits/rejected": -2.8079867362976074, + "logps/chosen": -127.09245300292969, + "logps/rejected": -929.02734375, + "loss": 0.0344, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5973612666130066, + "rewards/margins": 8.256863594055176, + "rewards/rejected": -8.854225158691406, + "step": 4170 + }, + { + "epoch": 0.25, + "learning_rate": 4.15424368912741e-06, + "logits/chosen": -2.8738856315612793, + "logits/rejected": -2.7924232482910156, + "logps/chosen": -163.99371337890625, + "logps/rejected": -882.0316162109375, + "loss": 0.0399, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.932905375957489, + "rewards/margins": 7.44718074798584, + "rewards/rejected": -8.380086898803711, + "step": 4180 + }, + { + "epoch": 0.25, + "learning_rate": 4.164182071158816e-06, + "logits/chosen": -2.876330852508545, + "logits/rejected": -2.804779529571533, + "logps/chosen": -110.51788330078125, + "logps/rejected": -998.640625, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43114566802978516, + "rewards/margins": 9.121639251708984, + "rewards/rejected": -9.552785873413086, + "step": 4190 + }, + { + "epoch": 0.25, + "learning_rate": 4.174120453190221e-06, + "logits/chosen": -2.882387638092041, + "logits/rejected": -2.8201606273651123, + "logps/chosen": -106.5680160522461, + "logps/rejected": -997.2721557617188, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3588947355747223, + "rewards/margins": 9.157763481140137, + "rewards/rejected": -9.516657829284668, + "step": 4200 + }, + { + "epoch": 0.25, + "learning_rate": 4.184058835221626e-06, + "logits/chosen": -2.9162776470184326, + "logits/rejected": -2.774803638458252, + "logps/chosen": -96.96968078613281, + "logps/rejected": -944.7139892578125, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22434909641742706, + "rewards/margins": 8.772921562194824, + "rewards/rejected": -8.997271537780762, + "step": 4210 + }, + { + "epoch": 0.25, + "learning_rate": 4.193997217253031e-06, + "logits/chosen": -2.879441499710083, + "logits/rejected": -2.7686612606048584, + "logps/chosen": -89.79487609863281, + "logps/rejected": -833.0220947265625, + "loss": 0.0783, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1868824064731598, + "rewards/margins": 7.699825286865234, + "rewards/rejected": -7.886707305908203, + "step": 4220 + }, + { + "epoch": 0.25, + "learning_rate": 4.203935599284437e-06, + "logits/chosen": -2.8855624198913574, + "logits/rejected": -2.792113780975342, + "logps/chosen": -83.40242767333984, + "logps/rejected": -897.0490112304688, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14716050028800964, + "rewards/margins": 8.38559627532959, + "rewards/rejected": -8.532755851745605, + "step": 4230 + }, + { + "epoch": 0.25, + "learning_rate": 4.213873981315842e-06, + "logits/chosen": -2.88733172416687, + "logits/rejected": -2.8250551223754883, + "logps/chosen": -102.56843566894531, + "logps/rejected": -889.6915893554688, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33990198373794556, + "rewards/margins": 8.100381851196289, + "rewards/rejected": -8.440281867980957, + "step": 4240 + }, + { + "epoch": 0.25, + "learning_rate": 4.223812363347247e-06, + "logits/chosen": -2.87554931640625, + "logits/rejected": -2.766371726989746, + "logps/chosen": -162.18190002441406, + "logps/rejected": -862.4373168945312, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8809512853622437, + "rewards/margins": 7.30731201171875, + "rewards/rejected": -8.188264846801758, + "step": 4250 + }, + { + "epoch": 0.25, + "learning_rate": 4.233750745378653e-06, + "logits/chosen": -2.8737339973449707, + "logits/rejected": -2.7623019218444824, + "logps/chosen": -155.3220672607422, + "logps/rejected": -878.88037109375, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8721407651901245, + "rewards/margins": 7.473598480224609, + "rewards/rejected": -8.345738410949707, + "step": 4260 + }, + { + "epoch": 0.25, + "learning_rate": 4.243689127410058e-06, + "logits/chosen": -2.8918302059173584, + "logits/rejected": -2.8027617931365967, + "logps/chosen": -102.66436767578125, + "logps/rejected": -856.4224853515625, + "loss": 0.0316, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3372226357460022, + "rewards/margins": 7.785907745361328, + "rewards/rejected": -8.123129844665527, + "step": 4270 + }, + { + "epoch": 0.26, + "learning_rate": 4.253627509441463e-06, + "logits/chosen": -2.877969264984131, + "logits/rejected": -2.790073871612549, + "logps/chosen": -119.9470443725586, + "logps/rejected": -845.1678466796875, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45711955428123474, + "rewards/margins": 7.5487823486328125, + "rewards/rejected": -8.005901336669922, + "step": 4280 + }, + { + "epoch": 0.26, + "learning_rate": 4.263565891472868e-06, + "logits/chosen": -2.890664577484131, + "logits/rejected": -2.778040885925293, + "logps/chosen": -95.6941146850586, + "logps/rejected": -960.9384765625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2568924129009247, + "rewards/margins": 8.904181480407715, + "rewards/rejected": -9.161072731018066, + "step": 4290 + }, + { + "epoch": 0.26, + "learning_rate": 4.273504273504274e-06, + "logits/chosen": -2.872647762298584, + "logits/rejected": -2.7867259979248047, + "logps/chosen": -73.24678802490234, + "logps/rejected": -930.8653564453125, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.02836598828434944, + "rewards/margins": 8.846495628356934, + "rewards/rejected": -8.874862670898438, + "step": 4300 + }, + { + "epoch": 0.26, + "learning_rate": 4.283442655535679e-06, + "logits/chosen": -2.8743538856506348, + "logits/rejected": -2.8038012981414795, + "logps/chosen": -92.22464752197266, + "logps/rejected": -944.1321411132812, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23403319716453552, + "rewards/margins": 8.7694673538208, + "rewards/rejected": -9.003499984741211, + "step": 4310 + }, + { + "epoch": 0.26, + "learning_rate": 4.293381037567084e-06, + "logits/chosen": -2.8812294006347656, + "logits/rejected": -2.7842190265655518, + "logps/chosen": -108.43003845214844, + "logps/rejected": -871.1005859375, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3701080083847046, + "rewards/margins": 7.8617119789123535, + "rewards/rejected": -8.231820106506348, + "step": 4320 + }, + { + "epoch": 0.26, + "learning_rate": 4.30331941959849e-06, + "logits/chosen": -2.9160892963409424, + "logits/rejected": -2.788403034210205, + "logps/chosen": -93.42713928222656, + "logps/rejected": -979.8961181640625, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2382420301437378, + "rewards/margins": 9.114102363586426, + "rewards/rejected": -9.35234260559082, + "step": 4330 + }, + { + "epoch": 0.26, + "learning_rate": 4.313257801629895e-06, + "logits/chosen": -2.8456289768218994, + "logits/rejected": -2.7633216381073, + "logps/chosen": -89.84139251708984, + "logps/rejected": -892.3646240234375, + "loss": 0.0491, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21581485867500305, + "rewards/margins": 8.262726783752441, + "rewards/rejected": -8.478540420532227, + "step": 4340 + }, + { + "epoch": 0.26, + "learning_rate": 4.3231961836613e-06, + "logits/chosen": -2.8892834186553955, + "logits/rejected": -2.7718114852905273, + "logps/chosen": -110.25992584228516, + "logps/rejected": -860.5211791992188, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40792709589004517, + "rewards/margins": 7.756684303283691, + "rewards/rejected": -8.16461181640625, + "step": 4350 + }, + { + "epoch": 0.26, + "learning_rate": 4.333134565692705e-06, + "logits/chosen": -2.862164258956909, + "logits/rejected": -2.800736665725708, + "logps/chosen": -111.46165466308594, + "logps/rejected": -896.2294921875, + "loss": 0.0467, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4251784682273865, + "rewards/margins": 8.103158950805664, + "rewards/rejected": -8.528337478637695, + "step": 4360 + }, + { + "epoch": 0.26, + "learning_rate": 4.343072947724111e-06, + "logits/chosen": -2.858721971511841, + "logits/rejected": -2.7669737339019775, + "logps/chosen": -103.7558822631836, + "logps/rejected": -846.3435668945312, + "loss": 0.0326, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35419735312461853, + "rewards/margins": 7.673600673675537, + "rewards/rejected": -8.027798652648926, + "step": 4370 + }, + { + "epoch": 0.26, + "learning_rate": 4.353011329755516e-06, + "logits/chosen": -2.8532283306121826, + "logits/rejected": -2.7598159313201904, + "logps/chosen": -100.93025970458984, + "logps/rejected": -826.1634521484375, + "loss": 0.0339, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.331317275762558, + "rewards/margins": 7.482695579528809, + "rewards/rejected": -7.8140130043029785, + "step": 4380 + }, + { + "epoch": 0.26, + "learning_rate": 4.362949711786921e-06, + "logits/chosen": -2.8733901977539062, + "logits/rejected": -2.7785534858703613, + "logps/chosen": -76.26295471191406, + "logps/rejected": -860.4874267578125, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0420655682682991, + "rewards/margins": 8.130142211914062, + "rewards/rejected": -8.172208786010742, + "step": 4390 + }, + { + "epoch": 0.26, + "learning_rate": 4.372888093818327e-06, + "logits/chosen": -2.8437306880950928, + "logits/rejected": -2.755568265914917, + "logps/chosen": -91.65035247802734, + "logps/rejected": -944.2357177734375, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15216465294361115, + "rewards/margins": 8.835390090942383, + "rewards/rejected": -8.987553596496582, + "step": 4400 + }, + { + "epoch": 0.26, + "learning_rate": 4.382826475849732e-06, + "logits/chosen": -2.865375280380249, + "logits/rejected": -2.7993836402893066, + "logps/chosen": -137.43081665039062, + "logps/rejected": -836.2421875, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6617446541786194, + "rewards/margins": 7.248546600341797, + "rewards/rejected": -7.9102911949157715, + "step": 4410 + }, + { + "epoch": 0.26, + "learning_rate": 4.3927648578811375e-06, + "logits/chosen": -2.8883824348449707, + "logits/rejected": -2.7975573539733887, + "logps/chosen": -205.75631713867188, + "logps/rejected": -946.5140380859375, + "loss": 0.0417, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.3911893367767334, + "rewards/margins": 7.620826721191406, + "rewards/rejected": -9.012015342712402, + "step": 4420 + }, + { + "epoch": 0.26, + "learning_rate": 4.402703239912542e-06, + "logits/chosen": -2.88316011428833, + "logits/rejected": -2.829528331756592, + "logps/chosen": -114.13917541503906, + "logps/rejected": -834.2014770507812, + "loss": 0.034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40693727135658264, + "rewards/margins": 7.490669250488281, + "rewards/rejected": -7.89760684967041, + "step": 4430 + }, + { + "epoch": 0.26, + "learning_rate": 4.412641621943948e-06, + "logits/chosen": -2.8419644832611084, + "logits/rejected": -2.813293218612671, + "logps/chosen": -70.41014099121094, + "logps/rejected": -858.7379760742188, + "loss": 0.0437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.0170558150857687, + "rewards/margins": 8.129571914672852, + "rewards/rejected": -8.146627426147461, + "step": 4440 + }, + { + "epoch": 0.27, + "learning_rate": 4.422580003975353e-06, + "logits/chosen": -2.89115834236145, + "logits/rejected": -2.789508581161499, + "logps/chosen": -69.30640411376953, + "logps/rejected": -822.9002075195312, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00809780228883028, + "rewards/margins": 7.801293849945068, + "rewards/rejected": -7.809391975402832, + "step": 4450 + }, + { + "epoch": 0.27, + "learning_rate": 4.432518386006758e-06, + "logits/chosen": -2.8826160430908203, + "logits/rejected": -2.8245630264282227, + "logps/chosen": -86.43590545654297, + "logps/rejected": -932.1904296875, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10299674421548843, + "rewards/margins": 8.775313377380371, + "rewards/rejected": -8.878310203552246, + "step": 4460 + }, + { + "epoch": 0.27, + "learning_rate": 4.442456768038164e-06, + "logits/chosen": -2.90889310836792, + "logits/rejected": -2.7720751762390137, + "logps/chosen": -92.70089721679688, + "logps/rejected": -882.6103515625, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20210126042366028, + "rewards/margins": 8.179990768432617, + "rewards/rejected": -8.38209342956543, + "step": 4470 + }, + { + "epoch": 0.27, + "learning_rate": 4.452395150069569e-06, + "logits/chosen": -2.880842685699463, + "logits/rejected": -2.776214122772217, + "logps/chosen": -80.7531967163086, + "logps/rejected": -978.7205810546875, + "loss": 0.0289, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11011705547571182, + "rewards/margins": 9.23988151550293, + "rewards/rejected": -9.349998474121094, + "step": 4480 + }, + { + "epoch": 0.27, + "learning_rate": 4.4623335321009745e-06, + "logits/chosen": -2.906372308731079, + "logits/rejected": -2.8285484313964844, + "logps/chosen": -73.73820495605469, + "logps/rejected": -855.4788208007812, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.038873083889484406, + "rewards/margins": 8.083259582519531, + "rewards/rejected": -8.122132301330566, + "step": 4490 + }, + { + "epoch": 0.27, + "learning_rate": 4.472271914132379e-06, + "logits/chosen": -2.8901922702789307, + "logits/rejected": -2.8041534423828125, + "logps/chosen": -81.15570831298828, + "logps/rejected": -926.1275634765625, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10167503356933594, + "rewards/margins": 8.710947036743164, + "rewards/rejected": -8.812623023986816, + "step": 4500 + }, + { + "epoch": 0.27, + "learning_rate": 4.482210296163785e-06, + "logits/chosen": -2.889254570007324, + "logits/rejected": -2.7758641242980957, + "logps/chosen": -97.00717163085938, + "logps/rejected": -824.8802490234375, + "loss": 0.0246, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26803889870643616, + "rewards/margins": 7.5416107177734375, + "rewards/rejected": -7.809649467468262, + "step": 4510 + }, + { + "epoch": 0.27, + "learning_rate": 4.49214867819519e-06, + "logits/chosen": -2.878541946411133, + "logits/rejected": -2.795063018798828, + "logps/chosen": -100.54473114013672, + "logps/rejected": -943.3299560546875, + "loss": 0.0258, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2713661789894104, + "rewards/margins": 8.729387283325195, + "rewards/rejected": -9.000752449035645, + "step": 4520 + }, + { + "epoch": 0.27, + "learning_rate": 4.502087060226595e-06, + "logits/chosen": -2.8543143272399902, + "logits/rejected": -2.7880778312683105, + "logps/chosen": -104.0167465209961, + "logps/rejected": -911.3601684570312, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31012827157974243, + "rewards/margins": 8.35793399810791, + "rewards/rejected": -8.668062210083008, + "step": 4530 + }, + { + "epoch": 0.27, + "learning_rate": 4.512025442258001e-06, + "logits/chosen": -2.8965344429016113, + "logits/rejected": -2.8222427368164062, + "logps/chosen": -100.06700134277344, + "logps/rejected": -927.0545654296875, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3318369686603546, + "rewards/margins": 8.50373363494873, + "rewards/rejected": -8.8355712890625, + "step": 4540 + }, + { + "epoch": 0.27, + "learning_rate": 4.521963824289406e-06, + "logits/chosen": -2.898953437805176, + "logits/rejected": -2.8077282905578613, + "logps/chosen": -113.17926025390625, + "logps/rejected": -885.6700439453125, + "loss": 0.0277, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46761026978492737, + "rewards/margins": 7.945561408996582, + "rewards/rejected": -8.41317081451416, + "step": 4550 + }, + { + "epoch": 0.27, + "learning_rate": 4.5319022063208115e-06, + "logits/chosen": -2.8784239292144775, + "logits/rejected": -2.812331199645996, + "logps/chosen": -104.33197021484375, + "logps/rejected": -854.6854248046875, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2731979191303253, + "rewards/margins": 7.8292131423950195, + "rewards/rejected": -8.102411270141602, + "step": 4560 + }, + { + "epoch": 0.27, + "learning_rate": 4.541840588352216e-06, + "logits/chosen": -2.858334541320801, + "logits/rejected": -2.792424440383911, + "logps/chosen": -80.45789337158203, + "logps/rejected": -787.6712646484375, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08452922105789185, + "rewards/margins": 7.354048252105713, + "rewards/rejected": -7.438577175140381, + "step": 4570 + }, + { + "epoch": 0.27, + "learning_rate": 4.551778970383622e-06, + "logits/chosen": -2.8811147212982178, + "logits/rejected": -2.7914910316467285, + "logps/chosen": -71.01238250732422, + "logps/rejected": -936.9698486328125, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026032889261841774, + "rewards/margins": 8.902639389038086, + "rewards/rejected": -8.92867374420166, + "step": 4580 + }, + { + "epoch": 0.27, + "learning_rate": 4.561717352415027e-06, + "logits/chosen": -2.8755991458892822, + "logits/rejected": -2.8072402477264404, + "logps/chosen": -90.09197235107422, + "logps/rejected": -807.59375, + "loss": 0.0685, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22098632156848907, + "rewards/margins": 7.407328128814697, + "rewards/rejected": -7.628314018249512, + "step": 4590 + }, + { + "epoch": 0.27, + "learning_rate": 4.5716557344464325e-06, + "logits/chosen": -2.925112247467041, + "logits/rejected": -2.8315765857696533, + "logps/chosen": -112.901123046875, + "logps/rejected": -930.5799560546875, + "loss": 0.0346, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.460366815328598, + "rewards/margins": 8.404306411743164, + "rewards/rejected": -8.864673614501953, + "step": 4600 + }, + { + "epoch": 0.27, + "learning_rate": 4.581594116477838e-06, + "logits/chosen": -2.853515863418579, + "logits/rejected": -2.7696750164031982, + "logps/chosen": -88.01664733886719, + "logps/rejected": -968.1888427734375, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2102958858013153, + "rewards/margins": 9.036482810974121, + "rewards/rejected": -9.24677848815918, + "step": 4610 + }, + { + "epoch": 0.28, + "learning_rate": 4.591532498509243e-06, + "logits/chosen": -2.8714489936828613, + "logits/rejected": -2.8106112480163574, + "logps/chosen": -93.63993835449219, + "logps/rejected": -892.5885009765625, + "loss": 0.0255, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2017170637845993, + "rewards/margins": 8.28296184539795, + "rewards/rejected": -8.484679222106934, + "step": 4620 + }, + { + "epoch": 0.28, + "learning_rate": 4.6014708805406486e-06, + "logits/chosen": -2.882611036300659, + "logits/rejected": -2.824857711791992, + "logps/chosen": -70.57460021972656, + "logps/rejected": -833.49267578125, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.021585632115602493, + "rewards/margins": 7.8662919998168945, + "rewards/rejected": -7.887876987457275, + "step": 4630 + }, + { + "epoch": 0.28, + "learning_rate": 4.611409262572053e-06, + "logits/chosen": -2.885829448699951, + "logits/rejected": -2.805079460144043, + "logps/chosen": -85.69660949707031, + "logps/rejected": -870.3895263671875, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14330899715423584, + "rewards/margins": 8.127403259277344, + "rewards/rejected": -8.270711898803711, + "step": 4640 + }, + { + "epoch": 0.28, + "learning_rate": 4.621347644603459e-06, + "logits/chosen": -2.881643533706665, + "logits/rejected": -2.812030792236328, + "logps/chosen": -147.7859649658203, + "logps/rejected": -935.3680419921875, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7582032680511475, + "rewards/margins": 8.166765213012695, + "rewards/rejected": -8.924968719482422, + "step": 4650 + }, + { + "epoch": 0.28, + "learning_rate": 4.631286026634864e-06, + "logits/chosen": -2.837900161743164, + "logits/rejected": -2.7592737674713135, + "logps/chosen": -107.87874603271484, + "logps/rejected": -992.6653442382812, + "loss": 0.0272, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36032983660697937, + "rewards/margins": 9.110422134399414, + "rewards/rejected": -9.47075366973877, + "step": 4660 + }, + { + "epoch": 0.28, + "learning_rate": 4.6412244086662695e-06, + "logits/chosen": -2.8753323554992676, + "logits/rejected": -2.7746171951293945, + "logps/chosen": -63.38336181640625, + "logps/rejected": -856.1290283203125, + "loss": 0.0409, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.031739939004182816, + "rewards/margins": 8.156184196472168, + "rewards/rejected": -8.124444961547852, + "step": 4670 + }, + { + "epoch": 0.28, + "learning_rate": 4.651162790697675e-06, + "logits/chosen": -2.834101438522339, + "logits/rejected": -2.752824306488037, + "logps/chosen": -84.80122375488281, + "logps/rejected": -989.6591796875, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1308513581752777, + "rewards/margins": 9.330103874206543, + "rewards/rejected": -9.460953712463379, + "step": 4680 + }, + { + "epoch": 0.28, + "learning_rate": 4.66110117272908e-06, + "logits/chosen": -2.884183883666992, + "logits/rejected": -2.8069701194763184, + "logps/chosen": -115.38414001464844, + "logps/rejected": -983.2620849609375, + "loss": 0.0345, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48282259702682495, + "rewards/margins": 8.918584823608398, + "rewards/rejected": -9.401407241821289, + "step": 4690 + }, + { + "epoch": 0.28, + "learning_rate": 4.671039554760486e-06, + "logits/chosen": -2.8920505046844482, + "logits/rejected": -2.786271095275879, + "logps/chosen": -133.25765991210938, + "logps/rejected": -844.6759643554688, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6105977296829224, + "rewards/margins": 7.383652687072754, + "rewards/rejected": -7.994250297546387, + "step": 4700 + }, + { + "epoch": 0.28, + "learning_rate": 4.68097793679189e-06, + "logits/chosen": -2.859210729598999, + "logits/rejected": -2.744523048400879, + "logps/chosen": -86.75988006591797, + "logps/rejected": -839.8449096679688, + "loss": 0.0342, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13507045805454254, + "rewards/margins": 7.8170485496521, + "rewards/rejected": -7.95211935043335, + "step": 4710 + }, + { + "epoch": 0.28, + "learning_rate": 4.690916318823296e-06, + "logits/chosen": -2.9024012088775635, + "logits/rejected": -2.79534649848938, + "logps/chosen": -79.65396881103516, + "logps/rejected": -877.07861328125, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11196842044591904, + "rewards/margins": 8.202281951904297, + "rewards/rejected": -8.314249992370605, + "step": 4720 + }, + { + "epoch": 0.28, + "learning_rate": 4.700854700854701e-06, + "logits/chosen": -2.8984415531158447, + "logits/rejected": -2.812039852142334, + "logps/chosen": -104.2564697265625, + "logps/rejected": -779.1575927734375, + "loss": 0.0758, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.278194785118103, + "rewards/margins": 7.070542335510254, + "rewards/rejected": -7.348736763000488, + "step": 4730 + }, + { + "epoch": 0.28, + "learning_rate": 4.7107930828861065e-06, + "logits/chosen": -2.892812967300415, + "logits/rejected": -2.824160099029541, + "logps/chosen": -109.27354431152344, + "logps/rejected": -916.9251708984375, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3620317280292511, + "rewards/margins": 8.361618995666504, + "rewards/rejected": -8.723649978637695, + "step": 4740 + }, + { + "epoch": 0.28, + "learning_rate": 4.720731464917512e-06, + "logits/chosen": -2.8768837451934814, + "logits/rejected": -2.773188352584839, + "logps/chosen": -91.87501525878906, + "logps/rejected": -935.03955078125, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24119503796100616, + "rewards/margins": 8.671121597290039, + "rewards/rejected": -8.912317276000977, + "step": 4750 + }, + { + "epoch": 0.28, + "learning_rate": 4.730669846948917e-06, + "logits/chosen": -2.8862810134887695, + "logits/rejected": -2.7579824924468994, + "logps/chosen": -118.37330627441406, + "logps/rejected": -936.3952026367188, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45037394762039185, + "rewards/margins": 8.464803695678711, + "rewards/rejected": -8.915178298950195, + "step": 4760 + }, + { + "epoch": 0.28, + "learning_rate": 4.740608228980323e-06, + "logits/chosen": -2.84049654006958, + "logits/rejected": -2.7308342456817627, + "logps/chosen": -96.23958587646484, + "logps/rejected": -877.5916748046875, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3086794912815094, + "rewards/margins": 8.035611152648926, + "rewards/rejected": -8.344289779663086, + "step": 4770 + }, + { + "epoch": 0.29, + "learning_rate": 4.7505466110117275e-06, + "logits/chosen": -2.9016716480255127, + "logits/rejected": -2.798923969268799, + "logps/chosen": -89.29109191894531, + "logps/rejected": -1015.3558349609375, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15331611037254333, + "rewards/margins": 9.55330753326416, + "rewards/rejected": -9.706622123718262, + "step": 4780 + }, + { + "epoch": 0.29, + "learning_rate": 4.760484993043133e-06, + "logits/chosen": -2.896869659423828, + "logits/rejected": -2.766151189804077, + "logps/chosen": -126.7412109375, + "logps/rejected": -955.8396606445312, + "loss": 0.0198, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5335191488265991, + "rewards/margins": 8.58460807800293, + "rewards/rejected": -9.118128776550293, + "step": 4790 + }, + { + "epoch": 0.29, + "learning_rate": 4.770423375074538e-06, + "logits/chosen": -2.8885014057159424, + "logits/rejected": -2.786785840988159, + "logps/chosen": -111.9695053100586, + "logps/rejected": -973.0851440429688, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46662408113479614, + "rewards/margins": 8.824172973632812, + "rewards/rejected": -9.290796279907227, + "step": 4800 + }, + { + "epoch": 0.29, + "learning_rate": 4.780361757105944e-06, + "logits/chosen": -2.8892881870269775, + "logits/rejected": -2.7462844848632812, + "logps/chosen": -80.00074768066406, + "logps/rejected": -995.58984375, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11459922790527344, + "rewards/margins": 9.393584251403809, + "rewards/rejected": -9.508184432983398, + "step": 4810 + }, + { + "epoch": 0.29, + "learning_rate": 4.790300139137349e-06, + "logits/chosen": -2.8596444129943848, + "logits/rejected": -2.777484178543091, + "logps/chosen": -76.84935760498047, + "logps/rejected": -926.91357421875, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05449553579092026, + "rewards/margins": 8.780755996704102, + "rewards/rejected": -8.835251808166504, + "step": 4820 + }, + { + "epoch": 0.29, + "learning_rate": 4.800238521168754e-06, + "logits/chosen": -2.8894705772399902, + "logits/rejected": -2.7884392738342285, + "logps/chosen": -100.64851379394531, + "logps/rejected": -964.7586059570312, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31541380286216736, + "rewards/margins": 8.883004188537598, + "rewards/rejected": -9.198417663574219, + "step": 4830 + }, + { + "epoch": 0.29, + "learning_rate": 4.81017690320016e-06, + "logits/chosen": -2.9025533199310303, + "logits/rejected": -2.7725658416748047, + "logps/chosen": -91.3306655883789, + "logps/rejected": -871.6735229492188, + "loss": 0.0348, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21291127800941467, + "rewards/margins": 8.08076286315918, + "rewards/rejected": -8.293675422668457, + "step": 4840 + }, + { + "epoch": 0.29, + "learning_rate": 4.8201152852315645e-06, + "logits/chosen": -2.8887951374053955, + "logits/rejected": -2.825882911682129, + "logps/chosen": -75.90834045410156, + "logps/rejected": -955.3463134765625, + "loss": 0.0247, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10548841953277588, + "rewards/margins": 9.011628150939941, + "rewards/rejected": -9.117116928100586, + "step": 4850 + }, + { + "epoch": 0.29, + "learning_rate": 4.83005366726297e-06, + "logits/chosen": -2.890552520751953, + "logits/rejected": -2.794327735900879, + "logps/chosen": -93.9808578491211, + "logps/rejected": -970.7510986328125, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26783376932144165, + "rewards/margins": 9.007509231567383, + "rewards/rejected": -9.27534294128418, + "step": 4860 + }, + { + "epoch": 0.29, + "learning_rate": 4.839992049294375e-06, + "logits/chosen": -2.878385066986084, + "logits/rejected": -2.786320209503174, + "logps/chosen": -90.45911407470703, + "logps/rejected": -1022.26318359375, + "loss": 0.0269, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15092377364635468, + "rewards/margins": 9.634449005126953, + "rewards/rejected": -9.785372734069824, + "step": 4870 + }, + { + "epoch": 0.29, + "learning_rate": 4.849930431325781e-06, + "logits/chosen": -2.890195369720459, + "logits/rejected": -2.798043966293335, + "logps/chosen": -117.10550689697266, + "logps/rejected": -998.5481567382812, + "loss": 0.0318, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4445526599884033, + "rewards/margins": 9.102670669555664, + "rewards/rejected": -9.547222137451172, + "step": 4880 + }, + { + "epoch": 0.29, + "learning_rate": 4.859868813357186e-06, + "logits/chosen": -2.824828624725342, + "logits/rejected": -2.7200264930725098, + "logps/chosen": -128.18399047851562, + "logps/rejected": -999.6248779296875, + "loss": 0.021, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5576019883155823, + "rewards/margins": 9.002005577087402, + "rewards/rejected": -9.559609413146973, + "step": 4890 + }, + { + "epoch": 0.29, + "learning_rate": 4.869807195388591e-06, + "logits/chosen": -2.925899028778076, + "logits/rejected": -2.8096795082092285, + "logps/chosen": -79.11145782470703, + "logps/rejected": -1017.8079833984375, + "loss": 0.0263, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.1356188952922821, + "rewards/margins": 9.601679801940918, + "rewards/rejected": -9.737298011779785, + "step": 4900 + }, + { + "epoch": 0.29, + "learning_rate": 4.879745577419997e-06, + "logits/chosen": -2.8300654888153076, + "logits/rejected": -2.7608776092529297, + "logps/chosen": -77.7479476928711, + "logps/rejected": -825.439453125, + "loss": 0.0371, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.06831531226634979, + "rewards/margins": 7.740384578704834, + "rewards/rejected": -7.808699607849121, + "step": 4910 + }, + { + "epoch": 0.29, + "learning_rate": 4.8896839594514015e-06, + "logits/chosen": -2.865025043487549, + "logits/rejected": -2.7727787494659424, + "logps/chosen": -74.79720306396484, + "logps/rejected": -976.9904174804688, + "loss": 0.0341, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.034891530871391296, + "rewards/margins": 9.367277145385742, + "rewards/rejected": -9.332387924194336, + "step": 4920 + }, + { + "epoch": 0.29, + "learning_rate": 4.899622341482807e-06, + "logits/chosen": -2.8914809226989746, + "logits/rejected": -2.8343589305877686, + "logps/chosen": -77.22371673583984, + "logps/rejected": -790.4819946289062, + "loss": 0.0333, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10589684545993805, + "rewards/margins": 7.359184265136719, + "rewards/rejected": -7.465081214904785, + "step": 4930 + }, + { + "epoch": 0.29, + "learning_rate": 4.909560723514212e-06, + "logits/chosen": -2.87862229347229, + "logits/rejected": -2.780078411102295, + "logps/chosen": -218.3362274169922, + "logps/rejected": -999.27685546875, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4889423847198486, + "rewards/margins": 8.043344497680664, + "rewards/rejected": -9.532286643981934, + "step": 4940 + }, + { + "epoch": 0.3, + "learning_rate": 4.919499105545618e-06, + "logits/chosen": -2.9033210277557373, + "logits/rejected": -2.7883615493774414, + "logps/chosen": -160.2443389892578, + "logps/rejected": -965.7054443359375, + "loss": 0.0468, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9407378435134888, + "rewards/margins": 8.280309677124023, + "rewards/rejected": -9.221048355102539, + "step": 4950 + }, + { + "epoch": 0.3, + "learning_rate": 4.929437487577023e-06, + "logits/chosen": -2.905583620071411, + "logits/rejected": -2.808222770690918, + "logps/chosen": -83.41796875, + "logps/rejected": -851.6915893554688, + "loss": 0.0292, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09896432608366013, + "rewards/margins": 7.971621513366699, + "rewards/rejected": -8.070586204528809, + "step": 4960 + }, + { + "epoch": 0.3, + "learning_rate": 4.939375869608428e-06, + "logits/chosen": -2.8962228298187256, + "logits/rejected": -2.773911476135254, + "logps/chosen": -80.39439392089844, + "logps/rejected": -977.8939208984375, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07613856345415115, + "rewards/margins": 9.259520530700684, + "rewards/rejected": -9.335659980773926, + "step": 4970 + }, + { + "epoch": 0.3, + "learning_rate": 4.949314251639834e-06, + "logits/chosen": -2.837639331817627, + "logits/rejected": -2.768453598022461, + "logps/chosen": -80.83184051513672, + "logps/rejected": -957.2828369140625, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07547406107187271, + "rewards/margins": 9.05916976928711, + "rewards/rejected": -9.1346435546875, + "step": 4980 + }, + { + "epoch": 0.3, + "learning_rate": 4.959252633671239e-06, + "logits/chosen": -2.9022011756896973, + "logits/rejected": -2.8027169704437256, + "logps/chosen": -89.09010314941406, + "logps/rejected": -1003.8643798828125, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1606881320476532, + "rewards/margins": 9.442269325256348, + "rewards/rejected": -9.602956771850586, + "step": 4990 + }, + { + "epoch": 0.3, + "learning_rate": 4.969191015702644e-06, + "logits/chosen": -2.8835768699645996, + "logits/rejected": -2.7949702739715576, + "logps/chosen": -115.48515319824219, + "logps/rejected": -945.8638916015625, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46803441643714905, + "rewards/margins": 8.542829513549805, + "rewards/rejected": -9.0108642578125, + "step": 5000 + }, + { + "epoch": 0.3, + "learning_rate": 4.979129397734049e-06, + "logits/chosen": -2.8523144721984863, + "logits/rejected": -2.7823727130889893, + "logps/chosen": -95.86865997314453, + "logps/rejected": -1012.51123046875, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22856438159942627, + "rewards/margins": 9.443161010742188, + "rewards/rejected": -9.67172622680664, + "step": 5010 + }, + { + "epoch": 0.3, + "learning_rate": 4.989067779765455e-06, + "logits/chosen": -2.8718934059143066, + "logits/rejected": -2.7778241634368896, + "logps/chosen": -129.14218139648438, + "logps/rejected": -1084.076416015625, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5699118375778198, + "rewards/margins": 9.803573608398438, + "rewards/rejected": -10.37348461151123, + "step": 5020 + }, + { + "epoch": 0.3, + "learning_rate": 4.99900616179686e-06, + "logits/chosen": -2.8578925132751465, + "logits/rejected": -2.781005382537842, + "logps/chosen": -138.49337768554688, + "logps/rejected": -1076.350830078125, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6795462369918823, + "rewards/margins": 9.643610000610352, + "rewards/rejected": -10.323156356811523, + "step": 5030 + }, + { + "epoch": 0.3, + "learning_rate": 4.99999951258251e-06, + "logits/chosen": -2.844679117202759, + "logits/rejected": -2.769289493560791, + "logps/chosen": -107.89360046386719, + "logps/rejected": -1143.4736328125, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3335316777229309, + "rewards/margins": 10.659429550170898, + "rewards/rejected": -10.992960929870605, + "step": 5040 + }, + { + "epoch": 0.3, + "learning_rate": 4.999997827682785e-06, + "logits/chosen": -2.8786590099334717, + "logits/rejected": -2.7779812812805176, + "logps/chosen": -105.56673431396484, + "logps/rejected": -1068.960205078125, + "loss": 0.0214, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3662894368171692, + "rewards/margins": 9.879122734069824, + "rewards/rejected": -10.245412826538086, + "step": 5050 + }, + { + "epoch": 0.3, + "learning_rate": 4.999994939284139e-06, + "logits/chosen": -2.87300443649292, + "logits/rejected": -2.7826895713806152, + "logps/chosen": -97.47990417480469, + "logps/rejected": -957.1144409179688, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2853287160396576, + "rewards/margins": 8.841194152832031, + "rewards/rejected": -9.126523971557617, + "step": 5060 + }, + { + "epoch": 0.3, + "learning_rate": 4.9999908473879605e-06, + "logits/chosen": -2.883284568786621, + "logits/rejected": -2.778878927230835, + "logps/chosen": -90.03202056884766, + "logps/rejected": -869.6712036132812, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24330243468284607, + "rewards/margins": 8.021967887878418, + "rewards/rejected": -8.26526927947998, + "step": 5070 + }, + { + "epoch": 0.3, + "learning_rate": 4.99998555199622e-06, + "logits/chosen": -2.8607161045074463, + "logits/rejected": -2.795592784881592, + "logps/chosen": -83.33605194091797, + "logps/rejected": -1037.9112548828125, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11084593832492828, + "rewards/margins": 9.811139106750488, + "rewards/rejected": -9.921984672546387, + "step": 5080 + }, + { + "epoch": 0.3, + "learning_rate": 4.999979053111467e-06, + "logits/chosen": -2.850515842437744, + "logits/rejected": -2.74926495552063, + "logps/chosen": -88.1969985961914, + "logps/rejected": -839.1744384765625, + "loss": 0.0294, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17902560532093048, + "rewards/margins": 7.7577056884765625, + "rewards/rejected": -7.936731815338135, + "step": 5090 + }, + { + "epoch": 0.3, + "learning_rate": 4.999971350736829e-06, + "logits/chosen": -2.9066543579101562, + "logits/rejected": -2.8119022846221924, + "logps/chosen": -91.59111022949219, + "logps/rejected": -934.1257934570312, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2175445556640625, + "rewards/margins": 8.670334815979004, + "rewards/rejected": -8.887880325317383, + "step": 5100 + }, + { + "epoch": 0.3, + "learning_rate": 4.999962444876015e-06, + "logits/chosen": -2.844407320022583, + "logits/rejected": -2.7663371562957764, + "logps/chosen": -85.95719146728516, + "logps/rejected": -851.6063232421875, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1889323890209198, + "rewards/margins": 7.8885908126831055, + "rewards/rejected": -8.077524185180664, + "step": 5110 + }, + { + "epoch": 0.31, + "learning_rate": 4.999952335533311e-06, + "logits/chosen": -2.895577907562256, + "logits/rejected": -2.7880630493164062, + "logps/chosen": -94.84596252441406, + "logps/rejected": -943.1453857421875, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25333893299102783, + "rewards/margins": 8.73223876953125, + "rewards/rejected": -8.985578536987305, + "step": 5120 + }, + { + "epoch": 0.31, + "learning_rate": 4.999941022713586e-06, + "logits/chosen": -2.837047815322876, + "logits/rejected": -2.763660430908203, + "logps/chosen": -73.74784088134766, + "logps/rejected": -872.5505981445312, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.050895608961582184, + "rewards/margins": 8.234648704528809, + "rewards/rejected": -8.285544395446777, + "step": 5130 + }, + { + "epoch": 0.31, + "learning_rate": 4.999928506422284e-06, + "logits/chosen": -2.8468403816223145, + "logits/rejected": -2.7657713890075684, + "logps/chosen": -84.5938720703125, + "logps/rejected": -986.33740234375, + "loss": 0.0301, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10678932815790176, + "rewards/margins": 9.338483810424805, + "rewards/rejected": -9.445272445678711, + "step": 5140 + }, + { + "epoch": 0.31, + "learning_rate": 4.999914786665431e-06, + "logits/chosen": -2.848414182662964, + "logits/rejected": -2.748990058898926, + "logps/chosen": -101.0417709350586, + "logps/rejected": -940.8655395507812, + "loss": 0.0524, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2576334774494171, + "rewards/margins": 8.705947875976562, + "rewards/rejected": -8.963581085205078, + "step": 5150 + }, + { + "epoch": 0.31, + "learning_rate": 4.999899863449631e-06, + "logits/chosen": -2.8816328048706055, + "logits/rejected": -2.7506706714630127, + "logps/chosen": -104.9185562133789, + "logps/rejected": -1057.6444091796875, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.33373159170150757, + "rewards/margins": 9.791979789733887, + "rewards/rejected": -10.125711441040039, + "step": 5160 + }, + { + "epoch": 0.31, + "learning_rate": 4.999883736782069e-06, + "logits/chosen": -2.8945436477661133, + "logits/rejected": -2.812131404876709, + "logps/chosen": -90.57582092285156, + "logps/rejected": -919.4510498046875, + "loss": 0.025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1633177101612091, + "rewards/margins": 8.592384338378906, + "rewards/rejected": -8.755702018737793, + "step": 5170 + }, + { + "epoch": 0.31, + "learning_rate": 4.999866406670508e-06, + "logits/chosen": -2.8717596530914307, + "logits/rejected": -2.7919280529022217, + "logps/chosen": -88.7566909790039, + "logps/rejected": -954.9832153320312, + "loss": 0.037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1499759554862976, + "rewards/margins": 8.968339920043945, + "rewards/rejected": -9.118316650390625, + "step": 5180 + }, + { + "epoch": 0.31, + "learning_rate": 4.999847873123291e-06, + "logits/chosen": -2.895843982696533, + "logits/rejected": -2.8216373920440674, + "logps/chosen": -84.7372817993164, + "logps/rejected": -853.59375, + "loss": 0.0519, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.11497324705123901, + "rewards/margins": 7.979830741882324, + "rewards/rejected": -8.094803810119629, + "step": 5190 + }, + { + "epoch": 0.31, + "learning_rate": 4.999828136149339e-06, + "logits/chosen": -2.839906692504883, + "logits/rejected": -2.7815299034118652, + "logps/chosen": -81.22260284423828, + "logps/rejected": -929.52783203125, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05136825516819954, + "rewards/margins": 8.79706859588623, + "rewards/rejected": -8.84843635559082, + "step": 5200 + }, + { + "epoch": 0.31, + "learning_rate": 4.999807195758155e-06, + "logits/chosen": -2.8689942359924316, + "logits/rejected": -2.7501468658447266, + "logps/chosen": -106.19752502441406, + "logps/rejected": -971.22607421875, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.37237533926963806, + "rewards/margins": 8.90121078491211, + "rewards/rejected": -9.27358627319336, + "step": 5210 + }, + { + "epoch": 0.31, + "learning_rate": 4.999785051959819e-06, + "logits/chosen": -2.835212230682373, + "logits/rejected": -2.7451722621917725, + "logps/chosen": -98.01939392089844, + "logps/rejected": -882.3121948242188, + "loss": 0.0296, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2849571406841278, + "rewards/margins": 8.103899002075195, + "rewards/rejected": -8.38885498046875, + "step": 5220 + }, + { + "epoch": 0.31, + "learning_rate": 4.99976170476499e-06, + "logits/chosen": -2.826137065887451, + "logits/rejected": -2.7194154262542725, + "logps/chosen": -112.0563735961914, + "logps/rejected": -930.9816284179688, + "loss": 0.0256, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40944138169288635, + "rewards/margins": 8.454317092895508, + "rewards/rejected": -8.863758087158203, + "step": 5230 + }, + { + "epoch": 0.31, + "learning_rate": 4.999737154184909e-06, + "logits/chosen": -2.80683970451355, + "logits/rejected": -2.689065933227539, + "logps/chosen": -150.18272399902344, + "logps/rejected": -955.5558471679688, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8011050224304199, + "rewards/margins": 8.305306434631348, + "rewards/rejected": -9.10641098022461, + "step": 5240 + }, + { + "epoch": 0.31, + "learning_rate": 4.999711400231393e-06, + "logits/chosen": -2.865556240081787, + "logits/rejected": -2.7775168418884277, + "logps/chosen": -102.52781677246094, + "logps/rejected": -900.8796997070312, + "loss": 0.04, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3759949505329132, + "rewards/margins": 8.194425582885742, + "rewards/rejected": -8.57042121887207, + "step": 5250 + }, + { + "epoch": 0.31, + "learning_rate": 4.999684442916841e-06, + "logits/chosen": -2.890749931335449, + "logits/rejected": -2.8019461631774902, + "logps/chosen": -87.59014892578125, + "logps/rejected": -965.03759765625, + "loss": 0.0261, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17856602370738983, + "rewards/margins": 9.037381172180176, + "rewards/rejected": -9.215948104858398, + "step": 5260 + }, + { + "epoch": 0.31, + "learning_rate": 4.99965628225423e-06, + "logits/chosen": -2.8734028339385986, + "logits/rejected": -2.770862102508545, + "logps/chosen": -111.6399154663086, + "logps/rejected": -862.978515625, + "loss": 0.0314, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4244050979614258, + "rewards/margins": 7.760258674621582, + "rewards/rejected": -8.184662818908691, + "step": 5270 + }, + { + "epoch": 0.31, + "learning_rate": 4.999626918257117e-06, + "logits/chosen": -2.8641085624694824, + "logits/rejected": -2.73319935798645, + "logps/chosen": -137.85464477539062, + "logps/rejected": -930.4456176757812, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5954976081848145, + "rewards/margins": 8.267059326171875, + "rewards/rejected": -8.862558364868164, + "step": 5280 + }, + { + "epoch": 0.32, + "learning_rate": 4.999596350939637e-06, + "logits/chosen": -2.8772735595703125, + "logits/rejected": -2.7569937705993652, + "logps/chosen": -113.51438903808594, + "logps/rejected": -919.8497314453125, + "loss": 0.036, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41572675108909607, + "rewards/margins": 8.3411226272583, + "rewards/rejected": -8.75684928894043, + "step": 5290 + }, + { + "epoch": 0.32, + "learning_rate": 4.999564580316506e-06, + "logits/chosen": -2.866985559463501, + "logits/rejected": -2.770427942276001, + "logps/chosen": -95.52568054199219, + "logps/rejected": -974.2899169921875, + "loss": 0.0352, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2805863320827484, + "rewards/margins": 9.021206855773926, + "rewards/rejected": -9.30179214477539, + "step": 5300 + }, + { + "epoch": 0.32, + "learning_rate": 4.999531606403018e-06, + "logits/chosen": -2.8598344326019287, + "logits/rejected": -2.8107810020446777, + "logps/chosen": -83.39866638183594, + "logps/rejected": -968.8853759765625, + "loss": 0.0448, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -0.14168737828731537, + "rewards/margins": 9.10383415222168, + "rewards/rejected": -9.245522499084473, + "step": 5310 + }, + { + "epoch": 0.32, + "learning_rate": 4.9994974292150465e-06, + "logits/chosen": -2.8701887130737305, + "logits/rejected": -2.764065742492676, + "logps/chosen": -78.17631530761719, + "logps/rejected": -926.0496215820312, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12099412828683853, + "rewards/margins": 8.697303771972656, + "rewards/rejected": -8.818297386169434, + "step": 5320 + }, + { + "epoch": 0.32, + "learning_rate": 4.999462048769044e-06, + "logits/chosen": -2.8630313873291016, + "logits/rejected": -2.778280258178711, + "logps/chosen": -97.44795227050781, + "logps/rejected": -951.5828247070312, + "loss": 0.0427, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26364535093307495, + "rewards/margins": 8.811906814575195, + "rewards/rejected": -9.075552940368652, + "step": 5330 + }, + { + "epoch": 0.32, + "learning_rate": 4.999425465082043e-06, + "logits/chosen": -2.8799846172332764, + "logits/rejected": -2.8134191036224365, + "logps/chosen": -92.31000518798828, + "logps/rejected": -1022.00830078125, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18813441693782806, + "rewards/margins": 9.596410751342773, + "rewards/rejected": -9.784546852111816, + "step": 5340 + }, + { + "epoch": 0.32, + "learning_rate": 4.999387678171656e-06, + "logits/chosen": -2.8204174041748047, + "logits/rejected": -2.7163872718811035, + "logps/chosen": -82.2776107788086, + "logps/rejected": -994.0906372070312, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13918820023536682, + "rewards/margins": 9.370569229125977, + "rewards/rejected": -9.509757995605469, + "step": 5350 + }, + { + "epoch": 0.32, + "learning_rate": 4.999348688056071e-06, + "logits/chosen": -2.879420518875122, + "logits/rejected": -2.796870708465576, + "logps/chosen": -79.39065551757812, + "logps/rejected": -939.0748291015625, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1394973248243332, + "rewards/margins": 8.805173873901367, + "rewards/rejected": -8.944669723510742, + "step": 5360 + }, + { + "epoch": 0.32, + "learning_rate": 4.99930849475406e-06, + "logits/chosen": -2.864455461502075, + "logits/rejected": -2.7709949016571045, + "logps/chosen": -77.62438201904297, + "logps/rejected": -990.2431640625, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05697641521692276, + "rewards/margins": 9.414332389831543, + "rewards/rejected": -9.47131061553955, + "step": 5370 + }, + { + "epoch": 0.32, + "learning_rate": 4.999267098284972e-06, + "logits/chosen": -2.8857221603393555, + "logits/rejected": -2.765843152999878, + "logps/chosen": -81.39768981933594, + "logps/rejected": -983.1765747070312, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17387275397777557, + "rewards/margins": 9.218256950378418, + "rewards/rejected": -9.392129898071289, + "step": 5380 + }, + { + "epoch": 0.32, + "learning_rate": 4.999224498668735e-06, + "logits/chosen": -2.8702988624572754, + "logits/rejected": -2.8268063068389893, + "logps/chosen": -99.5493392944336, + "logps/rejected": -890.630859375, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2623685896396637, + "rewards/margins": 8.206276893615723, + "rewards/rejected": -8.468645095825195, + "step": 5390 + }, + { + "epoch": 0.32, + "learning_rate": 4.999180695925856e-06, + "logits/chosen": -2.874793529510498, + "logits/rejected": -2.773021936416626, + "logps/chosen": -124.54240417480469, + "logps/rejected": -1040.364013671875, + "loss": 0.013, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5596454739570618, + "rewards/margins": 9.382108688354492, + "rewards/rejected": -9.941754341125488, + "step": 5400 + }, + { + "epoch": 0.32, + "learning_rate": 4.999135690077421e-06, + "logits/chosen": -2.897712230682373, + "logits/rejected": -2.7775044441223145, + "logps/chosen": -102.29985046386719, + "logps/rejected": -930.5198364257812, + "loss": 0.0419, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29252517223358154, + "rewards/margins": 8.575323104858398, + "rewards/rejected": -8.867849349975586, + "step": 5410 + }, + { + "epoch": 0.32, + "learning_rate": 4.999089481145097e-06, + "logits/chosen": -2.8792929649353027, + "logits/rejected": -2.7747395038604736, + "logps/chosen": -83.71781921386719, + "logps/rejected": -879.244140625, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15181627869606018, + "rewards/margins": 8.197938919067383, + "rewards/rejected": -8.349756240844727, + "step": 5420 + }, + { + "epoch": 0.32, + "learning_rate": 4.999042069151129e-06, + "logits/chosen": -2.8629391193389893, + "logits/rejected": -2.7894959449768066, + "logps/chosen": -104.77567291259766, + "logps/rejected": -976.9168701171875, + "loss": 0.0224, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30423110723495483, + "rewards/margins": 9.027546882629395, + "rewards/rejected": -9.331778526306152, + "step": 5430 + }, + { + "epoch": 0.32, + "learning_rate": 4.998993454118341e-06, + "logits/chosen": -2.890188694000244, + "logits/rejected": -2.761462450027466, + "logps/chosen": -104.02595520019531, + "logps/rejected": -1062.713623046875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2883433401584625, + "rewards/margins": 9.869985580444336, + "rewards/rejected": -10.158327102661133, + "step": 5440 + }, + { + "epoch": 0.32, + "learning_rate": 4.998943636070136e-06, + "logits/chosen": -2.8396804332733154, + "logits/rejected": -2.7484991550445557, + "logps/chosen": -114.13033294677734, + "logps/rejected": -1081.19091796875, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46671152114868164, + "rewards/margins": 9.885077476501465, + "rewards/rejected": -10.351789474487305, + "step": 5450 + }, + { + "epoch": 0.33, + "learning_rate": 4.998892615030496e-06, + "logits/chosen": -2.865346908569336, + "logits/rejected": -2.7305164337158203, + "logps/chosen": -114.74278259277344, + "logps/rejected": -1059.751708984375, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4478020668029785, + "rewards/margins": 9.709622383117676, + "rewards/rejected": -10.15742301940918, + "step": 5460 + }, + { + "epoch": 0.33, + "learning_rate": 4.998840391023984e-06, + "logits/chosen": -2.841677188873291, + "logits/rejected": -2.7697014808654785, + "logps/chosen": -92.29898834228516, + "logps/rejected": -1121.646484375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24512723088264465, + "rewards/margins": 10.528948783874512, + "rewards/rejected": -10.77407455444336, + "step": 5470 + }, + { + "epoch": 0.33, + "learning_rate": 4.998786964075738e-06, + "logits/chosen": -2.8600053787231445, + "logits/rejected": -2.7996859550476074, + "logps/chosen": -111.3618392944336, + "logps/rejected": -996.56591796875, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36930590867996216, + "rewards/margins": 9.141902923583984, + "rewards/rejected": -9.511207580566406, + "step": 5480 + }, + { + "epoch": 0.33, + "learning_rate": 4.99873233421148e-06, + "logits/chosen": -2.8878750801086426, + "logits/rejected": -2.8203814029693604, + "logps/chosen": -87.01348876953125, + "logps/rejected": -1019.2249755859375, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1202097088098526, + "rewards/margins": 9.635150909423828, + "rewards/rejected": -9.755361557006836, + "step": 5490 + }, + { + "epoch": 0.33, + "learning_rate": 4.998676501457508e-06, + "logits/chosen": -2.8824493885040283, + "logits/rejected": -2.776831865310669, + "logps/chosen": -102.32815551757812, + "logps/rejected": -971.8756713867188, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2946009039878845, + "rewards/margins": 8.982550621032715, + "rewards/rejected": -9.277151107788086, + "step": 5500 + }, + { + "epoch": 0.33, + "learning_rate": 4.9986194658407e-06, + "logits/chosen": -2.86017107963562, + "logits/rejected": -2.762253761291504, + "logps/chosen": -100.86671447753906, + "logps/rejected": -1139.77734375, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25885266065597534, + "rewards/margins": 10.693548202514648, + "rewards/rejected": -10.952400207519531, + "step": 5510 + }, + { + "epoch": 0.33, + "learning_rate": 4.9985612273885145e-06, + "logits/chosen": -2.8773651123046875, + "logits/rejected": -2.7898504734039307, + "logps/chosen": -92.82090759277344, + "logps/rejected": -1087.814208984375, + "loss": 0.0335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22663505375385284, + "rewards/margins": 10.19865608215332, + "rewards/rejected": -10.425291061401367, + "step": 5520 + }, + { + "epoch": 0.33, + "learning_rate": 4.9985017861289845e-06, + "logits/chosen": -2.8792757987976074, + "logits/rejected": -2.7841873168945312, + "logps/chosen": -89.27098083496094, + "logps/rejected": -976.3346557617188, + "loss": 0.022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1920856237411499, + "rewards/margins": 9.124589920043945, + "rewards/rejected": -9.316675186157227, + "step": 5530 + }, + { + "epoch": 0.33, + "learning_rate": 4.998441142090727e-06, + "logits/chosen": -2.875072956085205, + "logits/rejected": -2.7747702598571777, + "logps/chosen": -97.55594635009766, + "logps/rejected": -1183.7984619140625, + "loss": 0.0223, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.302950382232666, + "rewards/margins": 11.079718589782715, + "rewards/rejected": -11.382669448852539, + "step": 5540 + }, + { + "epoch": 0.33, + "learning_rate": 4.998379295302936e-06, + "logits/chosen": -2.886603832244873, + "logits/rejected": -2.7976067066192627, + "logps/chosen": -99.26834869384766, + "logps/rejected": -990.5234375, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2729679048061371, + "rewards/margins": 9.195785522460938, + "rewards/rejected": -9.468753814697266, + "step": 5550 + }, + { + "epoch": 0.33, + "learning_rate": 4.998316245795383e-06, + "logits/chosen": -2.8958377838134766, + "logits/rejected": -2.778620481491089, + "logps/chosen": -118.92022705078125, + "logps/rejected": -1009.7033081054688, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4165474772453308, + "rewards/margins": 9.221906661987305, + "rewards/rejected": -9.63845443725586, + "step": 5560 + }, + { + "epoch": 0.33, + "learning_rate": 4.998251993598422e-06, + "logits/chosen": -2.8409457206726074, + "logits/rejected": -2.7163748741149902, + "logps/chosen": -102.20428466796875, + "logps/rejected": -1054.566162109375, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2958456873893738, + "rewards/margins": 9.807398796081543, + "rewards/rejected": -10.10324478149414, + "step": 5570 + }, + { + "epoch": 0.33, + "learning_rate": 4.9981865387429825e-06, + "logits/chosen": -2.9050748348236084, + "logits/rejected": -2.7730095386505127, + "logps/chosen": -91.78691864013672, + "logps/rejected": -1064.236572265625, + "loss": 0.0168, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2547686696052551, + "rewards/margins": 9.934346199035645, + "rewards/rejected": -10.189114570617676, + "step": 5580 + }, + { + "epoch": 0.33, + "learning_rate": 4.998119881260576e-06, + "logits/chosen": -2.885603904724121, + "logits/rejected": -2.7789297103881836, + "logps/chosen": -120.5267333984375, + "logps/rejected": -1057.04345703125, + "loss": 0.0302, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.4975496232509613, + "rewards/margins": 9.629546165466309, + "rewards/rejected": -10.127095222473145, + "step": 5590 + }, + { + "epoch": 0.33, + "learning_rate": 4.9980520211832894e-06, + "logits/chosen": -2.8508429527282715, + "logits/rejected": -2.7570393085479736, + "logps/chosen": -86.61888122558594, + "logps/rejected": -883.5057373046875, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15450075268745422, + "rewards/margins": 8.23705005645752, + "rewards/rejected": -8.39155101776123, + "step": 5600 + }, + { + "epoch": 0.33, + "learning_rate": 4.997982958543792e-06, + "logits/chosen": -2.8349039554595947, + "logits/rejected": -2.7911713123321533, + "logps/chosen": -81.1885757446289, + "logps/rejected": -992.8806762695312, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11003895103931427, + "rewards/margins": 9.381539344787598, + "rewards/rejected": -9.491578102111816, + "step": 5610 + }, + { + "epoch": 0.34, + "learning_rate": 4.997912693375331e-06, + "logits/chosen": -2.874218463897705, + "logits/rejected": -2.804551124572754, + "logps/chosen": -88.09857940673828, + "logps/rejected": -748.6707763671875, + "loss": 0.0833, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.2331649363040924, + "rewards/margins": 6.8149733543396, + "rewards/rejected": -7.0481367111206055, + "step": 5620 + }, + { + "epoch": 0.34, + "learning_rate": 4.99784122571173e-06, + "logits/chosen": -2.8568549156188965, + "logits/rejected": -2.7542309761047363, + "logps/chosen": -87.94371032714844, + "logps/rejected": -835.7515869140625, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14200739562511444, + "rewards/margins": 7.787380218505859, + "rewards/rejected": -7.929388523101807, + "step": 5630 + }, + { + "epoch": 0.34, + "learning_rate": 4.9977685555873955e-06, + "logits/chosen": -2.837651491165161, + "logits/rejected": -2.731248617172241, + "logps/chosen": -90.04285430908203, + "logps/rejected": -993.4127807617188, + "loss": 0.0086, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.155365988612175, + "rewards/margins": 9.33078670501709, + "rewards/rejected": -9.486152648925781, + "step": 5640 + }, + { + "epoch": 0.34, + "learning_rate": 4.997694683037311e-06, + "logits/chosen": -2.8825695514678955, + "logits/rejected": -2.735319137573242, + "logps/chosen": -109.47664642333984, + "logps/rejected": -1026.1048583984375, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40055546164512634, + "rewards/margins": 9.404574394226074, + "rewards/rejected": -9.805131912231445, + "step": 5650 + }, + { + "epoch": 0.34, + "learning_rate": 4.997619608097036e-06, + "logits/chosen": -2.868393898010254, + "logits/rejected": -2.7323031425476074, + "logps/chosen": -102.20716857910156, + "logps/rejected": -976.0416259765625, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31122761964797974, + "rewards/margins": 9.00806713104248, + "rewards/rejected": -9.319294929504395, + "step": 5660 + }, + { + "epoch": 0.34, + "learning_rate": 4.997543330802716e-06, + "logits/chosen": -2.8772130012512207, + "logits/rejected": -2.784759759902954, + "logps/chosen": -103.09321594238281, + "logps/rejected": -1026.4012451171875, + "loss": 0.0217, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35508057475090027, + "rewards/margins": 9.46030330657959, + "rewards/rejected": -9.815382957458496, + "step": 5670 + }, + { + "epoch": 0.34, + "learning_rate": 4.997465851191066e-06, + "logits/chosen": -2.891869068145752, + "logits/rejected": -2.75433611869812, + "logps/chosen": -106.99009704589844, + "logps/rejected": -914.7220458984375, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2909832000732422, + "rewards/margins": 8.421646118164062, + "rewards/rejected": -8.712629318237305, + "step": 5680 + }, + { + "epoch": 0.34, + "learning_rate": 4.997387169299388e-06, + "logits/chosen": -2.891097068786621, + "logits/rejected": -2.7934207916259766, + "logps/chosen": -90.09648895263672, + "logps/rejected": -882.1295166015625, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21893629431724548, + "rewards/margins": 8.177952766418457, + "rewards/rejected": -8.396888732910156, + "step": 5690 + }, + { + "epoch": 0.34, + "learning_rate": 4.997307285165559e-06, + "logits/chosen": -2.8204102516174316, + "logits/rejected": -2.7428536415100098, + "logps/chosen": -94.22312927246094, + "logps/rejected": -916.5993041992188, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22160491347312927, + "rewards/margins": 8.498708724975586, + "rewards/rejected": -8.720314025878906, + "step": 5700 + }, + { + "epoch": 0.34, + "learning_rate": 4.9972261988280346e-06, + "logits/chosen": -2.8782856464385986, + "logits/rejected": -2.7779300212860107, + "logps/chosen": -93.50756072998047, + "logps/rejected": -1092.0567626953125, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18164996802806854, + "rewards/margins": 10.287232398986816, + "rewards/rejected": -10.468881607055664, + "step": 5710 + }, + { + "epoch": 0.34, + "learning_rate": 4.997143910325849e-06, + "logits/chosen": -2.8698322772979736, + "logits/rejected": -2.7700772285461426, + "logps/chosen": -86.90312957763672, + "logps/rejected": -999.9410400390625, + "loss": 0.0221, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20222774147987366, + "rewards/margins": 9.347708702087402, + "rewards/rejected": -9.549936294555664, + "step": 5720 + }, + { + "epoch": 0.34, + "learning_rate": 4.997060419698618e-06, + "logits/chosen": -2.866276979446411, + "logits/rejected": -2.7743258476257324, + "logps/chosen": -116.61909484863281, + "logps/rejected": -1000.4385986328125, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45243334770202637, + "rewards/margins": 9.117535591125488, + "rewards/rejected": -9.569969177246094, + "step": 5730 + }, + { + "epoch": 0.34, + "learning_rate": 4.9969757269865325e-06, + "logits/chosen": -2.8526480197906494, + "logits/rejected": -2.7103397846221924, + "logps/chosen": -98.03345489501953, + "logps/rejected": -1041.865478515625, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2868651747703552, + "rewards/margins": 9.693255424499512, + "rewards/rejected": -9.980120658874512, + "step": 5740 + }, + { + "epoch": 0.34, + "learning_rate": 4.996889832230364e-06, + "logits/chosen": -2.8486738204956055, + "logits/rejected": -2.731071949005127, + "logps/chosen": -107.25129699707031, + "logps/rejected": -952.818359375, + "loss": 0.0579, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.4053794741630554, + "rewards/margins": 8.693920135498047, + "rewards/rejected": -9.099300384521484, + "step": 5750 + }, + { + "epoch": 0.34, + "learning_rate": 4.996802735471461e-06, + "logits/chosen": -2.875649929046631, + "logits/rejected": -2.7858481407165527, + "logps/chosen": -97.1933364868164, + "logps/rejected": -1071.1541748046875, + "loss": 0.0139, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2915171980857849, + "rewards/margins": 9.962873458862305, + "rewards/rejected": -10.254390716552734, + "step": 5760 + }, + { + "epoch": 0.34, + "learning_rate": 4.996714436751754e-06, + "logits/chosen": -2.829580783843994, + "logits/rejected": -2.753713369369507, + "logps/chosen": -91.81318664550781, + "logps/rejected": -1087.4423828125, + "loss": 0.0189, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18202371895313263, + "rewards/margins": 10.261846542358398, + "rewards/rejected": -10.443870544433594, + "step": 5770 + }, + { + "epoch": 0.34, + "learning_rate": 4.9966249361137485e-06, + "logits/chosen": -2.877453565597534, + "logits/rejected": -2.760840654373169, + "logps/chosen": -79.38204193115234, + "logps/rejected": -945.1511840820312, + "loss": 0.0458, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13070917129516602, + "rewards/margins": 8.877816200256348, + "rewards/rejected": -9.008523941040039, + "step": 5780 + }, + { + "epoch": 0.35, + "learning_rate": 4.996534233600531e-06, + "logits/chosen": -2.864905595779419, + "logits/rejected": -2.7753379344940186, + "logps/chosen": -79.3863296508789, + "logps/rejected": -936.8619995117188, + "loss": 0.0213, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11023255437612534, + "rewards/margins": 8.816895484924316, + "rewards/rejected": -8.927128791809082, + "step": 5790 + }, + { + "epoch": 0.35, + "learning_rate": 4.996442329255765e-06, + "logits/chosen": -2.8361868858337402, + "logits/rejected": -2.719416618347168, + "logps/chosen": -88.22953796386719, + "logps/rejected": -973.6760864257812, + "loss": 0.0095, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16746632754802704, + "rewards/margins": 9.113134384155273, + "rewards/rejected": -9.280600547790527, + "step": 5800 + }, + { + "epoch": 0.35, + "learning_rate": 4.996349223123693e-06, + "logits/chosen": -2.868873119354248, + "logits/rejected": -2.744396924972534, + "logps/chosen": -124.86759948730469, + "logps/rejected": -1048.982177734375, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.48259878158569336, + "rewards/margins": 9.558441162109375, + "rewards/rejected": -10.04103946685791, + "step": 5810 + }, + { + "epoch": 0.35, + "learning_rate": 4.996254915249138e-06, + "logits/chosen": -2.8247337341308594, + "logits/rejected": -2.7633042335510254, + "logps/chosen": -118.34843444824219, + "logps/rejected": -1040.43017578125, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5053179264068604, + "rewards/margins": 9.458664894104004, + "rewards/rejected": -9.963983535766602, + "step": 5820 + }, + { + "epoch": 0.35, + "learning_rate": 4.996159405677499e-06, + "logits/chosen": -2.837290048599243, + "logits/rejected": -2.7637112140655518, + "logps/chosen": -85.29856872558594, + "logps/rejected": -1062.761474609375, + "loss": 0.012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13600033521652222, + "rewards/margins": 10.04765510559082, + "rewards/rejected": -10.183655738830566, + "step": 5830 + }, + { + "epoch": 0.35, + "learning_rate": 4.996062694454752e-06, + "logits/chosen": -2.8562419414520264, + "logits/rejected": -2.762296438217163, + "logps/chosen": -80.62400817871094, + "logps/rejected": -935.0916748046875, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10768361389636993, + "rewards/margins": 8.791327476501465, + "rewards/rejected": -8.899011611938477, + "step": 5840 + }, + { + "epoch": 0.35, + "learning_rate": 4.995964781627457e-06, + "logits/chosen": -2.863356113433838, + "logits/rejected": -2.7451672554016113, + "logps/chosen": -111.53287506103516, + "logps/rejected": -888.0652465820312, + "loss": 0.0778, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.39775925874710083, + "rewards/margins": 8.049300193786621, + "rewards/rejected": -8.447061538696289, + "step": 5850 + }, + { + "epoch": 0.35, + "learning_rate": 4.995865667242748e-06, + "logits/chosen": -2.8719191551208496, + "logits/rejected": -2.7281506061553955, + "logps/chosen": -126.27647399902344, + "logps/rejected": -1112.29150390625, + "loss": 0.051, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.5048094391822815, + "rewards/margins": 10.192010879516602, + "rewards/rejected": -10.696820259094238, + "step": 5860 + }, + { + "epoch": 0.35, + "learning_rate": 4.995765351348339e-06, + "logits/chosen": -2.8924922943115234, + "logits/rejected": -2.794528007507324, + "logps/chosen": -116.2362060546875, + "logps/rejected": -989.9505615234375, + "loss": 0.0302, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47875863313674927, + "rewards/margins": 8.982992172241211, + "rewards/rejected": -9.461750984191895, + "step": 5870 + }, + { + "epoch": 0.35, + "learning_rate": 4.99566383399252e-06, + "logits/chosen": -2.8415448665618896, + "logits/rejected": -2.7373201847076416, + "logps/chosen": -129.90931701660156, + "logps/rejected": -1077.337158203125, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5460450053215027, + "rewards/margins": 9.7931489944458, + "rewards/rejected": -10.339194297790527, + "step": 5880 + }, + { + "epoch": 0.35, + "learning_rate": 4.9955611152241645e-06, + "logits/chosen": -2.841820240020752, + "logits/rejected": -2.7459444999694824, + "logps/chosen": -144.3531951904297, + "logps/rejected": -879.1474609375, + "loss": 0.0357, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7501929998397827, + "rewards/margins": 7.5964508056640625, + "rewards/rejected": -8.346643447875977, + "step": 5890 + }, + { + "epoch": 0.35, + "learning_rate": 4.995457195092719e-06, + "logits/chosen": -2.8293557167053223, + "logits/rejected": -2.732442855834961, + "logps/chosen": -131.51242065429688, + "logps/rejected": -1059.0865478515625, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5870695114135742, + "rewards/margins": 9.554573059082031, + "rewards/rejected": -10.141641616821289, + "step": 5900 + }, + { + "epoch": 0.35, + "learning_rate": 4.995352073648213e-06, + "logits/chosen": -2.80732798576355, + "logits/rejected": -2.7069995403289795, + "logps/chosen": -121.39400482177734, + "logps/rejected": -1015.4548950195312, + "loss": 0.0236, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4936566948890686, + "rewards/margins": 9.209264755249023, + "rewards/rejected": -9.702920913696289, + "step": 5910 + }, + { + "epoch": 0.35, + "learning_rate": 4.9952457509412495e-06, + "logits/chosen": -2.855853319168091, + "logits/rejected": -2.7244009971618652, + "logps/chosen": -197.28598022460938, + "logps/rejected": -1041.75830078125, + "loss": 0.0259, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2874542474746704, + "rewards/margins": 8.707128524780273, + "rewards/rejected": -9.994583129882812, + "step": 5920 + }, + { + "epoch": 0.35, + "learning_rate": 4.995138227023013e-06, + "logits/chosen": -2.8909430503845215, + "logits/rejected": -2.7591357231140137, + "logps/chosen": -216.03390502929688, + "logps/rejected": -1064.662109375, + "loss": 0.0278, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.460698127746582, + "rewards/margins": 8.754083633422852, + "rewards/rejected": -10.21478271484375, + "step": 5930 + }, + { + "epoch": 0.35, + "learning_rate": 4.9950295019452665e-06, + "logits/chosen": -2.837111711502075, + "logits/rejected": -2.7232730388641357, + "logps/chosen": -172.21160888671875, + "logps/rejected": -1077.967529296875, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0197370052337646, + "rewards/margins": 9.31914234161377, + "rewards/rejected": -10.338878631591797, + "step": 5940 + }, + { + "epoch": 0.35, + "learning_rate": 4.99491957576035e-06, + "logits/chosen": -2.8758866786956787, + "logits/rejected": -2.7404747009277344, + "logps/chosen": -148.3928680419922, + "logps/rejected": -1008.51318359375, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7469266057014465, + "rewards/margins": 8.896544456481934, + "rewards/rejected": -9.64346981048584, + "step": 5950 + }, + { + "epoch": 0.36, + "learning_rate": 4.994808448521181e-06, + "logits/chosen": -2.8837621212005615, + "logits/rejected": -2.7798938751220703, + "logps/chosen": -137.8159942626953, + "logps/rejected": -1113.6925048828125, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6443921327590942, + "rewards/margins": 10.038885116577148, + "rewards/rejected": -10.68327808380127, + "step": 5960 + }, + { + "epoch": 0.36, + "learning_rate": 4.9946961202812566e-06, + "logits/chosen": -2.8304550647735596, + "logits/rejected": -2.755340337753296, + "logps/chosen": -95.44120025634766, + "logps/rejected": -974.14501953125, + "loss": 0.0271, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3040868043899536, + "rewards/margins": 9.002573013305664, + "rewards/rejected": -9.306660652160645, + "step": 5970 + }, + { + "epoch": 0.36, + "learning_rate": 4.994582591094652e-06, + "logits/chosen": -2.824028491973877, + "logits/rejected": -2.7336349487304688, + "logps/chosen": -101.11412811279297, + "logps/rejected": -1013.8076171875, + "loss": 0.0191, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19001387059688568, + "rewards/margins": 9.504179000854492, + "rewards/rejected": -9.694193840026855, + "step": 5980 + }, + { + "epoch": 0.36, + "learning_rate": 4.9944678610160195e-06, + "logits/chosen": -2.866589069366455, + "logits/rejected": -2.7728590965270996, + "logps/chosen": -84.56968688964844, + "logps/rejected": -1136.9007568359375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1552964746952057, + "rewards/margins": 10.76179313659668, + "rewards/rejected": -10.917089462280273, + "step": 5990 + }, + { + "epoch": 0.36, + "learning_rate": 4.99435193010059e-06, + "logits/chosen": -2.819800853729248, + "logits/rejected": -2.685945749282837, + "logps/chosen": -95.38417053222656, + "logps/rejected": -1000.0623779296875, + "loss": 0.0177, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1565397083759308, + "rewards/margins": 9.40877914428711, + "rewards/rejected": -9.565317153930664, + "step": 6000 + }, + { + "epoch": 0.36, + "learning_rate": 4.994234798404174e-06, + "logits/chosen": -2.8545045852661133, + "logits/rejected": -2.720797061920166, + "logps/chosen": -85.23140716552734, + "logps/rejected": -1030.2203369140625, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13435958325862885, + "rewards/margins": 9.73658561706543, + "rewards/rejected": -9.87094497680664, + "step": 6010 + }, + { + "epoch": 0.36, + "learning_rate": 4.994116465983158e-06, + "logits/chosen": -2.832564353942871, + "logits/rejected": -2.754326105117798, + "logps/chosen": -79.41764831542969, + "logps/rejected": -981.9638671875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14471688866615295, + "rewards/margins": 9.234491348266602, + "rewards/rejected": -9.379209518432617, + "step": 6020 + }, + { + "epoch": 0.36, + "learning_rate": 4.993996932894507e-06, + "logits/chosen": -2.8643574714660645, + "logits/rejected": -2.7446398735046387, + "logps/chosen": -91.9593734741211, + "logps/rejected": -1046.4627685546875, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21943211555480957, + "rewards/margins": 9.799996376037598, + "rewards/rejected": -10.019427299499512, + "step": 6030 + }, + { + "epoch": 0.36, + "learning_rate": 4.993876199195764e-06, + "logits/chosen": -2.886744737625122, + "logits/rejected": -2.785329580307007, + "logps/chosen": -81.71095275878906, + "logps/rejected": -980.7774658203125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12313757836818695, + "rewards/margins": 9.2462158203125, + "rewards/rejected": -9.369352340698242, + "step": 6040 + }, + { + "epoch": 0.36, + "learning_rate": 4.993754264945051e-06, + "logits/chosen": -2.8607308864593506, + "logits/rejected": -2.7534987926483154, + "logps/chosen": -85.49272155761719, + "logps/rejected": -1054.5147705078125, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16972659528255463, + "rewards/margins": 9.925309181213379, + "rewards/rejected": -10.095036506652832, + "step": 6050 + }, + { + "epoch": 0.36, + "learning_rate": 4.993631130201066e-06, + "logits/chosen": -2.8518481254577637, + "logits/rejected": -2.7452194690704346, + "logps/chosen": -103.50215148925781, + "logps/rejected": -945.3824462890625, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3096039891242981, + "rewards/margins": 8.715206146240234, + "rewards/rejected": -9.024810791015625, + "step": 6060 + }, + { + "epoch": 0.36, + "learning_rate": 4.993506795023087e-06, + "logits/chosen": -2.8591318130493164, + "logits/rejected": -2.764462947845459, + "logps/chosen": -109.9842758178711, + "logps/rejected": -1042.355712890625, + "loss": 0.0203, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40174394845962524, + "rewards/margins": 9.582122802734375, + "rewards/rejected": -9.983865737915039, + "step": 6070 + }, + { + "epoch": 0.36, + "learning_rate": 4.993381259470968e-06, + "logits/chosen": -2.8834433555603027, + "logits/rejected": -2.7895684242248535, + "logps/chosen": -93.2333984375, + "logps/rejected": -1017.0689697265625, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15938293933868408, + "rewards/margins": 9.579432487487793, + "rewards/rejected": -9.738815307617188, + "step": 6080 + }, + { + "epoch": 0.36, + "learning_rate": 4.993254523605144e-06, + "logits/chosen": -2.8473591804504395, + "logits/rejected": -2.7549102306365967, + "logps/chosen": -84.49934387207031, + "logps/rejected": -1084.5828857421875, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17377933859825134, + "rewards/margins": 10.232751846313477, + "rewards/rejected": -10.40653133392334, + "step": 6090 + }, + { + "epoch": 0.36, + "learning_rate": 4.993126587486623e-06, + "logits/chosen": -2.8495426177978516, + "logits/rejected": -2.740593433380127, + "logps/chosen": -78.69758605957031, + "logps/rejected": -967.1017456054688, + "loss": 0.0371, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.049415335059165955, + "rewards/margins": 9.193384170532227, + "rewards/rejected": -9.242798805236816, + "step": 6100 + }, + { + "epoch": 0.36, + "learning_rate": 4.992997451176995e-06, + "logits/chosen": -2.8794190883636475, + "logits/rejected": -2.7673630714416504, + "logps/chosen": -79.04580688476562, + "logps/rejected": -1055.802001953125, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06543741375207901, + "rewards/margins": 10.036157608032227, + "rewards/rejected": -10.101594924926758, + "step": 6110 + }, + { + "epoch": 0.36, + "learning_rate": 4.9928671147384255e-06, + "logits/chosen": -2.824097156524658, + "logits/rejected": -2.7279114723205566, + "logps/chosen": -75.74333190917969, + "logps/rejected": -1085.4373779296875, + "loss": 0.0182, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07069632411003113, + "rewards/margins": 10.341090202331543, + "rewards/rejected": -10.411786079406738, + "step": 6120 + }, + { + "epoch": 0.37, + "learning_rate": 4.992735578233658e-06, + "logits/chosen": -2.8170173168182373, + "logits/rejected": -2.707951068878174, + "logps/chosen": -85.38341522216797, + "logps/rejected": -1130.037353515625, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10446202754974365, + "rewards/margins": 10.74354076385498, + "rewards/rejected": -10.848004341125488, + "step": 6130 + }, + { + "epoch": 0.37, + "learning_rate": 4.992602841726016e-06, + "logits/chosen": -2.9125454425811768, + "logits/rejected": -2.789853572845459, + "logps/chosen": -100.0194320678711, + "logps/rejected": -1083.5196533203125, + "loss": 0.0215, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.259854793548584, + "rewards/margins": 10.133074760437012, + "rewards/rejected": -10.392929077148438, + "step": 6140 + }, + { + "epoch": 0.37, + "learning_rate": 4.992468905279398e-06, + "logits/chosen": -2.8611199855804443, + "logits/rejected": -2.7241532802581787, + "logps/chosen": -86.87162780761719, + "logps/rejected": -1011.1385498046875, + "loss": 0.0353, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.18668285012245178, + "rewards/margins": 9.500238418579102, + "rewards/rejected": -9.686922073364258, + "step": 6150 + }, + { + "epoch": 0.37, + "learning_rate": 4.99233376895828e-06, + "logits/chosen": -2.857591152191162, + "logits/rejected": -2.7668190002441406, + "logps/chosen": -75.15544128417969, + "logps/rejected": -1014.4747314453125, + "loss": 0.0322, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11937995254993439, + "rewards/margins": 9.574254989624023, + "rewards/rejected": -9.693634986877441, + "step": 6160 + }, + { + "epoch": 0.37, + "learning_rate": 4.9921974328277175e-06, + "logits/chosen": -2.8643832206726074, + "logits/rejected": -2.722567081451416, + "logps/chosen": -77.85453796386719, + "logps/rejected": -983.0377807617188, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10900320112705231, + "rewards/margins": 9.27419662475586, + "rewards/rejected": -9.383199691772461, + "step": 6170 + }, + { + "epoch": 0.37, + "learning_rate": 4.992059896953343e-06, + "logits/chosen": -2.8597054481506348, + "logits/rejected": -2.758347272872925, + "logps/chosen": -75.27635955810547, + "logps/rejected": -962.4560546875, + "loss": 0.0212, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.005975815467536449, + "rewards/margins": 9.176641464233398, + "rewards/rejected": -9.182616233825684, + "step": 6180 + }, + { + "epoch": 0.37, + "learning_rate": 4.991921161401366e-06, + "logits/chosen": -2.8781778812408447, + "logits/rejected": -2.754544734954834, + "logps/chosen": -80.2876205444336, + "logps/rejected": -962.5618286132812, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11159811168909073, + "rewards/margins": 9.081779479980469, + "rewards/rejected": -9.193377494812012, + "step": 6190 + }, + { + "epoch": 0.37, + "learning_rate": 4.991781226238573e-06, + "logits/chosen": -2.838768243789673, + "logits/rejected": -2.729720115661621, + "logps/chosen": -94.8681640625, + "logps/rejected": -969.4010009765625, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22619493305683136, + "rewards/margins": 9.008465766906738, + "rewards/rejected": -9.234660148620605, + "step": 6200 + }, + { + "epoch": 0.37, + "learning_rate": 4.99164009153233e-06, + "logits/chosen": -2.847085475921631, + "logits/rejected": -2.7592029571533203, + "logps/chosen": -97.43214416503906, + "logps/rejected": -1120.4674072265625, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2826923429965973, + "rewards/margins": 10.491547584533691, + "rewards/rejected": -10.774240493774414, + "step": 6210 + }, + { + "epoch": 0.37, + "learning_rate": 4.991497757350578e-06, + "logits/chosen": -2.889768123626709, + "logits/rejected": -2.730175495147705, + "logps/chosen": -94.32430267333984, + "logps/rejected": -1029.720947265625, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24467575550079346, + "rewards/margins": 9.60936164855957, + "rewards/rejected": -9.85403823852539, + "step": 6220 + }, + { + "epoch": 0.37, + "learning_rate": 4.991354223761839e-06, + "logits/chosen": -2.8149733543395996, + "logits/rejected": -2.7393572330474854, + "logps/chosen": -83.58454895019531, + "logps/rejected": -1061.6265869140625, + "loss": 0.0598, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.12088409811258316, + "rewards/margins": 10.062891006469727, + "rewards/rejected": -10.18377685546875, + "step": 6230 + }, + { + "epoch": 0.37, + "learning_rate": 4.991209490835207e-06, + "logits/chosen": -2.8878660202026367, + "logits/rejected": -2.7880942821502686, + "logps/chosen": -98.37051391601562, + "logps/rejected": -1016.2542724609375, + "loss": 0.0211, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2892311215400696, + "rewards/margins": 9.425946235656738, + "rewards/rejected": -9.715177536010742, + "step": 6240 + }, + { + "epoch": 0.37, + "learning_rate": 4.991063558640359e-06, + "logits/chosen": -2.844566822052002, + "logits/rejected": -2.7127013206481934, + "logps/chosen": -102.30368041992188, + "logps/rejected": -1069.210693359375, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25833025574684143, + "rewards/margins": 10.001519203186035, + "rewards/rejected": -10.259848594665527, + "step": 6250 + }, + { + "epoch": 0.37, + "learning_rate": 4.990916427247544e-06, + "logits/chosen": -2.8318252563476562, + "logits/rejected": -2.733450412750244, + "logps/chosen": -93.7423324584961, + "logps/rejected": -911.1585693359375, + "loss": 0.0228, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2379368543624878, + "rewards/margins": 8.431787490844727, + "rewards/rejected": -8.669724464416504, + "step": 6260 + }, + { + "epoch": 0.37, + "learning_rate": 4.990768096727594e-06, + "logits/chosen": -2.8655197620391846, + "logits/rejected": -2.7633962631225586, + "logps/chosen": -93.04314422607422, + "logps/rejected": -968.5587768554688, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2794722616672516, + "rewards/margins": 8.955486297607422, + "rewards/rejected": -9.234959602355957, + "step": 6270 + }, + { + "epoch": 0.37, + "learning_rate": 4.990618567151914e-06, + "logits/chosen": -2.894196033477783, + "logits/rejected": -2.781888484954834, + "logps/chosen": -114.3109130859375, + "logps/rejected": -910.7276611328125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4302552342414856, + "rewards/margins": 8.236082077026367, + "rewards/rejected": -8.666338920593262, + "step": 6280 + }, + { + "epoch": 0.38, + "learning_rate": 4.990467838592486e-06, + "logits/chosen": -2.856534481048584, + "logits/rejected": -2.7852210998535156, + "logps/chosen": -74.87226104736328, + "logps/rejected": -881.0589599609375, + "loss": 0.02, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07134991139173508, + "rewards/margins": 8.299285888671875, + "rewards/rejected": -8.370635032653809, + "step": 6290 + }, + { + "epoch": 0.38, + "learning_rate": 4.990315911121874e-06, + "logits/chosen": -2.8851895332336426, + "logits/rejected": -2.774014949798584, + "logps/chosen": -77.09825134277344, + "logps/rejected": -1062.0777587890625, + "loss": 0.0125, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07684598863124847, + "rewards/margins": 10.094446182250977, + "rewards/rejected": -10.171293258666992, + "step": 6300 + }, + { + "epoch": 0.38, + "learning_rate": 4.990162784813214e-06, + "logits/chosen": -2.8453879356384277, + "logits/rejected": -2.7881786823272705, + "logps/chosen": -95.4285659790039, + "logps/rejected": -1054.6597900390625, + "loss": 0.0089, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24586184322834015, + "rewards/margins": 9.854063987731934, + "rewards/rejected": -10.099924087524414, + "step": 6310 + }, + { + "epoch": 0.38, + "learning_rate": 4.99000845974022e-06, + "logits/chosen": -2.878973960876465, + "logits/rejected": -2.7835898399353027, + "logps/chosen": -90.92822265625, + "logps/rejected": -1021.2404174804688, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16721206903457642, + "rewards/margins": 9.605581283569336, + "rewards/rejected": -9.772794723510742, + "step": 6320 + }, + { + "epoch": 0.38, + "learning_rate": 4.989852935977187e-06, + "logits/chosen": -2.8782780170440674, + "logits/rejected": -2.7628581523895264, + "logps/chosen": -96.49330139160156, + "logps/rejected": -1079.93896484375, + "loss": 0.0276, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24046535789966583, + "rewards/margins": 10.100900650024414, + "rewards/rejected": -10.341364860534668, + "step": 6330 + }, + { + "epoch": 0.38, + "learning_rate": 4.98969621359898e-06, + "logits/chosen": -2.8649351596832275, + "logits/rejected": -2.7465875148773193, + "logps/chosen": -75.44749450683594, + "logps/rejected": -997.9232177734375, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.047353245317935944, + "rewards/margins": 9.482558250427246, + "rewards/rejected": -9.529911994934082, + "step": 6340 + }, + { + "epoch": 0.38, + "learning_rate": 4.989538292681049e-06, + "logits/chosen": -2.8604397773742676, + "logits/rejected": -2.768730640411377, + "logps/chosen": -89.19337463378906, + "logps/rejected": -1029.1842041015625, + "loss": 0.0219, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1986398547887802, + "rewards/margins": 9.655150413513184, + "rewards/rejected": -9.853791236877441, + "step": 6350 + }, + { + "epoch": 0.38, + "learning_rate": 4.989379173299416e-06, + "logits/chosen": -2.842869520187378, + "logits/rejected": -2.7630245685577393, + "logps/chosen": -104.572998046875, + "logps/rejected": -994.7445068359375, + "loss": 0.0235, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35896068811416626, + "rewards/margins": 9.145153045654297, + "rewards/rejected": -9.504114151000977, + "step": 6360 + }, + { + "epoch": 0.38, + "learning_rate": 4.98921885553068e-06, + "logits/chosen": -2.879728317260742, + "logits/rejected": -2.7838993072509766, + "logps/chosen": -88.58274841308594, + "logps/rejected": -1067.944091796875, + "loss": 0.0291, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17638444900512695, + "rewards/margins": 10.06000804901123, + "rewards/rejected": -10.2363920211792, + "step": 6370 + }, + { + "epoch": 0.38, + "learning_rate": 4.989057339452019e-06, + "logits/chosen": -2.8624978065490723, + "logits/rejected": -2.7264952659606934, + "logps/chosen": -91.03730773925781, + "logps/rejected": -931.87548828125, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1654445230960846, + "rewards/margins": 8.710175514221191, + "rewards/rejected": -8.875619888305664, + "step": 6380 + }, + { + "epoch": 0.38, + "learning_rate": 4.988894625141186e-06, + "logits/chosen": -2.862191915512085, + "logits/rejected": -2.7529497146606445, + "logps/chosen": -84.1357421875, + "logps/rejected": -922.8560791015625, + "loss": 0.0353, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2000402957201004, + "rewards/margins": 8.583680152893066, + "rewards/rejected": -8.783720016479492, + "step": 6390 + }, + { + "epoch": 0.38, + "learning_rate": 4.988730712676513e-06, + "logits/chosen": -2.8570339679718018, + "logits/rejected": -2.7313013076782227, + "logps/chosen": -77.96482849121094, + "logps/rejected": -1052.4638671875, + "loss": 0.0195, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1103677898645401, + "rewards/margins": 9.977703094482422, + "rewards/rejected": -10.088069915771484, + "step": 6400 + }, + { + "epoch": 0.38, + "learning_rate": 4.9885656021369054e-06, + "logits/chosen": -2.844604015350342, + "logits/rejected": -2.728583335876465, + "logps/chosen": -113.58302307128906, + "logps/rejected": -1053.626953125, + "loss": 0.0166, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4180574417114258, + "rewards/margins": 9.680901527404785, + "rewards/rejected": -10.098958969116211, + "step": 6410 + }, + { + "epoch": 0.38, + "learning_rate": 4.98839929360185e-06, + "logits/chosen": -2.8604588508605957, + "logits/rejected": -2.772819995880127, + "logps/chosen": -111.1179428100586, + "logps/rejected": -1029.83251953125, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3950466513633728, + "rewards/margins": 9.458197593688965, + "rewards/rejected": -9.85324478149414, + "step": 6420 + }, + { + "epoch": 0.38, + "learning_rate": 4.988231787151406e-06, + "logits/chosen": -2.8588626384735107, + "logits/rejected": -2.7666072845458984, + "logps/chosen": -128.085205078125, + "logps/rejected": -876.7955322265625, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6269791722297668, + "rewards/margins": 7.708470821380615, + "rewards/rejected": -8.335450172424316, + "step": 6430 + }, + { + "epoch": 0.38, + "learning_rate": 4.988063082866212e-06, + "logits/chosen": -2.857917070388794, + "logits/rejected": -2.726804256439209, + "logps/chosen": -137.801025390625, + "logps/rejected": -1104.3460693359375, + "loss": 0.0193, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6997302770614624, + "rewards/margins": 9.890993118286133, + "rewards/rejected": -10.590723991394043, + "step": 6440 + }, + { + "epoch": 0.38, + "learning_rate": 4.9878931808274796e-06, + "logits/chosen": -2.9012885093688965, + "logits/rejected": -2.76401948928833, + "logps/chosen": -148.73785400390625, + "logps/rejected": -1036.9642333984375, + "loss": 0.0515, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7602577209472656, + "rewards/margins": 9.147088050842285, + "rewards/rejected": -9.907346725463867, + "step": 6450 + }, + { + "epoch": 0.39, + "learning_rate": 4.987722081117004e-06, + "logits/chosen": -2.8701565265655518, + "logits/rejected": -2.7461555004119873, + "logps/chosen": -91.30675506591797, + "logps/rejected": -960.9021606445312, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1995578408241272, + "rewards/margins": 8.98405647277832, + "rewards/rejected": -9.183614730834961, + "step": 6460 + }, + { + "epoch": 0.39, + "learning_rate": 4.987549783817148e-06, + "logits/chosen": -2.8621885776519775, + "logits/rejected": -2.757124662399292, + "logps/chosen": -93.64064025878906, + "logps/rejected": -937.64697265625, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.234915092587471, + "rewards/margins": 8.705764770507812, + "rewards/rejected": -8.940678596496582, + "step": 6470 + }, + { + "epoch": 0.39, + "learning_rate": 4.9873762890108596e-06, + "logits/chosen": -2.8911705017089844, + "logits/rejected": -2.7717232704162598, + "logps/chosen": -78.17686462402344, + "logps/rejected": -1006.5031127929688, + "loss": 0.0114, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10923222452402115, + "rewards/margins": 9.51962947845459, + "rewards/rejected": -9.628862380981445, + "step": 6480 + }, + { + "epoch": 0.39, + "learning_rate": 4.987201596781657e-06, + "logits/chosen": -2.880112886428833, + "logits/rejected": -2.767019271850586, + "logps/chosen": -79.64859008789062, + "logps/rejected": -1076.8486328125, + "loss": 0.0304, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05476031452417374, + "rewards/margins": 10.274023056030273, + "rewards/rejected": -10.328783988952637, + "step": 6490 + }, + { + "epoch": 0.39, + "learning_rate": 4.987025707213638e-06, + "logits/chosen": -2.868051528930664, + "logits/rejected": -2.7989964485168457, + "logps/chosen": -84.3449478149414, + "logps/rejected": -962.3760986328125, + "loss": 0.0187, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13877049088478088, + "rewards/margins": 9.044047355651855, + "rewards/rejected": -9.182817459106445, + "step": 6500 + }, + { + "epoch": 0.39, + "learning_rate": 4.986848620391473e-06, + "logits/chosen": -2.8739447593688965, + "logits/rejected": -2.7484278678894043, + "logps/chosen": -76.3927001953125, + "logps/rejected": -1016.6946411132812, + "loss": 0.0147, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07281289249658585, + "rewards/margins": 9.644214630126953, + "rewards/rejected": -9.71702766418457, + "step": 6510 + }, + { + "epoch": 0.39, + "learning_rate": 4.986670336400416e-06, + "logits/chosen": -2.8868865966796875, + "logits/rejected": -2.7850661277770996, + "logps/chosen": -92.38011932373047, + "logps/rejected": -971.8704833984375, + "loss": 0.0426, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23119449615478516, + "rewards/margins": 9.06200122833252, + "rewards/rejected": -9.293196678161621, + "step": 6520 + }, + { + "epoch": 0.39, + "learning_rate": 4.986490855326291e-06, + "logits/chosen": -2.8497400283813477, + "logits/rejected": -2.7761387825012207, + "logps/chosen": -96.12339782714844, + "logps/rejected": -1072.453369140625, + "loss": 0.024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23359891772270203, + "rewards/margins": 10.065162658691406, + "rewards/rejected": -10.298761367797852, + "step": 6530 + }, + { + "epoch": 0.39, + "learning_rate": 4.986310177255498e-06, + "logits/chosen": -2.8781185150146484, + "logits/rejected": -2.7879347801208496, + "logps/chosen": -93.9285659790039, + "logps/rejected": -1103.159423828125, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23016805946826935, + "rewards/margins": 10.348945617675781, + "rewards/rejected": -10.57911491394043, + "step": 6540 + }, + { + "epoch": 0.39, + "learning_rate": 4.986128302275018e-06, + "logits/chosen": -2.895021915435791, + "logits/rejected": -2.757979393005371, + "logps/chosen": -112.7436294555664, + "logps/rejected": -1110.0906982421875, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40490421652793884, + "rewards/margins": 10.23462963104248, + "rewards/rejected": -10.639533996582031, + "step": 6550 + }, + { + "epoch": 0.39, + "learning_rate": 4.985945230472406e-06, + "logits/chosen": -2.8649659156799316, + "logits/rejected": -2.763258934020996, + "logps/chosen": -107.67581939697266, + "logps/rejected": -1012.0592041015625, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3479551672935486, + "rewards/margins": 9.333402633666992, + "rewards/rejected": -9.681360244750977, + "step": 6560 + }, + { + "epoch": 0.39, + "learning_rate": 4.985760961935791e-06, + "logits/chosen": -2.8558948040008545, + "logits/rejected": -2.764627456665039, + "logps/chosen": -104.2562484741211, + "logps/rejected": -1005.9757690429688, + "loss": 0.0249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2968907952308655, + "rewards/margins": 9.320588111877441, + "rewards/rejected": -9.617477416992188, + "step": 6570 + }, + { + "epoch": 0.39, + "learning_rate": 4.985575496753881e-06, + "logits/chosen": -2.8581740856170654, + "logits/rejected": -2.7769579887390137, + "logps/chosen": -121.88063049316406, + "logps/rejected": -996.5872802734375, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5416587591171265, + "rewards/margins": 8.984092712402344, + "rewards/rejected": -9.525751113891602, + "step": 6580 + }, + { + "epoch": 0.39, + "learning_rate": 4.985388835015958e-06, + "logits/chosen": -2.8557238578796387, + "logits/rejected": -2.7589848041534424, + "logps/chosen": -93.11974334716797, + "logps/rejected": -1007.8682861328125, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22555169463157654, + "rewards/margins": 9.415470123291016, + "rewards/rejected": -9.641020774841309, + "step": 6590 + }, + { + "epoch": 0.39, + "learning_rate": 4.985200976811882e-06, + "logits/chosen": -2.8465869426727295, + "logits/rejected": -2.74824595451355, + "logps/chosen": -87.58549499511719, + "logps/rejected": -1095.3348388671875, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2011781483888626, + "rewards/margins": 10.317588806152344, + "rewards/rejected": -10.518767356872559, + "step": 6600 + }, + { + "epoch": 0.39, + "learning_rate": 4.985011922232087e-06, + "logits/chosen": -2.8732452392578125, + "logits/rejected": -2.7531867027282715, + "logps/chosen": -80.53604888916016, + "logps/rejected": -1070.269287109375, + "loss": 0.0118, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11033640801906586, + "rewards/margins": 10.149062156677246, + "rewards/rejected": -10.2593994140625, + "step": 6610 + }, + { + "epoch": 0.39, + "learning_rate": 4.984821671367585e-06, + "logits/chosen": -2.833533525466919, + "logits/rejected": -2.7007884979248047, + "logps/chosen": -104.37284088134766, + "logps/rejected": -959.0774536132812, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3669954538345337, + "rewards/margins": 8.774602890014648, + "rewards/rejected": -9.141596794128418, + "step": 6620 + }, + { + "epoch": 0.4, + "learning_rate": 4.9846302243099624e-06, + "logits/chosen": -2.8597381114959717, + "logits/rejected": -2.7490649223327637, + "logps/chosen": -91.8733139038086, + "logps/rejected": -1088.1566162109375, + "loss": 0.0201, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.22041288018226624, + "rewards/margins": 10.223590850830078, + "rewards/rejected": -10.444003105163574, + "step": 6630 + }, + { + "epoch": 0.4, + "learning_rate": 4.98443758115138e-06, + "logits/chosen": -2.8882298469543457, + "logits/rejected": -2.7786202430725098, + "logps/chosen": -115.93666076660156, + "logps/rejected": -1025.7537841796875, + "loss": 0.0202, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4559624195098877, + "rewards/margins": 9.363418579101562, + "rewards/rejected": -9.819379806518555, + "step": 6640 + }, + { + "epoch": 0.4, + "learning_rate": 4.984243741984579e-06, + "logits/chosen": -2.8521838188171387, + "logits/rejected": -2.7270309925079346, + "logps/chosen": -135.1658172607422, + "logps/rejected": -1131.69482421875, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6666531562805176, + "rewards/margins": 10.20680046081543, + "rewards/rejected": -10.873454093933105, + "step": 6650 + }, + { + "epoch": 0.4, + "learning_rate": 4.984048706902872e-06, + "logits/chosen": -2.859269618988037, + "logits/rejected": -2.7558822631835938, + "logps/chosen": -127.13493347167969, + "logps/rejected": -1127.4227294921875, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5631681084632874, + "rewards/margins": 10.26793384552002, + "rewards/rejected": -10.831101417541504, + "step": 6660 + }, + { + "epoch": 0.4, + "learning_rate": 4.983852476000148e-06, + "logits/chosen": -2.864189863204956, + "logits/rejected": -2.735654354095459, + "logps/chosen": -89.42027282714844, + "logps/rejected": -953.0267333984375, + "loss": 0.0305, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18151307106018066, + "rewards/margins": 8.903812408447266, + "rewards/rejected": -9.085325241088867, + "step": 6670 + }, + { + "epoch": 0.4, + "learning_rate": 4.983655049370876e-06, + "logits/chosen": -2.842754602432251, + "logits/rejected": -2.7330899238586426, + "logps/chosen": -69.17859649658203, + "logps/rejected": -1164.3280029296875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.00868206936866045, + "rewards/margins": 11.202936172485352, + "rewards/rejected": -11.211617469787598, + "step": 6680 + }, + { + "epoch": 0.4, + "learning_rate": 4.9834564271100925e-06, + "logits/chosen": -2.870887041091919, + "logits/rejected": -2.766007423400879, + "logps/chosen": -90.09000396728516, + "logps/rejected": -933.3209838867188, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17917445302009583, + "rewards/margins": 8.70223617553711, + "rewards/rejected": -8.881410598754883, + "step": 6690 + }, + { + "epoch": 0.4, + "learning_rate": 4.983256609313418e-06, + "logits/chosen": -2.894533634185791, + "logits/rejected": -2.778907060623169, + "logps/chosen": -83.30313110351562, + "logps/rejected": -1022.7476806640625, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12802140414714813, + "rewards/margins": 9.653684616088867, + "rewards/rejected": -9.781705856323242, + "step": 6700 + }, + { + "epoch": 0.4, + "learning_rate": 4.983055596077044e-06, + "logits/chosen": -2.8762168884277344, + "logits/rejected": -2.7691948413848877, + "logps/chosen": -86.52455139160156, + "logps/rejected": -1062.5745849609375, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1510665863752365, + "rewards/margins": 10.047632217407227, + "rewards/rejected": -10.198698043823242, + "step": 6710 + }, + { + "epoch": 0.4, + "learning_rate": 4.982853387497737e-06, + "logits/chosen": -2.887822389602661, + "logits/rejected": -2.7806193828582764, + "logps/chosen": -98.81522369384766, + "logps/rejected": -1055.55419921875, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29571133852005005, + "rewards/margins": 9.819233894348145, + "rewards/rejected": -10.114945411682129, + "step": 6720 + }, + { + "epoch": 0.4, + "learning_rate": 4.982649983672841e-06, + "logits/chosen": -2.8574442863464355, + "logits/rejected": -2.7881851196289062, + "logps/chosen": -115.64456939697266, + "logps/rejected": -1162.3057861328125, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4325591027736664, + "rewards/margins": 10.732172966003418, + "rewards/rejected": -11.164731979370117, + "step": 6730 + }, + { + "epoch": 0.4, + "learning_rate": 4.982445384700274e-06, + "logits/chosen": -2.896275520324707, + "logits/rejected": -2.797452211380005, + "logps/chosen": -76.33207702636719, + "logps/rejected": -1096.7095947265625, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.033901892602443695, + "rewards/margins": 10.496334075927734, + "rewards/rejected": -10.53023624420166, + "step": 6740 + }, + { + "epoch": 0.4, + "learning_rate": 4.98223959067853e-06, + "logits/chosen": -2.8676114082336426, + "logits/rejected": -2.756726026535034, + "logps/chosen": -81.90572357177734, + "logps/rejected": -1075.250732421875, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10518008470535278, + "rewards/margins": 10.203829765319824, + "rewards/rejected": -10.309008598327637, + "step": 6750 + }, + { + "epoch": 0.4, + "learning_rate": 4.98203260170668e-06, + "logits/chosen": -2.8599233627319336, + "logits/rejected": -2.7868521213531494, + "logps/chosen": -99.03352355957031, + "logps/rejected": -1045.369873046875, + "loss": 0.0232, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25590553879737854, + "rewards/margins": 9.761087417602539, + "rewards/rejected": -10.016990661621094, + "step": 6760 + }, + { + "epoch": 0.4, + "learning_rate": 4.981824417884367e-06, + "logits/chosen": -2.887263298034668, + "logits/rejected": -2.744392156600952, + "logps/chosen": -135.83157348632812, + "logps/rejected": -1057.519287109375, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6390091180801392, + "rewards/margins": 9.492627143859863, + "rewards/rejected": -10.131635665893555, + "step": 6770 + }, + { + "epoch": 0.4, + "learning_rate": 4.9816150393118105e-06, + "logits/chosen": -2.856961727142334, + "logits/rejected": -2.7620131969451904, + "logps/chosen": -119.26322174072266, + "logps/rejected": -1049.786376953125, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46787968277931213, + "rewards/margins": 9.585641860961914, + "rewards/rejected": -10.053522109985352, + "step": 6780 + }, + { + "epoch": 0.4, + "learning_rate": 4.981404466089805e-06, + "logits/chosen": -2.8572981357574463, + "logits/rejected": -2.759601593017578, + "logps/chosen": -76.8571548461914, + "logps/rejected": -1001.9095458984375, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.0006532620755024254, + "rewards/margins": 9.570249557495117, + "rewards/rejected": -9.569598197937012, + "step": 6790 + }, + { + "epoch": 0.41, + "learning_rate": 4.981192698319722e-06, + "logits/chosen": -2.905216693878174, + "logits/rejected": -2.8280858993530273, + "logps/chosen": -64.65483856201172, + "logps/rejected": -1041.367919921875, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.08000225573778152, + "rewards/margins": 10.047078132629395, + "rewards/rejected": -9.967074394226074, + "step": 6800 + }, + { + "epoch": 0.41, + "learning_rate": 4.980979736103506e-06, + "logits/chosen": -2.9054012298583984, + "logits/rejected": -2.804459810256958, + "logps/chosen": -73.14320373535156, + "logps/rejected": -939.6282348632812, + "loss": 0.0317, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.02387736178934574, + "rewards/margins": 8.971988677978516, + "rewards/rejected": -8.948110580444336, + "step": 6810 + }, + { + "epoch": 0.41, + "learning_rate": 4.980765579543677e-06, + "logits/chosen": -2.8856871128082275, + "logits/rejected": -2.7773985862731934, + "logps/chosen": -71.04296875, + "logps/rejected": -956.9743041992188, + "loss": 0.0175, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.011927307583391666, + "rewards/margins": 9.109151840209961, + "rewards/rejected": -9.12108039855957, + "step": 6820 + }, + { + "epoch": 0.41, + "learning_rate": 4.980550228743329e-06, + "logits/chosen": -2.855215549468994, + "logits/rejected": -2.7590460777282715, + "logps/chosen": -121.68717956542969, + "logps/rejected": -1044.014892578125, + "loss": 0.0288, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5287896394729614, + "rewards/margins": 9.45048713684082, + "rewards/rejected": -9.97927474975586, + "step": 6830 + }, + { + "epoch": 0.41, + "learning_rate": 4.980333683806132e-06, + "logits/chosen": -2.865797281265259, + "logits/rejected": -2.799298048019409, + "logps/chosen": -99.35879516601562, + "logps/rejected": -1139.008544921875, + "loss": 0.0145, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29535365104675293, + "rewards/margins": 10.647809982299805, + "rewards/rejected": -10.94316291809082, + "step": 6840 + }, + { + "epoch": 0.41, + "learning_rate": 4.980115944836331e-06, + "logits/chosen": -2.8614439964294434, + "logits/rejected": -2.7717576026916504, + "logps/chosen": -89.68180847167969, + "logps/rejected": -905.0167236328125, + "loss": 0.0262, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19921812415122986, + "rewards/margins": 8.429717063903809, + "rewards/rejected": -8.628934860229492, + "step": 6850 + }, + { + "epoch": 0.41, + "learning_rate": 4.979897011938747e-06, + "logits/chosen": -2.8556389808654785, + "logits/rejected": -2.762850761413574, + "logps/chosen": -82.81360626220703, + "logps/rejected": -1125.847412109375, + "loss": 0.0186, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11526590585708618, + "rewards/margins": 10.701292037963867, + "rewards/rejected": -10.816556930541992, + "step": 6860 + }, + { + "epoch": 0.41, + "learning_rate": 4.979676885218772e-06, + "logits/chosen": -2.8606765270233154, + "logits/rejected": -2.7870306968688965, + "logps/chosen": -99.10395812988281, + "logps/rejected": -1040.30712890625, + "loss": 0.0312, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2896137237548828, + "rewards/margins": 9.675824165344238, + "rewards/rejected": -9.965436935424805, + "step": 6870 + }, + { + "epoch": 0.41, + "learning_rate": 4.979455564782377e-06, + "logits/chosen": -2.896195411682129, + "logits/rejected": -2.773070812225342, + "logps/chosen": -89.62725067138672, + "logps/rejected": -1069.345947265625, + "loss": 0.0266, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13960742950439453, + "rewards/margins": 10.106170654296875, + "rewards/rejected": -10.245779037475586, + "step": 6880 + }, + { + "epoch": 0.41, + "learning_rate": 4.979233050736105e-06, + "logits/chosen": -2.89957332611084, + "logits/rejected": -2.7787299156188965, + "logps/chosen": -71.62940216064453, + "logps/rejected": -896.1521606445312, + "loss": 0.0941, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.027392487972974777, + "rewards/margins": 8.484331130981445, + "rewards/rejected": -8.511723518371582, + "step": 6890 + }, + { + "epoch": 0.41, + "learning_rate": 4.979009343187073e-06, + "logits/chosen": -2.856396436691284, + "logits/rejected": -2.7827558517456055, + "logps/chosen": -85.42424011230469, + "logps/rejected": -938.9188232421875, + "loss": 0.0209, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1388898342847824, + "rewards/margins": 8.803838729858398, + "rewards/rejected": -8.942728996276855, + "step": 6900 + }, + { + "epoch": 0.41, + "learning_rate": 4.978784442242975e-06, + "logits/chosen": -2.921147108078003, + "logits/rejected": -2.7926275730133057, + "logps/chosen": -82.68589782714844, + "logps/rejected": -834.5104370117188, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08838231861591339, + "rewards/margins": 7.8075995445251465, + "rewards/rejected": -7.895982265472412, + "step": 6910 + }, + { + "epoch": 0.41, + "learning_rate": 4.978558348012078e-06, + "logits/chosen": -2.863276958465576, + "logits/rejected": -2.7734603881835938, + "logps/chosen": -89.18284606933594, + "logps/rejected": -925.59375, + "loss": 0.0164, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13697704672813416, + "rewards/margins": 8.678423881530762, + "rewards/rejected": -8.815401077270508, + "step": 6920 + }, + { + "epoch": 0.41, + "learning_rate": 4.9783310606032245e-06, + "logits/chosen": -2.8962130546569824, + "logits/rejected": -2.789273738861084, + "logps/chosen": -85.81217193603516, + "logps/rejected": -1000.1572265625, + "loss": 0.015, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14896582067012787, + "rewards/margins": 9.401949882507324, + "rewards/rejected": -9.55091667175293, + "step": 6930 + }, + { + "epoch": 0.41, + "learning_rate": 4.9781025801258296e-06, + "logits/chosen": -2.8681960105895996, + "logits/rejected": -2.7342095375061035, + "logps/chosen": -163.50167846679688, + "logps/rejected": -1164.833740234375, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9359902143478394, + "rewards/margins": 10.260385513305664, + "rewards/rejected": -11.196374893188477, + "step": 6940 + }, + { + "epoch": 0.41, + "learning_rate": 4.977872906689885e-06, + "logits/chosen": -2.8693671226501465, + "logits/rejected": -2.741170883178711, + "logps/chosen": -121.37178802490234, + "logps/rejected": -1064.5318603515625, + "loss": 0.031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4895530343055725, + "rewards/margins": 9.711076736450195, + "rewards/rejected": -10.200628280639648, + "step": 6950 + }, + { + "epoch": 0.42, + "learning_rate": 4.977642040405954e-06, + "logits/chosen": -2.889638662338257, + "logits/rejected": -2.753681182861328, + "logps/chosen": -135.38697814941406, + "logps/rejected": -1037.3392333984375, + "loss": 0.0124, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5584534406661987, + "rewards/margins": 9.366005897521973, + "rewards/rejected": -9.924459457397461, + "step": 6960 + }, + { + "epoch": 0.42, + "learning_rate": 4.977409981385176e-06, + "logits/chosen": -2.8849949836730957, + "logits/rejected": -2.755640745162964, + "logps/chosen": -110.10563659667969, + "logps/rejected": -1107.690185546875, + "loss": 0.0106, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.30954140424728394, + "rewards/margins": 10.323659896850586, + "rewards/rejected": -10.63320255279541, + "step": 6970 + }, + { + "epoch": 0.42, + "learning_rate": 4.977176729739265e-06, + "logits/chosen": -2.8463454246520996, + "logits/rejected": -2.7512025833129883, + "logps/chosen": -103.33365631103516, + "logps/rejected": -1031.782958984375, + "loss": 0.0287, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3685451149940491, + "rewards/margins": 9.511758804321289, + "rewards/rejected": -9.880303382873535, + "step": 6980 + }, + { + "epoch": 0.42, + "learning_rate": 4.976942285580507e-06, + "logits/chosen": -2.8573098182678223, + "logits/rejected": -2.7461867332458496, + "logps/chosen": -91.82601165771484, + "logps/rejected": -938.0389404296875, + "loss": 0.0205, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1654505431652069, + "rewards/margins": 8.782587051391602, + "rewards/rejected": -8.948038101196289, + "step": 6990 + }, + { + "epoch": 0.42, + "learning_rate": 4.976706649021765e-06, + "logits/chosen": -2.8592872619628906, + "logits/rejected": -2.7711892127990723, + "logps/chosen": -93.58506774902344, + "logps/rejected": -946.8355712890625, + "loss": 0.0241, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17853382229804993, + "rewards/margins": 8.847173690795898, + "rewards/rejected": -9.025707244873047, + "step": 7000 + }, + { + "epoch": 0.42, + "eval_logits/chosen": -2.8327739238739014, + "eval_logits/rejected": -2.8312039375305176, + "eval_logps/chosen": -143.5124053955078, + "eval_logps/rejected": -856.100830078125, + "eval_loss": 0.01010478101670742, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -0.7643795609474182, + "eval_rewards/margins": 7.341146469116211, + "eval_rewards/rejected": -8.105525970458984, + "eval_runtime": 4.3359, + "eval_samples_per_second": 1.153, + "eval_steps_per_second": 0.231, + "step": 7000 + }, + { + "epoch": 0.42, + "learning_rate": 4.976469820176474e-06, + "logits/chosen": -2.8643553256988525, + "logits/rejected": -2.7381560802459717, + "logps/chosen": -90.50735473632812, + "logps/rejected": -1054.6556396484375, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1895018070936203, + "rewards/margins": 9.931281089782715, + "rewards/rejected": -10.120782852172852, + "step": 7010 + }, + { + "epoch": 0.42, + "learning_rate": 4.976231799158643e-06, + "logits/chosen": -2.8790342807769775, + "logits/rejected": -2.77075457572937, + "logps/chosen": -85.13561248779297, + "logps/rejected": -1037.7835693359375, + "loss": 0.0136, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16010215878486633, + "rewards/margins": 9.77814769744873, + "rewards/rejected": -9.938249588012695, + "step": 7020 + }, + { + "epoch": 0.42, + "learning_rate": 4.975992586082855e-06, + "logits/chosen": -2.8559350967407227, + "logits/rejected": -2.769242525100708, + "logps/chosen": -96.19359588623047, + "logps/rejected": -1098.648193359375, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2663394510746002, + "rewards/margins": 10.257055282592773, + "rewards/rejected": -10.523394584655762, + "step": 7030 + }, + { + "epoch": 0.42, + "learning_rate": 4.9757521810642686e-06, + "logits/chosen": -2.835324287414551, + "logits/rejected": -2.726048231124878, + "logps/chosen": -93.47999572753906, + "logps/rejected": -1155.463623046875, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2627992630004883, + "rewards/margins": 10.850542068481445, + "rewards/rejected": -11.113341331481934, + "step": 7040 + }, + { + "epoch": 0.42, + "learning_rate": 4.975510584218614e-06, + "logits/chosen": -2.840242385864258, + "logits/rejected": -2.742239475250244, + "logps/chosen": -76.20268249511719, + "logps/rejected": -1106.3179931640625, + "loss": 0.0149, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07460145652294159, + "rewards/margins": 10.563481330871582, + "rewards/rejected": -10.638082504272461, + "step": 7050 + }, + { + "epoch": 0.42, + "learning_rate": 4.975267795662194e-06, + "logits/chosen": -2.8432862758636475, + "logits/rejected": -2.7316603660583496, + "logps/chosen": -78.16621398925781, + "logps/rejected": -1137.0135498046875, + "loss": 0.0093, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.07767780870199203, + "rewards/margins": 10.845499992370605, + "rewards/rejected": -10.923177719116211, + "step": 7060 + }, + { + "epoch": 0.42, + "learning_rate": 4.97502381551189e-06, + "logits/chosen": -2.8603243827819824, + "logits/rejected": -2.7652618885040283, + "logps/chosen": -75.2294692993164, + "logps/rejected": -987.4402465820312, + "loss": 0.0222, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08178217709064484, + "rewards/margins": 9.349815368652344, + "rewards/rejected": -9.431597709655762, + "step": 7070 + }, + { + "epoch": 0.42, + "learning_rate": 4.974778643885153e-06, + "logits/chosen": -2.8691468238830566, + "logits/rejected": -2.773514986038208, + "logps/chosen": -117.20970153808594, + "logps/rejected": -1008.8447265625, + "loss": 0.0183, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5010514259338379, + "rewards/margins": 9.14426326751709, + "rewards/rejected": -9.645315170288086, + "step": 7080 + }, + { + "epoch": 0.42, + "learning_rate": 4.974532280900007e-06, + "logits/chosen": -2.821929454803467, + "logits/rejected": -2.7213692665100098, + "logps/chosen": -124.25654602050781, + "logps/rejected": -1122.69287109375, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.578635573387146, + "rewards/margins": 10.209429740905762, + "rewards/rejected": -10.788065910339355, + "step": 7090 + }, + { + "epoch": 0.42, + "learning_rate": 4.974284726675054e-06, + "logits/chosen": -2.9003052711486816, + "logits/rejected": -2.8084187507629395, + "logps/chosen": -77.3375015258789, + "logps/rejected": -1141.782958984375, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11932220309972763, + "rewards/margins": 10.855875015258789, + "rewards/rejected": -10.975197792053223, + "step": 7100 + }, + { + "epoch": 0.42, + "learning_rate": 4.974035981329465e-06, + "logits/chosen": -2.841891288757324, + "logits/rejected": -2.7534396648406982, + "logps/chosen": -75.53832244873047, + "logps/rejected": -1057.7261962890625, + "loss": 0.0242, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.05626421049237251, + "rewards/margins": 10.085647583007812, + "rewards/rejected": -10.141911506652832, + "step": 7110 + }, + { + "epoch": 0.42, + "learning_rate": 4.973786044982985e-06, + "logits/chosen": -2.845216751098633, + "logits/rejected": -2.736093759536743, + "logps/chosen": -84.05213928222656, + "logps/rejected": -1091.44873046875, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09095393121242523, + "rewards/margins": 10.38542652130127, + "rewards/rejected": -10.476380348205566, + "step": 7120 + }, + { + "epoch": 0.43, + "learning_rate": 4.973534917755936e-06, + "logits/chosen": -2.8319830894470215, + "logits/rejected": -2.7299766540527344, + "logps/chosen": -156.66751098632812, + "logps/rejected": -1112.763671875, + "loss": 0.0282, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8727127313613892, + "rewards/margins": 9.805094718933105, + "rewards/rejected": -10.67780590057373, + "step": 7130 + }, + { + "epoch": 0.43, + "learning_rate": 4.973282599769207e-06, + "logits/chosen": -2.896338939666748, + "logits/rejected": -2.786273956298828, + "logps/chosen": -93.79672241210938, + "logps/rejected": -944.6258544921875, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2209416925907135, + "rewards/margins": 8.78611946105957, + "rewards/rejected": -9.007061004638672, + "step": 7140 + }, + { + "epoch": 0.43, + "learning_rate": 4.973029091144268e-06, + "logits/chosen": -2.891880512237549, + "logits/rejected": -2.7600886821746826, + "logps/chosen": -97.42906188964844, + "logps/rejected": -1095.12744140625, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3358263075351715, + "rewards/margins": 10.173721313476562, + "rewards/rejected": -10.509546279907227, + "step": 7150 + }, + { + "epoch": 0.43, + "learning_rate": 4.9727743920031555e-06, + "logits/chosen": -2.898037910461426, + "logits/rejected": -2.7619404792785645, + "logps/chosen": -147.47950744628906, + "logps/rejected": -1117.9964599609375, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8174494504928589, + "rewards/margins": 9.919328689575195, + "rewards/rejected": -10.736778259277344, + "step": 7160 + }, + { + "epoch": 0.43, + "learning_rate": 4.972518502468482e-06, + "logits/chosen": -2.859248638153076, + "logits/rejected": -2.739121437072754, + "logps/chosen": -143.3982696533203, + "logps/rejected": -984.2548828125, + "loss": 0.0226, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7695879340171814, + "rewards/margins": 8.638053894042969, + "rewards/rejected": -9.407641410827637, + "step": 7170 + }, + { + "epoch": 0.43, + "learning_rate": 4.9722614226634325e-06, + "logits/chosen": -2.8654847145080566, + "logits/rejected": -2.7498927116394043, + "logps/chosen": -93.3217544555664, + "logps/rejected": -1114.214599609375, + "loss": 0.0196, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23092298209667206, + "rewards/margins": 10.463264465332031, + "rewards/rejected": -10.694186210632324, + "step": 7180 + }, + { + "epoch": 0.43, + "learning_rate": 4.9720031527117664e-06, + "logits/chosen": -2.8704845905303955, + "logits/rejected": -2.7495694160461426, + "logps/chosen": -99.58483123779297, + "logps/rejected": -977.62255859375, + "loss": 0.0298, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.328530490398407, + "rewards/margins": 9.011679649353027, + "rewards/rejected": -9.340211868286133, + "step": 7190 + }, + { + "epoch": 0.43, + "learning_rate": 4.971743692737814e-06, + "logits/chosen": -2.8816440105438232, + "logits/rejected": -2.7707927227020264, + "logps/chosen": -87.47566223144531, + "logps/rejected": -1157.219482421875, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13638387620449066, + "rewards/margins": 10.978429794311523, + "rewards/rejected": -11.114812850952148, + "step": 7200 + }, + { + "epoch": 0.43, + "learning_rate": 4.97148304286648e-06, + "logits/chosen": -2.8423588275909424, + "logits/rejected": -2.7253060340881348, + "logps/chosen": -76.76896667480469, + "logps/rejected": -1130.9779052734375, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06722725927829742, + "rewards/margins": 10.794873237609863, + "rewards/rejected": -10.862100601196289, + "step": 7210 + }, + { + "epoch": 0.43, + "learning_rate": 4.97122120322324e-06, + "logits/chosen": -2.8720703125, + "logits/rejected": -2.7535879611968994, + "logps/chosen": -71.94892120361328, + "logps/rejected": -1096.115234375, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.060562945902347565, + "rewards/margins": 10.472827911376953, + "rewards/rejected": -10.533391952514648, + "step": 7220 + }, + { + "epoch": 0.43, + "learning_rate": 4.970958173934144e-06, + "logits/chosen": -2.835014820098877, + "logits/rejected": -2.737877368927002, + "logps/chosen": -98.33415985107422, + "logps/rejected": -1032.6558837890625, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2931233048439026, + "rewards/margins": 9.595638275146484, + "rewards/rejected": -9.888761520385742, + "step": 7230 + }, + { + "epoch": 0.43, + "learning_rate": 4.970693955125814e-06, + "logits/chosen": -2.8705711364746094, + "logits/rejected": -2.750333547592163, + "logps/chosen": -138.98428344726562, + "logps/rejected": -1068.3863525390625, + "loss": 0.0283, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.670947790145874, + "rewards/margins": 9.577275276184082, + "rewards/rejected": -10.248222351074219, + "step": 7240 + }, + { + "epoch": 0.43, + "learning_rate": 4.970428546925447e-06, + "logits/chosen": -2.8794608116149902, + "logits/rejected": -2.7424516677856445, + "logps/chosen": -165.1876983642578, + "logps/rejected": -1101.1488037109375, + "loss": 0.0207, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9416779279708862, + "rewards/margins": 9.637868881225586, + "rewards/rejected": -10.579545974731445, + "step": 7250 + }, + { + "epoch": 0.43, + "learning_rate": 4.970161949460808e-06, + "logits/chosen": -2.8693835735321045, + "logits/rejected": -2.7725603580474854, + "logps/chosen": -185.16073608398438, + "logps/rejected": -1149.739501953125, + "loss": 0.0138, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0858557224273682, + "rewards/margins": 9.966553688049316, + "rewards/rejected": -11.052411079406738, + "step": 7260 + }, + { + "epoch": 0.43, + "learning_rate": 4.9698941628602385e-06, + "logits/chosen": -2.8429529666900635, + "logits/rejected": -2.7408430576324463, + "logps/chosen": -114.07002258300781, + "logps/rejected": -1041.8470458984375, + "loss": 0.0185, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3732522130012512, + "rewards/margins": 9.605559349060059, + "rewards/rejected": -9.978812217712402, + "step": 7270 + }, + { + "epoch": 0.43, + "learning_rate": 4.96962518725265e-06, + "logits/chosen": -2.813436508178711, + "logits/rejected": -2.724668502807617, + "logps/chosen": -86.05107879638672, + "logps/rejected": -1040.678466796875, + "loss": 0.0248, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20645394921302795, + "rewards/margins": 9.759066581726074, + "rewards/rejected": -9.965520858764648, + "step": 7280 + }, + { + "epoch": 0.43, + "learning_rate": 4.969355022767529e-06, + "logits/chosen": -2.886049270629883, + "logits/rejected": -2.7359585762023926, + "logps/chosen": -89.36450958251953, + "logps/rejected": -1042.4400634765625, + "loss": 0.0243, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19842109084129333, + "rewards/margins": 9.797769546508789, + "rewards/rejected": -9.996190071105957, + "step": 7290 + }, + { + "epoch": 0.44, + "learning_rate": 4.9690836695349305e-06, + "logits/chosen": -2.897089719772339, + "logits/rejected": -2.78271484375, + "logps/chosen": -113.69737243652344, + "logps/rejected": -1166.4139404296875, + "loss": 0.0157, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5067522525787354, + "rewards/margins": 10.704755783081055, + "rewards/rejected": -11.211506843566895, + "step": 7300 + }, + { + "epoch": 0.44, + "learning_rate": 4.968811127685485e-06, + "logits/chosen": -2.787273406982422, + "logits/rejected": -2.696653127670288, + "logps/chosen": -228.659423828125, + "logps/rejected": -1234.12646484375, + "loss": 0.0225, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.587838888168335, + "rewards/margins": 10.30961799621582, + "rewards/rejected": -11.89745807647705, + "step": 7310 + }, + { + "epoch": 0.44, + "learning_rate": 4.968537397350395e-06, + "logits/chosen": -2.8704044818878174, + "logits/rejected": -2.752488613128662, + "logps/chosen": -214.6350555419922, + "logps/rejected": -1180.1656494140625, + "loss": 0.0155, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4706103801727295, + "rewards/margins": 9.892946243286133, + "rewards/rejected": -11.363557815551758, + "step": 7320 + }, + { + "epoch": 0.44, + "learning_rate": 4.968262478661432e-06, + "logits/chosen": -2.870546817779541, + "logits/rejected": -2.7578635215759277, + "logps/chosen": -108.44283294677734, + "logps/rejected": -1193.89697265625, + "loss": 0.0119, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.35628023743629456, + "rewards/margins": 11.13879680633545, + "rewards/rejected": -11.495077133178711, + "step": 7330 + }, + { + "epoch": 0.44, + "learning_rate": 4.967986371750944e-06, + "logits/chosen": -2.844867706298828, + "logits/rejected": -2.696237087249756, + "logps/chosen": -123.17716979980469, + "logps/rejected": -1119.7215576171875, + "loss": 0.039, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.48564401268959045, + "rewards/margins": 10.257049560546875, + "rewards/rejected": -10.742693901062012, + "step": 7340 + }, + { + "epoch": 0.44, + "learning_rate": 4.967709076751848e-06, + "logits/chosen": -2.882688045501709, + "logits/rejected": -2.7566308975219727, + "logps/chosen": -92.1227798461914, + "logps/rejected": -1211.223388671875, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2409590780735016, + "rewards/margins": 11.42439079284668, + "rewards/rejected": -11.665349960327148, + "step": 7350 + }, + { + "epoch": 0.44, + "learning_rate": 4.967430593797634e-06, + "logits/chosen": -2.8340015411376953, + "logits/rejected": -2.714090347290039, + "logps/chosen": -109.56756591796875, + "logps/rejected": -1197.652587890625, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.45134297013282776, + "rewards/margins": 11.076066970825195, + "rewards/rejected": -11.527408599853516, + "step": 7360 + }, + { + "epoch": 0.44, + "learning_rate": 4.967150923022364e-06, + "logits/chosen": -2.8666272163391113, + "logits/rejected": -2.769554376602173, + "logps/chosen": -109.04451751708984, + "logps/rejected": -1168.115478515625, + "loss": 0.0179, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34332799911499023, + "rewards/margins": 10.890738487243652, + "rewards/rejected": -11.234065055847168, + "step": 7370 + }, + { + "epoch": 0.44, + "learning_rate": 4.96687006456067e-06, + "logits/chosen": -2.8514552116394043, + "logits/rejected": -2.740943431854248, + "logps/chosen": -105.9572982788086, + "logps/rejected": -1064.7227783203125, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3892866373062134, + "rewards/margins": 9.819215774536133, + "rewards/rejected": -10.208502769470215, + "step": 7380 + }, + { + "epoch": 0.44, + "learning_rate": 4.96658801854776e-06, + "logits/chosen": -2.8669261932373047, + "logits/rejected": -2.7315690517425537, + "logps/chosen": -133.58096313476562, + "logps/rejected": -1056.50048828125, + "loss": 0.0172, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6455832719802856, + "rewards/margins": 9.473417282104492, + "rewards/rejected": -10.119001388549805, + "step": 7390 + }, + { + "epoch": 0.44, + "learning_rate": 4.966304785119408e-06, + "logits/chosen": -2.8820061683654785, + "logits/rejected": -2.7646563053131104, + "logps/chosen": -81.69261169433594, + "logps/rejected": -1128.9627685546875, + "loss": 0.0159, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09454204142093658, + "rewards/margins": 10.75538158416748, + "rewards/rejected": -10.849923133850098, + "step": 7400 + }, + { + "epoch": 0.44, + "learning_rate": 4.966020364411964e-06, + "logits/chosen": -2.857898235321045, + "logits/rejected": -2.7608182430267334, + "logps/chosen": -86.08817291259766, + "logps/rejected": -1077.73095703125, + "loss": 0.014, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12193907797336578, + "rewards/margins": 10.208215713500977, + "rewards/rejected": -10.330155372619629, + "step": 7410 + }, + { + "epoch": 0.44, + "learning_rate": 4.965734756562349e-06, + "logits/chosen": -2.8387887477874756, + "logits/rejected": -2.742058277130127, + "logps/chosen": -94.74108123779297, + "logps/rejected": -990.2894287109375, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24457204341888428, + "rewards/margins": 9.21645450592041, + "rewards/rejected": -9.461027145385742, + "step": 7420 + }, + { + "epoch": 0.44, + "learning_rate": 4.965447961708052e-06, + "logits/chosen": -2.85334849357605, + "logits/rejected": -2.7705202102661133, + "logps/chosen": -79.14310455322266, + "logps/rejected": -1071.206298828125, + "loss": 0.0184, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09707560390233994, + "rewards/margins": 10.164944648742676, + "rewards/rejected": -10.262022018432617, + "step": 7430 + }, + { + "epoch": 0.44, + "learning_rate": 4.965159979987139e-06, + "logits/chosen": -2.8670904636383057, + "logits/rejected": -2.747734546661377, + "logps/chosen": -86.01854705810547, + "logps/rejected": -1181.15625, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.12674593925476074, + "rewards/margins": 11.229013442993164, + "rewards/rejected": -11.35576057434082, + "step": 7440 + }, + { + "epoch": 0.44, + "learning_rate": 4.964870811538243e-06, + "logits/chosen": -2.8489010334014893, + "logits/rejected": -2.700205087661743, + "logps/chosen": -91.22477722167969, + "logps/rejected": -1009.3116455078125, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23941679298877716, + "rewards/margins": 9.408393859863281, + "rewards/rejected": -9.647809982299805, + "step": 7450 + }, + { + "epoch": 0.44, + "learning_rate": 4.9645804565005695e-06, + "logits/chosen": -2.8461015224456787, + "logits/rejected": -2.7385714054107666, + "logps/chosen": -104.34039306640625, + "logps/rejected": -1109.421630859375, + "loss": 0.0293, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3244141936302185, + "rewards/margins": 10.338183403015137, + "rewards/rejected": -10.662598609924316, + "step": 7460 + }, + { + "epoch": 0.45, + "learning_rate": 4.964288915013895e-06, + "logits/chosen": -2.8828015327453613, + "logits/rejected": -2.750121831893921, + "logps/chosen": -96.09165954589844, + "logps/rejected": -1111.699462890625, + "loss": 0.0128, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21668855845928192, + "rewards/margins": 10.464941024780273, + "rewards/rejected": -10.681631088256836, + "step": 7470 + }, + { + "epoch": 0.45, + "learning_rate": 4.963996187218569e-06, + "logits/chosen": -2.866685628890991, + "logits/rejected": -2.7260117530822754, + "logps/chosen": -97.98733520507812, + "logps/rejected": -1121.356689453125, + "loss": 0.0129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.26563894748687744, + "rewards/margins": 10.500628471374512, + "rewards/rejected": -10.766267776489258, + "step": 7480 + }, + { + "epoch": 0.45, + "learning_rate": 4.96370227325551e-06, + "logits/chosen": -2.8635756969451904, + "logits/rejected": -2.769043207168579, + "logps/chosen": -112.70074462890625, + "logps/rejected": -1133.9385986328125, + "loss": 0.0208, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4026443362236023, + "rewards/margins": 10.49830436706543, + "rewards/rejected": -10.900947570800781, + "step": 7490 + }, + { + "epoch": 0.45, + "learning_rate": 4.963407173266208e-06, + "logits/chosen": -2.864558219909668, + "logits/rejected": -2.7172749042510986, + "logps/chosen": -80.8797607421875, + "logps/rejected": -1045.7236328125, + "loss": 0.0101, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13710841536521912, + "rewards/margins": 9.87916374206543, + "rewards/rejected": -10.016271591186523, + "step": 7500 + }, + { + "epoch": 0.45, + "learning_rate": 4.963110887392724e-06, + "logits/chosen": -2.822739601135254, + "logits/rejected": -2.7317419052124023, + "logps/chosen": -88.72630310058594, + "logps/rejected": -1134.6961669921875, + "loss": 0.0237, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16312776505947113, + "rewards/margins": 10.737232208251953, + "rewards/rejected": -10.900359153747559, + "step": 7510 + }, + { + "epoch": 0.45, + "learning_rate": 4.96281341577769e-06, + "logits/chosen": -2.8802976608276367, + "logits/rejected": -2.764813184738159, + "logps/chosen": -92.21775817871094, + "logps/rejected": -1010.9537963867188, + "loss": 0.0233, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2652058005332947, + "rewards/margins": 9.412860870361328, + "rewards/rejected": -9.678067207336426, + "step": 7520 + }, + { + "epoch": 0.45, + "learning_rate": 4.962514758564309e-06, + "logits/chosen": -2.866222620010376, + "logits/rejected": -2.74830961227417, + "logps/chosen": -108.38832092285156, + "logps/rejected": -1119.225830078125, + "loss": 0.0144, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3490092158317566, + "rewards/margins": 10.378432273864746, + "rewards/rejected": -10.727441787719727, + "step": 7530 + }, + { + "epoch": 0.45, + "learning_rate": 4.962214915896355e-06, + "logits/chosen": -2.885638475418091, + "logits/rejected": -2.7404894828796387, + "logps/chosen": -107.24710845947266, + "logps/rejected": -1055.85400390625, + "loss": 0.0178, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34834590554237366, + "rewards/margins": 9.78091049194336, + "rewards/rejected": -10.129257202148438, + "step": 7540 + }, + { + "epoch": 0.45, + "learning_rate": 4.961913887918172e-06, + "logits/chosen": -2.8642513751983643, + "logits/rejected": -2.7389473915100098, + "logps/chosen": -120.53790283203125, + "logps/rejected": -1091.712646484375, + "loss": 0.0338, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4634668827056885, + "rewards/margins": 10.007838249206543, + "rewards/rejected": -10.471305847167969, + "step": 7550 + }, + { + "epoch": 0.45, + "learning_rate": 4.961611674774674e-06, + "logits/chosen": -2.850139617919922, + "logits/rejected": -2.762275218963623, + "logps/chosen": -139.3542938232422, + "logps/rejected": -1019.6751098632812, + "loss": 0.0327, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.7159873247146606, + "rewards/margins": 9.032678604125977, + "rewards/rejected": -9.748663902282715, + "step": 7560 + }, + { + "epoch": 0.45, + "learning_rate": 4.961308276611349e-06, + "logits/chosen": -2.8673133850097656, + "logits/rejected": -2.7662529945373535, + "logps/chosen": -96.63893127441406, + "logps/rejected": -970.5333862304688, + "loss": 0.0167, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.25597402453422546, + "rewards/margins": 9.019479751586914, + "rewards/rejected": -9.275453567504883, + "step": 7570 + }, + { + "epoch": 0.45, + "learning_rate": 4.961003693574249e-06, + "logits/chosen": -2.856274366378784, + "logits/rejected": -2.7449839115142822, + "logps/chosen": -85.91889953613281, + "logps/rejected": -1081.9737548828125, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.13909956812858582, + "rewards/margins": 10.228979110717773, + "rewards/rejected": -10.368078231811523, + "step": 7580 + }, + { + "epoch": 0.45, + "learning_rate": 4.960697925810003e-06, + "logits/chosen": -2.905705213546753, + "logits/rejected": -2.796009063720703, + "logps/chosen": -88.73684692382812, + "logps/rejected": -1077.325927734375, + "loss": 0.0717, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15526483952999115, + "rewards/margins": 10.174434661865234, + "rewards/rejected": -10.329699516296387, + "step": 7590 + }, + { + "epoch": 0.45, + "learning_rate": 4.9603909734658064e-06, + "logits/chosen": -2.8825783729553223, + "logits/rejected": -2.7545928955078125, + "logps/chosen": -71.01826477050781, + "logps/rejected": -1019.9766845703125, + "loss": 0.0761, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.026035482063889503, + "rewards/margins": 9.717533111572266, + "rewards/rejected": -9.743569374084473, + "step": 7600 + }, + { + "epoch": 0.45, + "learning_rate": 4.960082836689427e-06, + "logits/chosen": -2.856741428375244, + "logits/rejected": -2.784402847290039, + "logps/chosen": -94.07939147949219, + "logps/rejected": -954.2833251953125, + "loss": 0.0194, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24868810176849365, + "rewards/margins": 8.858464241027832, + "rewards/rejected": -9.107151985168457, + "step": 7610 + }, + { + "epoch": 0.45, + "learning_rate": 4.9597735156292024e-06, + "logits/chosen": -2.8785061836242676, + "logits/rejected": -2.745577812194824, + "logps/chosen": -244.57431030273438, + "logps/rejected": -1074.615478515625, + "loss": 0.0334, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -1.721673607826233, + "rewards/margins": 8.585847854614258, + "rewards/rejected": -10.30752182006836, + "step": 7620 + }, + { + "epoch": 0.45, + "learning_rate": 4.9594630104340375e-06, + "logits/chosen": -2.8949167728424072, + "logits/rejected": -2.7281010150909424, + "logps/chosen": -180.42234802246094, + "logps/rejected": -1146.9002685546875, + "loss": 0.0227, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0658646821975708, + "rewards/margins": 9.952799797058105, + "rewards/rejected": -11.01866340637207, + "step": 7630 + }, + { + "epoch": 0.46, + "learning_rate": 4.959151321253411e-06, + "logits/chosen": -2.8329901695251465, + "logits/rejected": -2.7279200553894043, + "logps/chosen": -148.80142211914062, + "logps/rejected": -1183.149658203125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8190087080001831, + "rewards/margins": 10.570188522338867, + "rewards/rejected": -11.389196395874023, + "step": 7640 + }, + { + "epoch": 0.46, + "learning_rate": 4.9588384482373695e-06, + "logits/chosen": -2.8683347702026367, + "logits/rejected": -2.744189500808716, + "logps/chosen": -271.8529357910156, + "logps/rejected": -1144.50390625, + "loss": 0.0319, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0886411666870117, + "rewards/margins": 8.91357707977295, + "rewards/rejected": -11.002218246459961, + "step": 7650 + }, + { + "epoch": 0.46, + "learning_rate": 4.958524391536531e-06, + "logits/chosen": -2.8603150844573975, + "logits/rejected": -2.743774175643921, + "logps/chosen": -175.11703491210938, + "logps/rejected": -1225.5732421875, + "loss": 0.0142, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0102224349975586, + "rewards/margins": 10.797281265258789, + "rewards/rejected": -11.807503700256348, + "step": 7660 + }, + { + "epoch": 0.46, + "learning_rate": 4.958209151302082e-06, + "logits/chosen": -2.8482167720794678, + "logits/rejected": -2.73848032951355, + "logps/chosen": -98.37306213378906, + "logps/rejected": -1107.971435546875, + "loss": 0.0206, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2280844748020172, + "rewards/margins": 10.40809440612793, + "rewards/rejected": -10.636178970336914, + "step": 7670 + }, + { + "epoch": 0.46, + "learning_rate": 4.957892727685778e-06, + "logits/chosen": -2.8752856254577637, + "logits/rejected": -2.7673206329345703, + "logps/chosen": -89.06876373291016, + "logps/rejected": -1018.462890625, + "loss": 0.0337, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.21054255962371826, + "rewards/margins": 9.531793594360352, + "rewards/rejected": -9.742334365844727, + "step": 7680 + }, + { + "epoch": 0.46, + "learning_rate": 4.957575120839946e-06, + "logits/chosen": -2.8494160175323486, + "logits/rejected": -2.745891571044922, + "logps/chosen": -105.34858703613281, + "logps/rejected": -1164.8341064453125, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.31456804275512695, + "rewards/margins": 10.884554862976074, + "rewards/rejected": -11.19912338256836, + "step": 7690 + }, + { + "epoch": 0.46, + "learning_rate": 4.957256330917483e-06, + "logits/chosen": -2.8442776203155518, + "logits/rejected": -2.7738373279571533, + "logps/chosen": -104.95865631103516, + "logps/rejected": -1132.051513671875, + "loss": 0.0103, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.36031267046928406, + "rewards/margins": 10.515310287475586, + "rewards/rejected": -10.87562370300293, + "step": 7700 + }, + { + "epoch": 0.46, + "learning_rate": 4.956936358071853e-06, + "logits/chosen": -2.8528244495391846, + "logits/rejected": -2.7727527618408203, + "logps/chosen": -152.827880859375, + "logps/rejected": -1152.0010986328125, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8183635473251343, + "rewards/margins": 10.259957313537598, + "rewards/rejected": -11.078320503234863, + "step": 7710 + }, + { + "epoch": 0.46, + "learning_rate": 4.956615202457092e-06, + "logits/chosen": -2.8565287590026855, + "logits/rejected": -2.7507007122039795, + "logps/chosen": -158.17413330078125, + "logps/rejected": -1147.621826171875, + "loss": 0.019, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8586198687553406, + "rewards/margins": 10.165218353271484, + "rewards/rejected": -11.023837089538574, + "step": 7720 + }, + { + "epoch": 0.46, + "learning_rate": 4.956292864227804e-06, + "logits/chosen": -2.906071901321411, + "logits/rejected": -2.769066095352173, + "logps/chosen": -123.36418151855469, + "logps/rejected": -1035.3038330078125, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47634822130203247, + "rewards/margins": 9.43515682220459, + "rewards/rejected": -9.911504745483398, + "step": 7730 + }, + { + "epoch": 0.46, + "learning_rate": 4.955969343539162e-06, + "logits/chosen": -2.84739351272583, + "logits/rejected": -2.7463929653167725, + "logps/chosen": -110.03605651855469, + "logps/rejected": -1022.5135498046875, + "loss": 0.0137, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4245133399963379, + "rewards/margins": 9.340261459350586, + "rewards/rejected": -9.764774322509766, + "step": 7740 + }, + { + "epoch": 0.46, + "learning_rate": 4.95564464054691e-06, + "logits/chosen": -2.853588342666626, + "logits/rejected": -2.761723518371582, + "logps/chosen": -118.99046325683594, + "logps/rejected": -1013.7451171875, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4701555371284485, + "rewards/margins": 9.220629692077637, + "rewards/rejected": -9.690786361694336, + "step": 7750 + }, + { + "epoch": 0.46, + "learning_rate": 4.955318755407359e-06, + "logits/chosen": -2.885979175567627, + "logits/rejected": -2.761888027191162, + "logps/chosen": -78.35704803466797, + "logps/rejected": -1142.82861328125, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.10020262002944946, + "rewards/margins": 10.894159317016602, + "rewards/rejected": -10.99436092376709, + "step": 7760 + }, + { + "epoch": 0.46, + "learning_rate": 4.954991688277391e-06, + "logits/chosen": -2.847180128097534, + "logits/rejected": -2.777435541152954, + "logps/chosen": -85.9129638671875, + "logps/rejected": -1128.804443359375, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16628585755825043, + "rewards/margins": 10.676268577575684, + "rewards/rejected": -10.842554092407227, + "step": 7770 + }, + { + "epoch": 0.46, + "learning_rate": 4.9546634393144544e-06, + "logits/chosen": -2.9000444412231445, + "logits/rejected": -2.7572436332702637, + "logps/chosen": -114.28288269042969, + "logps/rejected": -1077.417236328125, + "loss": 0.0204, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41664451360702515, + "rewards/margins": 9.910451889038086, + "rewards/rejected": -10.327095985412598, + "step": 7780 + }, + { + "epoch": 0.46, + "learning_rate": 4.95433400867657e-06, + "logits/chosen": -2.867255210876465, + "logits/rejected": -2.734398365020752, + "logps/chosen": -89.079833984375, + "logps/rejected": -1066.4674072265625, + "loss": 0.0141, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2059973180294037, + "rewards/margins": 10.029940605163574, + "rewards/rejected": -10.235939979553223, + "step": 7790 + }, + { + "epoch": 0.47, + "learning_rate": 4.954003396522325e-06, + "logits/chosen": -2.8369765281677246, + "logits/rejected": -2.730602502822876, + "logps/chosen": -114.98908996582031, + "logps/rejected": -1108.25146484375, + "loss": 0.0355, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4519571363925934, + "rewards/margins": 10.180030822753906, + "rewards/rejected": -10.631988525390625, + "step": 7800 + }, + { + "epoch": 0.47, + "learning_rate": 4.953671603010877e-06, + "logits/chosen": -2.847154378890991, + "logits/rejected": -2.74434232711792, + "logps/chosen": -88.02053833007812, + "logps/rejected": -881.8763427734375, + "loss": 0.0313, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18614815175533295, + "rewards/margins": 8.1901273727417, + "rewards/rejected": -8.376276016235352, + "step": 7810 + }, + { + "epoch": 0.47, + "learning_rate": 4.953338628301949e-06, + "logits/chosen": -2.866659641265869, + "logits/rejected": -2.7351021766662598, + "logps/chosen": -82.4190673828125, + "logps/rejected": -1033.171875, + "loss": 0.0229, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1258893758058548, + "rewards/margins": 9.765812873840332, + "rewards/rejected": -9.891701698303223, + "step": 7820 + }, + { + "epoch": 0.47, + "learning_rate": 4.953004472555838e-06, + "logits/chosen": -2.895505428314209, + "logits/rejected": -2.744224786758423, + "logps/chosen": -112.4826431274414, + "logps/rejected": -1053.8028564453125, + "loss": 0.0253, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.3890414834022522, + "rewards/margins": 9.704627990722656, + "rewards/rejected": -10.093671798706055, + "step": 7830 + }, + { + "epoch": 0.47, + "learning_rate": 4.9526691359334045e-06, + "logits/chosen": -2.827378273010254, + "logits/rejected": -2.7112221717834473, + "logps/chosen": -108.18548583984375, + "logps/rejected": -948.1320190429688, + "loss": 0.0181, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.34837237000465393, + "rewards/margins": 8.696601867675781, + "rewards/rejected": -9.044973373413086, + "step": 7840 + }, + { + "epoch": 0.47, + "learning_rate": 4.95233261859608e-06, + "logits/chosen": -2.8531641960144043, + "logits/rejected": -2.711824893951416, + "logps/chosen": -96.7031478881836, + "logps/rejected": -1101.623291015625, + "loss": 0.0176, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.207549050450325, + "rewards/margins": 10.369526863098145, + "rewards/rejected": -10.57707691192627, + "step": 7850 + }, + { + "epoch": 0.47, + "learning_rate": 4.951994920705865e-06, + "logits/chosen": -2.859160900115967, + "logits/rejected": -2.731637477874756, + "logps/chosen": -80.38975524902344, + "logps/rejected": -1103.4127197265625, + "loss": 0.0132, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14578375220298767, + "rewards/margins": 10.441885948181152, + "rewards/rejected": -10.587671279907227, + "step": 7860 + }, + { + "epoch": 0.47, + "learning_rate": 4.9516560424253265e-06, + "logits/chosen": -2.8581531047821045, + "logits/rejected": -2.7144711017608643, + "logps/chosen": -87.98351287841797, + "logps/rejected": -1151.76708984375, + "loss": 0.0216, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15736448764801025, + "rewards/margins": 10.925568580627441, + "rewards/rejected": -11.08293342590332, + "step": 7870 + }, + { + "epoch": 0.47, + "learning_rate": 4.9513159839176e-06, + "logits/chosen": -2.8735687732696533, + "logits/rejected": -2.7481565475463867, + "logps/chosen": -68.29725646972656, + "logps/rejected": -1069.293212890625, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014291726052761078, + "rewards/margins": 10.245096206665039, + "rewards/rejected": -10.259387969970703, + "step": 7880 + }, + { + "epoch": 0.47, + "learning_rate": 4.95097474534639e-06, + "logits/chosen": -2.8607258796691895, + "logits/rejected": -2.7053020000457764, + "logps/chosen": -83.29029846191406, + "logps/rejected": -1001.1072387695312, + "loss": 0.0257, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.14857155084609985, + "rewards/margins": 9.426419258117676, + "rewards/rejected": -9.574991226196289, + "step": 7890 + }, + { + "epoch": 0.47, + "learning_rate": 4.9506323268759695e-06, + "logits/chosen": -2.858133316040039, + "logits/rejected": -2.697761058807373, + "logps/chosen": -276.944580078125, + "logps/rejected": -1291.6058349609375, + "loss": 0.0252, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0561060905456543, + "rewards/margins": 10.414408683776855, + "rewards/rejected": -12.470515251159668, + "step": 7900 + }, + { + "epoch": 0.47, + "learning_rate": 4.950288728671177e-06, + "logits/chosen": -2.8728082180023193, + "logits/rejected": -2.7197067737579346, + "logps/chosen": -335.365478515625, + "logps/rejected": -1298.236083984375, + "loss": 0.0192, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6060290336608887, + "rewards/margins": 9.936685562133789, + "rewards/rejected": -12.542715072631836, + "step": 7910 + }, + { + "epoch": 0.47, + "learning_rate": 4.949943950897422e-06, + "logits/chosen": -2.8608946800231934, + "logits/rejected": -2.7388417720794678, + "logps/chosen": -168.35797119140625, + "logps/rejected": -1079.89013671875, + "loss": 0.0238, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0236942768096924, + "rewards/margins": 9.330083847045898, + "rewards/rejected": -10.353776931762695, + "step": 7920 + }, + { + "epoch": 0.47, + "learning_rate": 4.949597993720681e-06, + "logits/chosen": -2.8951010704040527, + "logits/rejected": -2.7527873516082764, + "logps/chosen": -129.69821166992188, + "logps/rejected": -1144.5748291015625, + "loss": 0.0174, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.592028021812439, + "rewards/margins": 10.405699729919434, + "rewards/rejected": -10.997730255126953, + "step": 7930 + }, + { + "epoch": 0.47, + "learning_rate": 4.949250857307497e-06, + "logits/chosen": -2.852398633956909, + "logits/rejected": -2.7509567737579346, + "logps/chosen": -93.40636444091797, + "logps/rejected": -1134.188232421875, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.29469484090805054, + "rewards/margins": 10.618700981140137, + "rewards/rejected": -10.913395881652832, + "step": 7940 + }, + { + "epoch": 0.47, + "learning_rate": 4.94890254182498e-06, + "logits/chosen": -2.8499627113342285, + "logits/rejected": -2.678219795227051, + "logps/chosen": -112.3321762084961, + "logps/rejected": -1103.70751953125, + "loss": 0.0447, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46466654539108276, + "rewards/margins": 10.136014938354492, + "rewards/rejected": -10.60068130493164, + "step": 7950 + }, + { + "epoch": 0.47, + "learning_rate": 4.948553047440813e-06, + "logits/chosen": -2.849151372909546, + "logits/rejected": -2.7440409660339355, + "logps/chosen": -119.38970947265625, + "logps/rejected": -1055.8221435546875, + "loss": 0.0131, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5210192799568176, + "rewards/margins": 9.591653823852539, + "rewards/rejected": -10.11267375946045, + "step": 7960 + }, + { + "epoch": 0.48, + "learning_rate": 4.948202374323239e-06, + "logits/chosen": -2.8635525703430176, + "logits/rejected": -2.732304334640503, + "logps/chosen": -91.2094497680664, + "logps/rejected": -1072.06884765625, + "loss": 0.0299, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.20807886123657227, + "rewards/margins": 10.069757461547852, + "rewards/rejected": -10.277835845947266, + "step": 7970 + }, + { + "epoch": 0.48, + "learning_rate": 4.947850522641072e-06, + "logits/chosen": -2.886035442352295, + "logits/rejected": -2.720885992050171, + "logps/chosen": -117.11384582519531, + "logps/rejected": -1117.827392578125, + "loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4263271689414978, + "rewards/margins": 10.307860374450684, + "rewards/rejected": -10.734188079833984, + "step": 7980 + }, + { + "epoch": 0.48, + "learning_rate": 4.9474974925636965e-06, + "logits/chosen": -2.850391387939453, + "logits/rejected": -2.750633716583252, + "logps/chosen": -106.6097412109375, + "logps/rejected": -1051.490966796875, + "loss": 0.0239, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3243061900138855, + "rewards/margins": 9.741000175476074, + "rewards/rejected": -10.065306663513184, + "step": 7990 + }, + { + "epoch": 0.48, + "learning_rate": 4.9471432842610584e-06, + "logits/chosen": -2.8470280170440674, + "logits/rejected": -2.734238386154175, + "logps/chosen": -170.07675170898438, + "logps/rejected": -1208.8990478515625, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.004033088684082, + "rewards/margins": 10.644299507141113, + "rewards/rejected": -11.648332595825195, + "step": 8000 + }, + { + "epoch": 0.48, + "learning_rate": 4.946787897903674e-06, + "logits/chosen": -2.85801100730896, + "logits/rejected": -2.720820188522339, + "logps/chosen": -210.8000946044922, + "logps/rejected": -1096.583740234375, + "loss": 0.1055, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -1.4212865829467773, + "rewards/margins": 9.100495338439941, + "rewards/rejected": -10.521780967712402, + "step": 8010 + }, + { + "epoch": 0.48, + "learning_rate": 4.946431333662628e-06, + "logits/chosen": -2.8601760864257812, + "logits/rejected": -2.713193655014038, + "logps/chosen": -142.53521728515625, + "logps/rejected": -1126.9625244140625, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7736454606056213, + "rewards/margins": 10.058855056762695, + "rewards/rejected": -10.832502365112305, + "step": 8020 + }, + { + "epoch": 0.48, + "learning_rate": 4.946073591709568e-06, + "logits/chosen": -2.8151772022247314, + "logits/rejected": -2.652881622314453, + "logps/chosen": -88.28627014160156, + "logps/rejected": -1120.7310791015625, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.23520879447460175, + "rewards/margins": 10.51801586151123, + "rewards/rejected": -10.753225326538086, + "step": 8030 + }, + { + "epoch": 0.48, + "learning_rate": 4.945714672216713e-06, + "logits/chosen": -2.8835232257843018, + "logits/rejected": -2.7161080837249756, + "logps/chosen": -81.79778289794922, + "logps/rejected": -1062.3665771484375, + "loss": 0.0146, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1296870857477188, + "rewards/margins": 10.045610427856445, + "rewards/rejected": -10.175298690795898, + "step": 8040 + }, + { + "epoch": 0.48, + "learning_rate": 4.945354575356846e-06, + "logits/chosen": -2.8433938026428223, + "logits/rejected": -2.687413215637207, + "logps/chosen": -91.32014465332031, + "logps/rejected": -1105.3138427734375, + "loss": 0.0173, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24432781338691711, + "rewards/margins": 10.36766242980957, + "rewards/rejected": -10.611989974975586, + "step": 8050 + }, + { + "epoch": 0.48, + "learning_rate": 4.944993301303317e-06, + "logits/chosen": -2.8511416912078857, + "logits/rejected": -2.6919758319854736, + "logps/chosen": -92.84443664550781, + "logps/rejected": -1059.7767333984375, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2437063753604889, + "rewards/margins": 9.911623001098633, + "rewards/rejected": -10.155330657958984, + "step": 8060 + }, + { + "epoch": 0.48, + "learning_rate": 4.944630850230045e-06, + "logits/chosen": -2.82525372505188, + "logits/rejected": -2.6740849018096924, + "logps/chosen": -118.892333984375, + "logps/rejected": -1123.2099609375, + "loss": 0.0172, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.45510831475257874, + "rewards/margins": 10.316621780395508, + "rewards/rejected": -10.771730422973633, + "step": 8070 + }, + { + "epoch": 0.48, + "learning_rate": 4.944267222311511e-06, + "logits/chosen": -2.859088897705078, + "logits/rejected": -2.7007062435150146, + "logps/chosen": -84.19384002685547, + "logps/rejected": -1060.2767333984375, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1649484932422638, + "rewards/margins": 9.985294342041016, + "rewards/rejected": -10.150242805480957, + "step": 8080 + }, + { + "epoch": 0.48, + "learning_rate": 4.943902417722769e-06, + "logits/chosen": -2.8636093139648438, + "logits/rejected": -2.679323434829712, + "logps/chosen": -91.9188232421875, + "logps/rejected": -1062.393310546875, + "loss": 0.0148, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17007996141910553, + "rewards/margins": 10.012198448181152, + "rewards/rejected": -10.182278633117676, + "step": 8090 + }, + { + "epoch": 0.48, + "learning_rate": 4.9435364366394334e-06, + "logits/chosen": -2.873657464981079, + "logits/rejected": -2.6570382118225098, + "logps/chosen": -114.43702697753906, + "logps/rejected": -1020.9920654296875, + "loss": 0.0162, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.475957453250885, + "rewards/margins": 9.295429229736328, + "rewards/rejected": -9.77138614654541, + "step": 8100 + }, + { + "epoch": 0.48, + "learning_rate": 4.943169279237688e-06, + "logits/chosen": -2.8418102264404297, + "logits/rejected": -2.6355204582214355, + "logps/chosen": -84.60166931152344, + "logps/rejected": -1127.6351318359375, + "loss": 0.0116, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1583913415670395, + "rewards/margins": 10.667875289916992, + "rewards/rejected": -10.82626724243164, + "step": 8110 + }, + { + "epoch": 0.48, + "learning_rate": 4.942800945694284e-06, + "logits/chosen": -2.8554844856262207, + "logits/rejected": -2.6470537185668945, + "logps/chosen": -91.20024108886719, + "logps/rejected": -1092.2275390625, + "loss": 0.0156, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24452921748161316, + "rewards/margins": 10.23759651184082, + "rewards/rejected": -10.482126235961914, + "step": 8120 + }, + { + "epoch": 0.48, + "learning_rate": 4.942431436186536e-06, + "logits/chosen": -2.863220691680908, + "logits/rejected": -2.697500705718994, + "logps/chosen": -83.83858489990234, + "logps/rejected": -1113.2825927734375, + "loss": 0.0152, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1747596561908722, + "rewards/margins": 10.517755508422852, + "rewards/rejected": -10.692514419555664, + "step": 8130 + }, + { + "epoch": 0.49, + "learning_rate": 4.9420607508923244e-06, + "logits/chosen": -2.7809062004089355, + "logits/rejected": -2.6195321083068848, + "logps/chosen": -76.68037414550781, + "logps/rejected": -1048.7308349609375, + "loss": 0.0121, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15362104773521423, + "rewards/margins": 9.891016960144043, + "rewards/rejected": -10.044637680053711, + "step": 8140 + }, + { + "epoch": 0.49, + "learning_rate": 4.9416888899901e-06, + "logits/chosen": -2.845475435256958, + "logits/rejected": -2.63460111618042, + "logps/chosen": -82.2834701538086, + "logps/rejected": -1075.0858154296875, + "loss": 0.0169, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.06480856239795685, + "rewards/margins": 10.251663208007812, + "rewards/rejected": -10.316472053527832, + "step": 8150 + }, + { + "epoch": 0.49, + "learning_rate": 4.941315853658873e-06, + "logits/chosen": -2.816342353820801, + "logits/rejected": -2.6254959106445312, + "logps/chosen": -82.4791488647461, + "logps/rejected": -974.3390502929688, + "loss": 0.0153, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.08232900500297546, + "rewards/margins": 9.218530654907227, + "rewards/rejected": -9.300857543945312, + "step": 8160 + }, + { + "epoch": 0.49, + "learning_rate": 4.9409416420782265e-06, + "logits/chosen": -2.7849819660186768, + "logits/rejected": -2.571629047393799, + "logps/chosen": -84.22157287597656, + "logps/rejected": -1090.9063720703125, + "loss": 0.0151, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.09699822217226028, + "rewards/margins": 10.36839485168457, + "rewards/rejected": -10.465392112731934, + "step": 8170 + }, + { + "epoch": 0.49, + "learning_rate": 4.940566255428305e-06, + "logits/chosen": -2.7972047328948975, + "logits/rejected": -2.5499560832977295, + "logps/chosen": -83.21194458007812, + "logps/rejected": -1071.7213134765625, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.11952732503414154, + "rewards/margins": 10.142314910888672, + "rewards/rejected": -10.261842727661133, + "step": 8180 + }, + { + "epoch": 0.49, + "learning_rate": 4.940189693889819e-06, + "logits/chosen": -2.7937278747558594, + "logits/rejected": -2.5996718406677246, + "logps/chosen": -98.93553161621094, + "logps/rejected": -1090.946044921875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2619193494319916, + "rewards/margins": 10.198055267333984, + "rewards/rejected": -10.45997428894043, + "step": 8190 + }, + { + "epoch": 0.49, + "learning_rate": 4.939811957644045e-06, + "logits/chosen": -2.8066911697387695, + "logits/rejected": -2.5849921703338623, + "logps/chosen": -96.25444793701172, + "logps/rejected": -1220.16845703125, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.27151787281036377, + "rewards/margins": 11.469127655029297, + "rewards/rejected": -11.740647315979004, + "step": 8200 + }, + { + "epoch": 0.49, + "learning_rate": 4.939433046872825e-06, + "logits/chosen": -2.7527008056640625, + "logits/rejected": -2.5438504219055176, + "logps/chosen": -102.46073150634766, + "logps/rejected": -1182.42578125, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3560205101966858, + "rewards/margins": 11.020380020141602, + "rewards/rejected": -11.376399040222168, + "step": 8210 + }, + { + "epoch": 0.49, + "learning_rate": 4.939052961758569e-06, + "logits/chosen": -2.791140556335449, + "logits/rejected": -2.5815985202789307, + "logps/chosen": -95.83058166503906, + "logps/rejected": -984.6638793945312, + "loss": 0.0281, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2383459508419037, + "rewards/margins": 9.159008026123047, + "rewards/rejected": -9.397353172302246, + "step": 8220 + }, + { + "epoch": 0.49, + "learning_rate": 4.938671702484247e-06, + "logits/chosen": -2.8012568950653076, + "logits/rejected": -2.5865259170532227, + "logps/chosen": -98.31891632080078, + "logps/rejected": -1006.3351440429688, + "loss": 0.0158, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2825906574726105, + "rewards/margins": 9.347806930541992, + "rewards/rejected": -9.630399703979492, + "step": 8230 + }, + { + "epoch": 0.49, + "learning_rate": 4.9382892692334e-06, + "logits/chosen": -2.8067169189453125, + "logits/rejected": -2.5080924034118652, + "logps/chosen": -87.20024871826172, + "logps/rejected": -1140.198974609375, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.18654102087020874, + "rewards/margins": 10.778132438659668, + "rewards/rejected": -10.96467399597168, + "step": 8240 + }, + { + "epoch": 0.49, + "learning_rate": 4.937905662190129e-06, + "logits/chosen": -2.7882542610168457, + "logits/rejected": -2.5241200923919678, + "logps/chosen": -86.48379516601562, + "logps/rejected": -1165.376708984375, + "loss": 0.0104, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.19306592643260956, + "rewards/margins": 11.027694702148438, + "rewards/rejected": -11.220762252807617, + "step": 8250 + }, + { + "epoch": 0.49, + "learning_rate": 4.937520881539103e-06, + "logits/chosen": -2.7620749473571777, + "logits/rejected": -2.5087196826934814, + "logps/chosen": -88.07823944091797, + "logps/rejected": -1116.6484375, + "loss": 0.0126, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.15203183889389038, + "rewards/margins": 10.565764427185059, + "rewards/rejected": -10.717798233032227, + "step": 8260 + }, + { + "epoch": 0.49, + "learning_rate": 4.9371349274655554e-06, + "logits/chosen": -2.73854923248291, + "logits/rejected": -2.462399482727051, + "logps/chosen": -87.28633880615234, + "logps/rejected": -1087.7689208984375, + "loss": 0.009, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.2257615029811859, + "rewards/margins": 10.212897300720215, + "rewards/rejected": -10.43865966796875, + "step": 8270 + }, + { + "epoch": 0.49, + "learning_rate": 4.936747800155285e-06, + "logits/chosen": -2.768064260482788, + "logits/rejected": -2.4369709491729736, + "logps/chosen": -89.6531982421875, + "logps/rejected": -1096.7021484375, + "loss": 0.0127, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16490578651428223, + "rewards/margins": 10.369377136230469, + "rewards/rejected": -10.534283638000488, + "step": 8280 + }, + { + "epoch": 0.49, + "learning_rate": 4.936359499794655e-06, + "logits/chosen": -2.763387441635132, + "logits/rejected": -2.5313873291015625, + "logps/chosen": -91.75997161865234, + "logps/rejected": -1236.5867919921875, + "loss": 0.0107, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.17353315651416779, + "rewards/margins": 11.755800247192383, + "rewards/rejected": -11.929333686828613, + "step": 8290 + }, + { + "epoch": 0.49, + "learning_rate": 4.935970026570594e-06, + "logits/chosen": -2.7261383533477783, + "logits/rejected": -2.442786693572998, + "logps/chosen": -154.85885620117188, + "logps/rejected": -1284.572509765625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8449016809463501, + "rewards/margins": 11.56352424621582, + "rewards/rejected": -12.408422470092773, + "step": 8300 + }, + { + "epoch": 0.5, + "learning_rate": 4.935579380670592e-06, + "logits/chosen": -2.7080798149108887, + "logits/rejected": -2.401885747909546, + "logps/chosen": -174.195068359375, + "logps/rejected": -1266.2960205078125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0381548404693604, + "rewards/margins": 11.172606468200684, + "rewards/rejected": -12.210762977600098, + "step": 8310 + }, + { + "epoch": 0.5, + "learning_rate": 4.935187562282708e-06, + "logits/chosen": -2.7456510066986084, + "logits/rejected": -2.493036985397339, + "logps/chosen": -181.52951049804688, + "logps/rejected": -1121.7872314453125, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0890799760818481, + "rewards/margins": 9.693365097045898, + "rewards/rejected": -10.782444953918457, + "step": 8320 + }, + { + "epoch": 0.5, + "learning_rate": 4.934794571595561e-06, + "logits/chosen": -2.723733901977539, + "logits/rejected": -2.378776788711548, + "logps/chosen": -159.49053955078125, + "logps/rejected": -1241.007568359375, + "loss": 0.0109, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8775874376296997, + "rewards/margins": 11.086641311645508, + "rewards/rejected": -11.964228630065918, + "step": 8330 + }, + { + "epoch": 0.5, + "learning_rate": 4.934400408798339e-06, + "logits/chosen": -2.7112441062927246, + "logits/rejected": -2.4750900268554688, + "logps/chosen": -156.2554168701172, + "logps/rejected": -1226.9696044921875, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7994493246078491, + "rewards/margins": 11.030031204223633, + "rewards/rejected": -11.829480171203613, + "step": 8340 + }, + { + "epoch": 0.5, + "learning_rate": 4.934005074080792e-06, + "logits/chosen": -2.6907193660736084, + "logits/rejected": -2.4379069805145264, + "logps/chosen": -146.80860900878906, + "logps/rejected": -1253.2562255859375, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7756183743476868, + "rewards/margins": 11.312003135681152, + "rewards/rejected": -12.087621688842773, + "step": 8350 + }, + { + "epoch": 0.5, + "learning_rate": 4.9336085676332324e-06, + "logits/chosen": -2.6680819988250732, + "logits/rejected": -2.364891290664673, + "logps/chosen": -140.12152099609375, + "logps/rejected": -1191.304443359375, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.752336859703064, + "rewards/margins": 10.733369827270508, + "rewards/rejected": -11.485706329345703, + "step": 8360 + }, + { + "epoch": 0.5, + "learning_rate": 4.93321088964654e-06, + "logits/chosen": -2.6837241649627686, + "logits/rejected": -2.3483433723449707, + "logps/chosen": -136.3004150390625, + "logps/rejected": -1209.0609130859375, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6557984352111816, + "rewards/margins": 10.98684024810791, + "rewards/rejected": -11.642637252807617, + "step": 8370 + }, + { + "epoch": 0.5, + "learning_rate": 4.932812040312156e-06, + "logits/chosen": -2.7002761363983154, + "logits/rejected": -2.3660330772399902, + "logps/chosen": -174.1647491455078, + "logps/rejected": -1211.975830078125, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0595566034317017, + "rewards/margins": 10.635034561157227, + "rewards/rejected": -11.694592475891113, + "step": 8380 + }, + { + "epoch": 0.5, + "learning_rate": 4.932412019822087e-06, + "logits/chosen": -2.7162821292877197, + "logits/rejected": -2.3310799598693848, + "logps/chosen": -198.69822692871094, + "logps/rejected": -1196.925048828125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2418029308319092, + "rewards/margins": 10.279375076293945, + "rewards/rejected": -11.5211763381958, + "step": 8390 + }, + { + "epoch": 0.5, + "learning_rate": 4.932010828368903e-06, + "logits/chosen": -2.713186740875244, + "logits/rejected": -2.3532936573028564, + "logps/chosen": -193.61093139648438, + "logps/rejected": -1319.718994140625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1643874645233154, + "rewards/margins": 11.591802597045898, + "rewards/rejected": -12.756190299987793, + "step": 8400 + }, + { + "epoch": 0.5, + "learning_rate": 4.931608466145737e-06, + "logits/chosen": -2.665128231048584, + "logits/rejected": -2.2883107662200928, + "logps/chosen": -197.45925903320312, + "logps/rejected": -1238.516845703125, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.302172303199768, + "rewards/margins": 10.634875297546387, + "rewards/rejected": -11.937047958374023, + "step": 8410 + }, + { + "epoch": 0.5, + "learning_rate": 4.931204933346286e-06, + "logits/chosen": -2.6490397453308105, + "logits/rejected": -2.3311824798583984, + "logps/chosen": -245.8968048095703, + "logps/rejected": -1172.8463134765625, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.698767900466919, + "rewards/margins": 9.596763610839844, + "rewards/rejected": -11.2955322265625, + "step": 8420 + }, + { + "epoch": 0.5, + "learning_rate": 4.930800230164812e-06, + "logits/chosen": -2.6704180240631104, + "logits/rejected": -2.3434951305389404, + "logps/chosen": -158.18045043945312, + "logps/rejected": -1159.16015625, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8940812945365906, + "rewards/margins": 10.254988670349121, + "rewards/rejected": -11.149070739746094, + "step": 8430 + }, + { + "epoch": 0.5, + "learning_rate": 4.930394356796137e-06, + "logits/chosen": -2.645294427871704, + "logits/rejected": -2.225689649581909, + "logps/chosen": -124.64208984375, + "logps/rejected": -1233.655029296875, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.502900242805481, + "rewards/margins": 11.387826919555664, + "rewards/rejected": -11.890726089477539, + "step": 8440 + }, + { + "epoch": 0.5, + "learning_rate": 4.9299873134356505e-06, + "logits/chosen": -2.657586097717285, + "logits/rejected": -2.2588391304016113, + "logps/chosen": -99.67640686035156, + "logps/rejected": -1170.5230712890625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.24983413517475128, + "rewards/margins": 11.01685619354248, + "rewards/rejected": -11.266690254211426, + "step": 8450 + }, + { + "epoch": 0.5, + "learning_rate": 4.929579100279302e-06, + "logits/chosen": -2.6110262870788574, + "logits/rejected": -2.2358555793762207, + "logps/chosen": -103.1527099609375, + "logps/rejected": -1102.51953125, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32115548849105835, + "rewards/margins": 10.262894630432129, + "rewards/rejected": -10.584051132202148, + "step": 8460 + }, + { + "epoch": 0.51, + "learning_rate": 4.929169717523606e-06, + "logits/chosen": -2.6501402854919434, + "logits/rejected": -2.2079734802246094, + "logps/chosen": -117.59745025634766, + "logps/rejected": -1200.829833984375, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.47956761717796326, + "rewards/margins": 11.077605247497559, + "rewards/rejected": -11.557172775268555, + "step": 8470 + }, + { + "epoch": 0.51, + "learning_rate": 4.928759165365638e-06, + "logits/chosen": -2.6527247428894043, + "logits/rejected": -2.262042284011841, + "logps/chosen": -116.34294128417969, + "logps/rejected": -1188.951416015625, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46577364206314087, + "rewards/margins": 10.972543716430664, + "rewards/rejected": -11.438318252563477, + "step": 8480 + }, + { + "epoch": 0.51, + "learning_rate": 4.92834744400304e-06, + "logits/chosen": -2.6446733474731445, + "logits/rejected": -2.2962286472320557, + "logps/chosen": -108.58809661865234, + "logps/rejected": -1085.635498046875, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41023746132850647, + "rewards/margins": 10.028517723083496, + "rewards/rejected": -10.438755989074707, + "step": 8490 + }, + { + "epoch": 0.51, + "learning_rate": 4.927934553634012e-06, + "logits/chosen": -2.6125893592834473, + "logits/rejected": -2.2501416206359863, + "logps/chosen": -112.4659194946289, + "logps/rejected": -1141.994873046875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41731780767440796, + "rewards/margins": 10.562034606933594, + "rewards/rejected": -10.979350090026855, + "step": 8500 + }, + { + "epoch": 0.51, + "learning_rate": 4.927520494457322e-06, + "logits/chosen": -2.6098403930664062, + "logits/rejected": -2.289140224456787, + "logps/chosen": -120.3819808959961, + "logps/rejected": -1187.58740234375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46318262815475464, + "rewards/margins": 10.964584350585938, + "rewards/rejected": -11.427766799926758, + "step": 8510 + }, + { + "epoch": 0.51, + "learning_rate": 4.927105266672296e-06, + "logits/chosen": -2.6138861179351807, + "logits/rejected": -2.239011287689209, + "logps/chosen": -127.44168853759766, + "logps/rejected": -1298.4248046875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5278333425521851, + "rewards/margins": 12.009220123291016, + "rewards/rejected": -12.537053108215332, + "step": 8520 + }, + { + "epoch": 0.51, + "learning_rate": 4.926688870478826e-06, + "logits/chosen": -2.6080334186553955, + "logits/rejected": -2.1187500953674316, + "logps/chosen": -141.58865356445312, + "logps/rejected": -1160.447509765625, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7462449073791504, + "rewards/margins": 10.41910457611084, + "rewards/rejected": -11.165349006652832, + "step": 8530 + }, + { + "epoch": 0.51, + "learning_rate": 4.926271306077365e-06, + "logits/chosen": -2.6295089721679688, + "logits/rejected": -2.230281114578247, + "logps/chosen": -192.72689819335938, + "logps/rejected": -1256.7039794921875, + "loss": 0.0083, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2145183086395264, + "rewards/margins": 10.917692184448242, + "rewards/rejected": -12.132211685180664, + "step": 8540 + }, + { + "epoch": 0.51, + "learning_rate": 4.925852573668928e-06, + "logits/chosen": -2.5391979217529297, + "logits/rejected": -2.1465065479278564, + "logps/chosen": -256.2874755859375, + "logps/rejected": -1352.09326171875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8709633350372314, + "rewards/margins": 11.205057144165039, + "rewards/rejected": -13.076019287109375, + "step": 8550 + }, + { + "epoch": 0.51, + "learning_rate": 4.925432673455093e-06, + "logits/chosen": -2.5934720039367676, + "logits/rejected": -2.1257238388061523, + "logps/chosen": -236.5601348876953, + "logps/rejected": -1285.2135009765625, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6405532360076904, + "rewards/margins": 10.768805503845215, + "rewards/rejected": -12.409358978271484, + "step": 8560 + }, + { + "epoch": 0.51, + "learning_rate": 4.9250116056379995e-06, + "logits/chosen": -2.5978126525878906, + "logits/rejected": -2.1567323207855225, + "logps/chosen": -259.41973876953125, + "logps/rejected": -1277.0860595703125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.8205982446670532, + "rewards/margins": 10.5196533203125, + "rewards/rejected": -12.340250968933105, + "step": 8570 + }, + { + "epoch": 0.51, + "learning_rate": 4.924589370420351e-06, + "logits/chosen": -2.5525307655334473, + "logits/rejected": -2.1641201972961426, + "logps/chosen": -226.9619903564453, + "logps/rejected": -1351.9940185546875, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.5262714624404907, + "rewards/margins": 11.539323806762695, + "rewards/rejected": -13.065594673156738, + "step": 8580 + }, + { + "epoch": 0.51, + "learning_rate": 4.92416596800541e-06, + "logits/chosen": -2.5547022819519043, + "logits/rejected": -2.1575772762298584, + "logps/chosen": -219.4124755859375, + "logps/rejected": -1236.34228515625, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.513289213180542, + "rewards/margins": 10.414794921875, + "rewards/rejected": -11.928084373474121, + "step": 8590 + }, + { + "epoch": 0.51, + "learning_rate": 4.923741398597002e-06, + "logits/chosen": -2.5400283336639404, + "logits/rejected": -2.187659740447998, + "logps/chosen": -190.90476989746094, + "logps/rejected": -1265.1927490234375, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2522859573364258, + "rewards/margins": 10.95875358581543, + "rewards/rejected": -12.211040496826172, + "step": 8600 + }, + { + "epoch": 0.51, + "learning_rate": 4.923315662399517e-06, + "logits/chosen": -2.549731492996216, + "logits/rejected": -2.067566156387329, + "logps/chosen": -188.47486877441406, + "logps/rejected": -1343.1993408203125, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1448856592178345, + "rewards/margins": 11.840471267700195, + "rewards/rejected": -12.985357284545898, + "step": 8610 + }, + { + "epoch": 0.51, + "learning_rate": 4.922888759617902e-06, + "logits/chosen": -2.578444004058838, + "logits/rejected": -2.1173434257507324, + "logps/chosen": -197.18478393554688, + "logps/rejected": -1270.169189453125, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2188738584518433, + "rewards/margins": 11.042170524597168, + "rewards/rejected": -12.261045455932617, + "step": 8620 + }, + { + "epoch": 0.51, + "learning_rate": 4.922460690457669e-06, + "logits/chosen": -2.566720962524414, + "logits/rejected": -2.1462390422821045, + "logps/chosen": -156.75839233398438, + "logps/rejected": -1368.327392578125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.8996116518974304, + "rewards/margins": 12.352424621582031, + "rewards/rejected": -13.252037048339844, + "step": 8630 + }, + { + "epoch": 0.52, + "learning_rate": 4.9220314551248915e-06, + "logits/chosen": -2.5638468265533447, + "logits/rejected": -2.1381242275238037, + "logps/chosen": -150.8170166015625, + "logps/rejected": -1154.7896728515625, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7850055694580078, + "rewards/margins": 10.316033363342285, + "rewards/rejected": -11.101037979125977, + "step": 8640 + }, + { + "epoch": 0.52, + "learning_rate": 4.921601053826202e-06, + "logits/chosen": -2.538271903991699, + "logits/rejected": -2.084805965423584, + "logps/chosen": -149.67544555664062, + "logps/rejected": -1210.05029296875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7598694562911987, + "rewards/margins": 10.894105911254883, + "rewards/rejected": -11.653974533081055, + "step": 8650 + }, + { + "epoch": 0.52, + "learning_rate": 4.921169486768795e-06, + "logits/chosen": -2.522334575653076, + "logits/rejected": -2.0509893894195557, + "logps/chosen": -132.68572998046875, + "logps/rejected": -1291.65087890625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.554379403591156, + "rewards/margins": 11.907740592956543, + "rewards/rejected": -12.462118148803711, + "step": 8660 + }, + { + "epoch": 0.52, + "learning_rate": 4.920736754160429e-06, + "logits/chosen": -2.54852032661438, + "logits/rejected": -2.1214566230773926, + "logps/chosen": -141.50221252441406, + "logps/rejected": -1218.0029296875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7079504728317261, + "rewards/margins": 11.031194686889648, + "rewards/rejected": -11.739145278930664, + "step": 8670 + }, + { + "epoch": 0.52, + "learning_rate": 4.920302856209421e-06, + "logits/chosen": -2.5500588417053223, + "logits/rejected": -2.140646457672119, + "logps/chosen": -133.25222778320312, + "logps/rejected": -1180.8228759765625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6268365383148193, + "rewards/margins": 10.748812675476074, + "rewards/rejected": -11.375650405883789, + "step": 8680 + }, + { + "epoch": 0.52, + "learning_rate": 4.919867793124647e-06, + "logits/chosen": -2.6059963703155518, + "logits/rejected": -2.1350207328796387, + "logps/chosen": -132.34324645996094, + "logps/rejected": -1064.859130859375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6261882781982422, + "rewards/margins": 9.58448600769043, + "rewards/rejected": -10.210673332214355, + "step": 8690 + }, + { + "epoch": 0.52, + "learning_rate": 4.91943156511555e-06, + "logits/chosen": -2.5829074382781982, + "logits/rejected": -2.020895481109619, + "logps/chosen": -130.73568725585938, + "logps/rejected": -1142.0009765625, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5101009011268616, + "rewards/margins": 10.471832275390625, + "rewards/rejected": -10.98193359375, + "step": 8700 + }, + { + "epoch": 0.52, + "learning_rate": 4.918994172392127e-06, + "logits/chosen": -2.492659091949463, + "logits/rejected": -2.0696825981140137, + "logps/chosen": -109.44816589355469, + "logps/rejected": -1029.897216796875, + "loss": 0.007, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42883554100990295, + "rewards/margins": 9.431425094604492, + "rewards/rejected": -9.860260963439941, + "step": 8710 + }, + { + "epoch": 0.52, + "learning_rate": 4.918555615164941e-06, + "logits/chosen": -2.591195821762085, + "logits/rejected": -2.1617109775543213, + "logps/chosen": -109.4277114868164, + "logps/rejected": -1124.1805419921875, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3623427450656891, + "rewards/margins": 10.437860488891602, + "rewards/rejected": -10.800204277038574, + "step": 8720 + }, + { + "epoch": 0.52, + "learning_rate": 4.918115893645113e-06, + "logits/chosen": -2.489237070083618, + "logits/rejected": -1.9729143381118774, + "logps/chosen": -129.11219787597656, + "logps/rejected": -1240.086669921875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5392051935195923, + "rewards/margins": 11.411539077758789, + "rewards/rejected": -11.95074462890625, + "step": 8730 + }, + { + "epoch": 0.52, + "learning_rate": 4.917675008044325e-06, + "logits/chosen": -2.507166624069214, + "logits/rejected": -2.123903274536133, + "logps/chosen": -110.05009460449219, + "logps/rejected": -1189.328857421875, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.41863924264907837, + "rewards/margins": 11.05016040802002, + "rewards/rejected": -11.468799591064453, + "step": 8740 + }, + { + "epoch": 0.52, + "learning_rate": 4.917232958574818e-06, + "logits/chosen": -2.4957025051116943, + "logits/rejected": -1.9906578063964844, + "logps/chosen": -127.26863861083984, + "logps/rejected": -1186.372314453125, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.519241452217102, + "rewards/margins": 10.909570693969727, + "rewards/rejected": -11.428813934326172, + "step": 8750 + }, + { + "epoch": 0.52, + "learning_rate": 4.916789745449396e-06, + "logits/chosen": -2.5405311584472656, + "logits/rejected": -2.0583295822143555, + "logps/chosen": -116.7403793334961, + "logps/rejected": -1182.0587158203125, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.49584469199180603, + "rewards/margins": 10.890067100524902, + "rewards/rejected": -11.38591194152832, + "step": 8760 + }, + { + "epoch": 0.52, + "learning_rate": 4.916345368881421e-06, + "logits/chosen": -2.566662073135376, + "logits/rejected": -2.0812039375305176, + "logps/chosen": -114.72480773925781, + "logps/rejected": -1179.4605712890625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42292338609695435, + "rewards/margins": 10.915139198303223, + "rewards/rejected": -11.33806324005127, + "step": 8770 + }, + { + "epoch": 0.52, + "learning_rate": 4.915899829084817e-06, + "logits/chosen": -2.506070852279663, + "logits/rejected": -2.089146375656128, + "logps/chosen": -100.01338195800781, + "logps/rejected": -1146.290283203125, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.32286468148231506, + "rewards/margins": 10.699003219604492, + "rewards/rejected": -11.021869659423828, + "step": 8780 + }, + { + "epoch": 0.52, + "learning_rate": 4.915453126274065e-06, + "logits/chosen": -2.5194497108459473, + "logits/rejected": -1.993635892868042, + "logps/chosen": -126.95014953613281, + "logps/rejected": -1169.710693359375, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.604026198387146, + "rewards/margins": 10.64478874206543, + "rewards/rejected": -11.248815536499023, + "step": 8790 + }, + { + "epoch": 0.52, + "learning_rate": 4.91500526066421e-06, + "logits/chosen": -2.51689076423645, + "logits/rejected": -2.0356264114379883, + "logps/chosen": -171.7598876953125, + "logps/rejected": -1218.410888671875, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.0527945756912231, + "rewards/margins": 10.694745063781738, + "rewards/rejected": -11.747539520263672, + "step": 8800 + }, + { + "epoch": 0.53, + "learning_rate": 4.914556232470852e-06, + "logits/chosen": -2.5172712802886963, + "logits/rejected": -2.1194005012512207, + "logps/chosen": -291.6817932128906, + "logps/rejected": -1355.787841796875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.239320755004883, + "rewards/margins": 10.87884521484375, + "rewards/rejected": -13.11816692352295, + "step": 8810 + }, + { + "epoch": 0.53, + "learning_rate": 4.914106041910155e-06, + "logits/chosen": -2.535651206970215, + "logits/rejected": -2.0637736320495605, + "logps/chosen": -318.8282165527344, + "logps/rejected": -1451.5145263671875, + "loss": 0.0179, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -2.4629616737365723, + "rewards/margins": 11.6010160446167, + "rewards/rejected": -14.063977241516113, + "step": 8820 + }, + { + "epoch": 0.53, + "learning_rate": 4.9136546891988405e-06, + "logits/chosen": -2.4874765872955322, + "logits/rejected": -2.052802324295044, + "logps/chosen": -176.2159423828125, + "logps/rejected": -1282.90869140625, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1466925144195557, + "rewards/margins": 11.250649452209473, + "rewards/rejected": -12.397340774536133, + "step": 8830 + }, + { + "epoch": 0.53, + "learning_rate": 4.913202174554189e-06, + "logits/chosen": -2.5354113578796387, + "logits/rejected": -2.1110825538635254, + "logps/chosen": -190.0911865234375, + "logps/rejected": -1271.3109130859375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.1755142211914062, + "rewards/margins": 11.084485054016113, + "rewards/rejected": -12.25999927520752, + "step": 8840 + }, + { + "epoch": 0.53, + "learning_rate": 4.9127484981940425e-06, + "logits/chosen": -2.5048694610595703, + "logits/rejected": -2.046332836151123, + "logps/chosen": -165.88829040527344, + "logps/rejected": -1175.0084228515625, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.9406801462173462, + "rewards/margins": 10.357568740844727, + "rewards/rejected": -11.298248291015625, + "step": 8850 + }, + { + "epoch": 0.53, + "learning_rate": 4.912293660336798e-06, + "logits/chosen": -2.527005672454834, + "logits/rejected": -2.024022340774536, + "logps/chosen": -127.67681884765625, + "logps/rejected": -1364.843505859375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5572468638420105, + "rewards/margins": 12.631437301635742, + "rewards/rejected": -13.188685417175293, + "step": 8860 + }, + { + "epoch": 0.53, + "learning_rate": 4.911837661201417e-06, + "logits/chosen": -2.4975199699401855, + "logits/rejected": -2.0482470989227295, + "logps/chosen": -126.90157318115234, + "logps/rejected": -1177.91796875, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6208245158195496, + "rewards/margins": 10.713441848754883, + "rewards/rejected": -11.334264755249023, + "step": 8870 + }, + { + "epoch": 0.53, + "learning_rate": 4.911380501007417e-06, + "logits/chosen": -2.49613094329834, + "logits/rejected": -2.0501036643981934, + "logps/chosen": -119.09550476074219, + "logps/rejected": -1103.6058349609375, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5309459567070007, + "rewards/margins": 10.067449569702148, + "rewards/rejected": -10.598396301269531, + "step": 8880 + }, + { + "epoch": 0.53, + "learning_rate": 4.910922179974874e-06, + "logits/chosen": -2.483053684234619, + "logits/rejected": -2.0133414268493652, + "logps/chosen": -123.5906982421875, + "logps/rejected": -1200.047607421875, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5445976853370667, + "rewards/margins": 11.030115127563477, + "rewards/rejected": -11.574711799621582, + "step": 8890 + }, + { + "epoch": 0.53, + "learning_rate": 4.910462698324425e-06, + "logits/chosen": -2.4485747814178467, + "logits/rejected": -1.99379563331604, + "logps/chosen": -128.739990234375, + "logps/rejected": -1175.333251953125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5309732556343079, + "rewards/margins": 10.771341323852539, + "rewards/rejected": -11.302312850952148, + "step": 8900 + }, + { + "epoch": 0.53, + "learning_rate": 4.910002056277263e-06, + "logits/chosen": -2.4345476627349854, + "logits/rejected": -1.9773731231689453, + "logps/chosen": -123.64151763916016, + "logps/rejected": -1116.333740234375, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5248419642448425, + "rewards/margins": 10.200199127197266, + "rewards/rejected": -10.725041389465332, + "step": 8910 + }, + { + "epoch": 0.53, + "learning_rate": 4.9095402540551426e-06, + "logits/chosen": -2.438912868499756, + "logits/rejected": -1.9748096466064453, + "logps/chosen": -141.9430389404297, + "logps/rejected": -1160.1414794921875, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.7052977085113525, + "rewards/margins": 10.455205917358398, + "rewards/rejected": -11.160504341125488, + "step": 8920 + }, + { + "epoch": 0.53, + "learning_rate": 4.909077291880374e-06, + "logits/chosen": -2.4647209644317627, + "logits/rejected": -1.9365339279174805, + "logps/chosen": -152.30593872070312, + "logps/rejected": -1199.8248291015625, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.79912269115448, + "rewards/margins": 10.759928703308105, + "rewards/rejected": -11.559052467346191, + "step": 8930 + }, + { + "epoch": 0.53, + "learning_rate": 4.908613169975828e-06, + "logits/chosen": -2.449131488800049, + "logits/rejected": -1.8910865783691406, + "logps/chosen": -128.24102783203125, + "logps/rejected": -1243.804443359375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.6105014681816101, + "rewards/margins": 11.377962112426758, + "rewards/rejected": -11.988462448120117, + "step": 8940 + }, + { + "epoch": 0.53, + "learning_rate": 4.908147888564933e-06, + "logits/chosen": -2.4812979698181152, + "logits/rejected": -1.989043951034546, + "logps/chosen": -122.87400817871094, + "logps/rejected": -1054.754638671875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5586776733398438, + "rewards/margins": 9.54277229309082, + "rewards/rejected": -10.101449966430664, + "step": 8950 + }, + { + "epoch": 0.53, + "learning_rate": 4.907681447871675e-06, + "logits/chosen": -2.468430995941162, + "logits/rejected": -1.9127461910247803, + "logps/chosen": -130.66795349121094, + "logps/rejected": -1122.6256103515625, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5761376023292542, + "rewards/margins": 10.199202537536621, + "rewards/rejected": -10.77534008026123, + "step": 8960 + }, + { + "epoch": 0.53, + "learning_rate": 4.9072138481205985e-06, + "logits/chosen": -2.4114468097686768, + "logits/rejected": -1.9359939098358154, + "logps/chosen": -131.7882080078125, + "logps/rejected": -1226.0621337890625, + "loss": 0.0112, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5796363949775696, + "rewards/margins": 11.247169494628906, + "rewards/rejected": -11.826805114746094, + "step": 8970 + }, + { + "epoch": 0.54, + "learning_rate": 4.906745089536807e-06, + "logits/chosen": -2.4809346199035645, + "logits/rejected": -1.9680235385894775, + "logps/chosen": -121.25361633300781, + "logps/rejected": -1216.440673828125, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.44334688782691956, + "rewards/margins": 11.268182754516602, + "rewards/rejected": -11.711529731750488, + "step": 8980 + }, + { + "epoch": 0.54, + "learning_rate": 4.906275172345958e-06, + "logits/chosen": -2.3760221004486084, + "logits/rejected": -1.831313133239746, + "logps/chosen": -110.75362396240234, + "logps/rejected": -1208.471923828125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4101928770542145, + "rewards/margins": 11.238991737365723, + "rewards/rejected": -11.649184226989746, + "step": 8990 + }, + { + "epoch": 0.54, + "learning_rate": 4.905804096774274e-06, + "logits/chosen": -2.5004141330718994, + "logits/rejected": -2.00439715385437, + "logps/chosen": -120.70208740234375, + "logps/rejected": -1278.545654296875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.432640016078949, + "rewards/margins": 11.919075012207031, + "rewards/rejected": -12.351713180541992, + "step": 9000 + }, + { + "epoch": 0.54, + "learning_rate": 4.905331863048527e-06, + "logits/chosen": -2.485170364379883, + "logits/rejected": -1.9551622867584229, + "logps/chosen": -115.267822265625, + "logps/rejected": -1154.04833984375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.42792922258377075, + "rewards/margins": 10.683941841125488, + "rewards/rejected": -11.111871719360352, + "step": 9010 + }, + { + "epoch": 0.54, + "learning_rate": 4.904858471396052e-06, + "logits/chosen": -2.4857840538024902, + "logits/rejected": -1.9795833826065063, + "logps/chosen": -125.89668273925781, + "logps/rejected": -1051.677978515625, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.557021975517273, + "rewards/margins": 9.520426750183105, + "rewards/rejected": -10.077447891235352, + "step": 9020 + }, + { + "epoch": 0.54, + "learning_rate": 4.90438392204474e-06, + "logits/chosen": -2.473426342010498, + "logits/rejected": -2.001879930496216, + "logps/chosen": -114.4852066040039, + "logps/rejected": -1047.224853515625, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.43146514892578125, + "rewards/margins": 9.61073112487793, + "rewards/rejected": -10.042197227478027, + "step": 9030 + }, + { + "epoch": 0.54, + "learning_rate": 4.903908215223039e-06, + "logits/chosen": -2.474687099456787, + "logits/rejected": -1.954705834388733, + "logps/chosen": -104.60765075683594, + "logps/rejected": -1141.2850341796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.3660164475440979, + "rewards/margins": 10.611710548400879, + "rewards/rejected": -10.977727890014648, + "step": 9040 + }, + { + "epoch": 0.54, + "learning_rate": 4.903431351159953e-06, + "logits/chosen": -2.487273931503296, + "logits/rejected": -2.0323173999786377, + "logps/chosen": -112.09776306152344, + "logps/rejected": -1206.0609130859375, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.40267008543014526, + "rewards/margins": 11.214639663696289, + "rewards/rejected": -11.6173095703125, + "step": 9050 + }, + { + "epoch": 0.54, + "learning_rate": 4.902953330085045e-06, + "logits/chosen": -2.4751803874969482, + "logits/rejected": -1.9738658666610718, + "logps/chosen": -115.20489501953125, + "logps/rejected": -1177.436767578125, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.4248768389225006, + "rewards/margins": 10.920101165771484, + "rewards/rejected": -11.344978332519531, + "step": 9060 + }, + { + "epoch": 0.54, + "learning_rate": 4.902474152228436e-06, + "logits/chosen": -2.4660487174987793, + "logits/rejected": -1.9734323024749756, + "logps/chosen": -115.88789367675781, + "logps/rejected": -1111.953125, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.46355515718460083, + "rewards/margins": 10.212305068969727, + "rewards/rejected": -10.675859451293945, + "step": 9070 + }, + { + "epoch": 0.54, + "learning_rate": 4.901993817820799e-06, + "logits/chosen": -2.415534496307373, + "logits/rejected": -1.910928726196289, + "logps/chosen": -128.15475463867188, + "logps/rejected": -1246.646728515625, + "loss": 0.0113, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.5948313474655151, + "rewards/margins": 11.433905601501465, + "rewards/rejected": -12.02873706817627, + "step": 9080 + }, + { + "epoch": 0.54, + "learning_rate": 4.901512327093369e-06, + "logits/chosen": -2.4487416744232178, + "logits/rejected": -1.9200732707977295, + "logps/chosen": -199.3600616455078, + "logps/rejected": -1262.329345703125, + "loss": 0.0088, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.305281162261963, + "rewards/margins": 10.8794584274292, + "rewards/rejected": -12.18474006652832, + "step": 9090 + }, + { + "epoch": 0.54, + "learning_rate": 4.901029680277936e-06, + "logits/chosen": -2.451716423034668, + "logits/rejected": -1.8867104053497314, + "logps/chosen": -254.1786651611328, + "logps/rejected": -1375.6614990234375, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.840489387512207, + "rewards/margins": 11.472057342529297, + "rewards/rejected": -13.312545776367188, + "step": 9100 + }, + { + "epoch": 0.54, + "learning_rate": 4.900545877606846e-06, + "logits/chosen": -2.483898162841797, + "logits/rejected": -1.9019031524658203, + "logps/chosen": -267.94866943359375, + "logps/rejected": -1314.880126953125, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9479299783706665, + "rewards/margins": 10.75597095489502, + "rewards/rejected": -12.703900337219238, + "step": 9110 + }, + { + "epoch": 0.54, + "learning_rate": 4.900060919313001e-06, + "logits/chosen": -2.438732624053955, + "logits/rejected": -1.8350410461425781, + "logps/chosen": -264.29998779296875, + "logps/rejected": -1327.3834228515625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.9393447637557983, + "rewards/margins": 10.889585494995117, + "rewards/rejected": -12.82892894744873, + "step": 9120 + }, + { + "epoch": 0.54, + "learning_rate": 4.89957480562986e-06, + "logits/chosen": -2.446174144744873, + "logits/rejected": -1.8756234645843506, + "logps/chosen": -246.39547729492188, + "logps/rejected": -1273.507080078125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7034847736358643, + "rewards/margins": 10.606338500976562, + "rewards/rejected": -12.309822082519531, + "step": 9130 + }, + { + "epoch": 0.55, + "learning_rate": 4.899087536791437e-06, + "logits/chosen": -2.4365739822387695, + "logits/rejected": -1.832301139831543, + "logps/chosen": -215.7676239013672, + "logps/rejected": -1356.737060546875, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.4140079021453857, + "rewards/margins": 11.709318161010742, + "rewards/rejected": -13.123326301574707, + "step": 9140 + }, + { + "epoch": 0.55, + "learning_rate": 4.8985991130323055e-06, + "logits/chosen": -2.3379762172698975, + "logits/rejected": -1.831356406211853, + "logps/chosen": -193.501953125, + "logps/rejected": -1314.2684326171875, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2729213237762451, + "rewards/margins": 11.418787956237793, + "rewards/rejected": -12.6917085647583, + "step": 9150 + }, + { + "epoch": 0.55, + "learning_rate": 4.898109534587591e-06, + "logits/chosen": -2.3119759559631348, + "logits/rejected": -1.8086111545562744, + "logps/chosen": -199.3477325439453, + "logps/rejected": -1285.159912109375, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2954607009887695, + "rewards/margins": 11.09830093383789, + "rewards/rejected": -12.39376163482666, + "step": 9160 + }, + { + "epoch": 0.55, + "learning_rate": 4.897618801692977e-06, + "logits/chosen": -2.35422682762146, + "logits/rejected": -1.685612678527832, + "logps/chosen": -198.84957885742188, + "logps/rejected": -1241.776611328125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.2592036724090576, + "rewards/margins": 10.706997871398926, + "rewards/rejected": -11.966201782226562, + "step": 9170 + }, + { + "epoch": 0.55, + "learning_rate": 4.8971269145847036e-06, + "logits/chosen": -2.371396780014038, + "logits/rejected": -1.645612120628357, + "logps/chosen": -239.2095184326172, + "logps/rejected": -1199.647216796875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.7021490335464478, + "rewards/margins": 9.864858627319336, + "rewards/rejected": -11.567008972167969, + "step": 9180 + }, + { + "epoch": 0.55, + "learning_rate": 4.8966338734995625e-06, + "logits/chosen": -2.314213991165161, + "logits/rejected": -1.6384913921356201, + "logps/chosen": -243.3763427734375, + "logps/rejected": -1390.8961181640625, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6981079578399658, + "rewards/margins": 11.77021312713623, + "rewards/rejected": -13.4683198928833, + "step": 9190 + }, + { + "epoch": 0.55, + "learning_rate": 4.896139678674906e-06, + "logits/chosen": -2.33502459526062, + "logits/rejected": -1.716343641281128, + "logps/chosen": -203.271240234375, + "logps/rejected": -1371.705810546875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.328599452972412, + "rewards/margins": 11.953265190124512, + "rewards/rejected": -13.28186321258545, + "step": 9200 + }, + { + "epoch": 0.55, + "learning_rate": 4.895644330348639e-06, + "logits/chosen": -2.326385736465454, + "logits/rejected": -1.756500005722046, + "logps/chosen": -238.28759765625, + "logps/rejected": -1309.288818359375, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6925512552261353, + "rewards/margins": 10.97048282623291, + "rewards/rejected": -12.663034439086914, + "step": 9210 + }, + { + "epoch": 0.55, + "learning_rate": 4.895147828759221e-06, + "logits/chosen": -2.287712574005127, + "logits/rejected": -1.5830729007720947, + "logps/chosen": -235.680419921875, + "logps/rejected": -1277.4300537109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -1.6376383304595947, + "rewards/margins": 10.70191478729248, + "rewards/rejected": -12.339553833007812, + "step": 9220 + }, + { + "epoch": 0.55, + "learning_rate": 4.89465017414567e-06, + "logits/chosen": -2.263793468475342, + "logits/rejected": -1.515984058380127, + "logps/chosen": -267.5029602050781, + "logps/rejected": -1327.024658203125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.0182528495788574, + "rewards/margins": 10.796907424926758, + "rewards/rejected": -12.815159797668457, + "step": 9230 + }, + { + "epoch": 0.55, + "learning_rate": 4.8941513667475545e-06, + "logits/chosen": -2.264735221862793, + "logits/rejected": -1.6527732610702515, + "logps/chosen": -305.03497314453125, + "logps/rejected": -1364.341796875, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.3580455780029297, + "rewards/margins": 10.84262752532959, + "rewards/rejected": -13.20067310333252, + "step": 9240 + }, + { + "epoch": 0.55, + "learning_rate": 4.8936514068050036e-06, + "logits/chosen": -2.24562406539917, + "logits/rejected": -1.5483133792877197, + "logps/chosen": -392.2589111328125, + "logps/rejected": -1524.176025390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.209529161453247, + "rewards/margins": 11.599828720092773, + "rewards/rejected": -14.809356689453125, + "step": 9250 + }, + { + "epoch": 0.55, + "learning_rate": 4.893150294558694e-06, + "logits/chosen": -2.220803737640381, + "logits/rejected": -1.554065465927124, + "logps/chosen": -415.74444580078125, + "logps/rejected": -1517.1165771484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.394227981567383, + "rewards/margins": 11.332823753356934, + "rewards/rejected": -14.727052688598633, + "step": 9260 + }, + { + "epoch": 0.55, + "learning_rate": 4.892648030249863e-06, + "logits/chosen": -2.2497756481170654, + "logits/rejected": -1.4744913578033447, + "logps/chosen": -423.9326171875, + "logps/rejected": -1512.3385009765625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.574402332305908, + "rewards/margins": 11.116795539855957, + "rewards/rejected": -14.691195487976074, + "step": 9270 + }, + { + "epoch": 0.55, + "learning_rate": 4.892144614120302e-06, + "logits/chosen": -2.171947717666626, + "logits/rejected": -1.5441315174102783, + "logps/chosen": -441.9888610839844, + "logps/rejected": -1565.599609375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6988754272460938, + "rewards/margins": 11.506758689880371, + "rewards/rejected": -15.205635070800781, + "step": 9280 + }, + { + "epoch": 0.55, + "learning_rate": 4.891640046412354e-06, + "logits/chosen": -2.2582972049713135, + "logits/rejected": -1.5816400051116943, + "logps/chosen": -469.9560546875, + "logps/rejected": -1675.3583984375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9690468311309814, + "rewards/margins": 12.335711479187012, + "rewards/rejected": -16.304758071899414, + "step": 9290 + }, + { + "epoch": 0.55, + "learning_rate": 4.891134327368919e-06, + "logits/chosen": -2.1502461433410645, + "logits/rejected": -1.2352086305618286, + "logps/chosen": -527.8475341796875, + "logps/rejected": -1568.724609375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.556558132171631, + "rewards/margins": 10.692468643188477, + "rewards/rejected": -15.249025344848633, + "step": 9300 + }, + { + "epoch": 0.56, + "learning_rate": 4.890627457233447e-06, + "logits/chosen": -2.2037558555603027, + "logits/rejected": -1.323549509048462, + "logps/chosen": -582.61767578125, + "logps/rejected": -1681.5670166015625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145630836486816, + "rewards/margins": 11.232614517211914, + "rewards/rejected": -16.378244400024414, + "step": 9310 + }, + { + "epoch": 0.56, + "learning_rate": 4.890119436249949e-06, + "logits/chosen": -2.2074286937713623, + "logits/rejected": -1.374140977859497, + "logps/chosen": -563.8267822265625, + "logps/rejected": -1678.455810546875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.956040382385254, + "rewards/margins": 11.370257377624512, + "rewards/rejected": -16.326297760009766, + "step": 9320 + }, + { + "epoch": 0.56, + "learning_rate": 4.889610264662984e-06, + "logits/chosen": -2.2065980434417725, + "logits/rejected": -1.4954293966293335, + "logps/chosen": -607.862548828125, + "logps/rejected": -1670.8304443359375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.385449409484863, + "rewards/margins": 10.881214141845703, + "rewards/rejected": -16.26666259765625, + "step": 9330 + }, + { + "epoch": 0.56, + "learning_rate": 4.8890999427176685e-06, + "logits/chosen": -2.21960711479187, + "logits/rejected": -1.3500912189483643, + "logps/chosen": -613.028564453125, + "logps/rejected": -1718.5709228515625, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.436325550079346, + "rewards/margins": 11.299176216125488, + "rewards/rejected": -16.73550033569336, + "step": 9340 + }, + { + "epoch": 0.56, + "learning_rate": 4.888588470659672e-06, + "logits/chosen": -2.2409396171569824, + "logits/rejected": -1.4177013635635376, + "logps/chosen": -658.2142333984375, + "logps/rejected": -1661.774658203125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9222002029418945, + "rewards/margins": 10.26114559173584, + "rewards/rejected": -16.183345794677734, + "step": 9350 + }, + { + "epoch": 0.56, + "learning_rate": 4.888075848735216e-06, + "logits/chosen": -2.1812713146209717, + "logits/rejected": -1.3793461322784424, + "logps/chosen": -665.5093994140625, + "logps/rejected": -1843.179931640625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915060520172119, + "rewards/margins": 12.07603931427002, + "rewards/rejected": -17.991100311279297, + "step": 9360 + }, + { + "epoch": 0.56, + "learning_rate": 4.887562077191076e-06, + "logits/chosen": -2.2054154872894287, + "logits/rejected": -1.2675275802612305, + "logps/chosen": -649.4739990234375, + "logps/rejected": -1726.609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.741918563842773, + "rewards/margins": 11.077942848205566, + "rewards/rejected": -16.819862365722656, + "step": 9370 + }, + { + "epoch": 0.56, + "learning_rate": 4.887047156274582e-06, + "logits/chosen": -2.179262161254883, + "logits/rejected": -1.3902853727340698, + "logps/chosen": -598.9564819335938, + "logps/rejected": -1646.9781494140625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.333521842956543, + "rewards/margins": 10.668790817260742, + "rewards/rejected": -16.0023136138916, + "step": 9380 + }, + { + "epoch": 0.56, + "learning_rate": 4.8865310862336185e-06, + "logits/chosen": -2.193140745162964, + "logits/rejected": -1.4499452114105225, + "logps/chosen": -602.9285888671875, + "logps/rejected": -1688.9525146484375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.324109077453613, + "rewards/margins": 11.122973442077637, + "rewards/rejected": -16.44708251953125, + "step": 9390 + }, + { + "epoch": 0.56, + "learning_rate": 4.88601386731662e-06, + "logits/chosen": -2.264954090118408, + "logits/rejected": -1.6134716272354126, + "logps/chosen": -488.4305725097656, + "logps/rejected": -1709.612060546875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.206101417541504, + "rewards/margins": 12.442547798156738, + "rewards/rejected": -16.64864730834961, + "step": 9400 + }, + { + "epoch": 0.56, + "learning_rate": 4.885495499772577e-06, + "logits/chosen": -2.248412847518921, + "logits/rejected": -1.4623018503189087, + "logps/chosen": -434.98394775390625, + "logps/rejected": -1625.650634765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5981316566467285, + "rewards/margins": 12.21182632446289, + "rewards/rejected": -15.809957504272461, + "step": 9410 + }, + { + "epoch": 0.56, + "learning_rate": 4.88497598385103e-06, + "logits/chosen": -2.2230143547058105, + "logits/rejected": -1.5362716913223267, + "logps/chosen": -438.2327575683594, + "logps/rejected": -1714.072265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7328994274139404, + "rewards/margins": 12.959831237792969, + "rewards/rejected": -16.692729949951172, + "step": 9420 + }, + { + "epoch": 0.56, + "learning_rate": 4.884455319802075e-06, + "logits/chosen": -2.3442955017089844, + "logits/rejected": -1.7354028224945068, + "logps/chosen": -446.5459899902344, + "logps/rejected": -1512.3111572265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7624008655548096, + "rewards/margins": 10.911413192749023, + "rewards/rejected": -14.673812866210938, + "step": 9430 + }, + { + "epoch": 0.56, + "learning_rate": 4.883933507876359e-06, + "logits/chosen": -2.183577060699463, + "logits/rejected": -1.418748378753662, + "logps/chosen": -479.88873291015625, + "logps/rejected": -1623.2200927734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.037762641906738, + "rewards/margins": 11.74618911743164, + "rewards/rejected": -15.783950805664062, + "step": 9440 + }, + { + "epoch": 0.56, + "learning_rate": 4.883410548325083e-06, + "logits/chosen": -2.2914652824401855, + "logits/rejected": -1.5909273624420166, + "logps/chosen": -443.28009033203125, + "logps/rejected": -1625.1402587890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7391343116760254, + "rewards/margins": 12.068460464477539, + "rewards/rejected": -15.807594299316406, + "step": 9450 + }, + { + "epoch": 0.56, + "learning_rate": 4.8828864414e-06, + "logits/chosen": -2.210989236831665, + "logits/rejected": -1.5576021671295166, + "logps/chosen": -466.12548828125, + "logps/rejected": -1740.948974609375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9479594230651855, + "rewards/margins": 13.016412734985352, + "rewards/rejected": -16.964370727539062, + "step": 9460 + }, + { + "epoch": 0.56, + "learning_rate": 4.8823611873534134e-06, + "logits/chosen": -2.3046963214874268, + "logits/rejected": -1.5231683254241943, + "logps/chosen": -485.19989013671875, + "logps/rejected": -1747.239013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1786627769470215, + "rewards/margins": 12.837244033813477, + "rewards/rejected": -17.01590919494629, + "step": 9470 + }, + { + "epoch": 0.57, + "learning_rate": 4.881834786438183e-06, + "logits/chosen": -2.289167881011963, + "logits/rejected": -1.684522271156311, + "logps/chosen": -488.9524841308594, + "logps/rejected": -1638.191162109375, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2102861404418945, + "rewards/margins": 11.725688934326172, + "rewards/rejected": -15.935976028442383, + "step": 9480 + }, + { + "epoch": 0.57, + "learning_rate": 4.881307238907716e-06, + "logits/chosen": -2.3047173023223877, + "logits/rejected": -1.4057815074920654, + "logps/chosen": -503.7521057128906, + "logps/rejected": -1765.227783203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.340212821960449, + "rewards/margins": 12.857328414916992, + "rewards/rejected": -17.197542190551758, + "step": 9490 + }, + { + "epoch": 0.57, + "learning_rate": 4.8807785450159745e-06, + "logits/chosen": -2.3061840534210205, + "logits/rejected": -1.5801740884780884, + "logps/chosen": -405.59466552734375, + "logps/rejected": -1687.1650390625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3099091053009033, + "rewards/margins": 13.12306022644043, + "rewards/rejected": -16.43297004699707, + "step": 9500 + }, + { + "epoch": 0.57, + "learning_rate": 4.880248705017472e-06, + "logits/chosen": -2.3427414894104004, + "logits/rejected": -1.6258172988891602, + "logps/chosen": -390.35797119140625, + "logps/rejected": -1692.2574462890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.21345591545105, + "rewards/margins": 13.277789115905762, + "rewards/rejected": -16.49124526977539, + "step": 9510 + }, + { + "epoch": 0.57, + "learning_rate": 4.8797177191672725e-06, + "logits/chosen": -2.3130557537078857, + "logits/rejected": -1.6256719827651978, + "logps/chosen": -355.9117126464844, + "logps/rejected": -1556.2890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.849490165710449, + "rewards/margins": 12.275964736938477, + "rewards/rejected": -15.125454902648926, + "step": 9520 + }, + { + "epoch": 0.57, + "learning_rate": 4.879185587720995e-06, + "logits/chosen": -2.3131377696990967, + "logits/rejected": -1.70281183719635, + "logps/chosen": -357.9950256347656, + "logps/rejected": -1703.295166015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.853206157684326, + "rewards/margins": 13.744196891784668, + "rewards/rejected": -16.597402572631836, + "step": 9530 + }, + { + "epoch": 0.57, + "learning_rate": 4.878652310934804e-06, + "logits/chosen": -2.355252504348755, + "logits/rejected": -1.5308891534805298, + "logps/chosen": -374.7494201660156, + "logps/rejected": -1644.110595703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.090878486633301, + "rewards/margins": 12.905454635620117, + "rewards/rejected": -15.996332168579102, + "step": 9540 + }, + { + "epoch": 0.57, + "learning_rate": 4.878117889065422e-06, + "logits/chosen": -2.3175716400146484, + "logits/rejected": -1.6007827520370483, + "logps/chosen": -395.1104736328125, + "logps/rejected": -1585.636962890625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1982083320617676, + "rewards/margins": 12.213409423828125, + "rewards/rejected": -15.41161823272705, + "step": 9550 + }, + { + "epoch": 0.57, + "learning_rate": 4.877582322370118e-06, + "logits/chosen": -2.3214588165283203, + "logits/rejected": -1.5859684944152832, + "logps/chosen": -342.2489013671875, + "logps/rejected": -1569.713623046875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.723304271697998, + "rewards/margins": 12.534589767456055, + "rewards/rejected": -15.257893562316895, + "step": 9560 + }, + { + "epoch": 0.57, + "learning_rate": 4.877045611106715e-06, + "logits/chosen": -2.3186450004577637, + "logits/rejected": -1.6536686420440674, + "logps/chosen": -348.774658203125, + "logps/rejected": -1534.404541015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7525031566619873, + "rewards/margins": 12.146647453308105, + "rewards/rejected": -14.899151802062988, + "step": 9570 + }, + { + "epoch": 0.57, + "learning_rate": 4.876507755533584e-06, + "logits/chosen": -2.308140993118286, + "logits/rejected": -1.487892985343933, + "logps/chosen": -342.5472106933594, + "logps/rejected": -1609.810302734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6737747192382812, + "rewards/margins": 12.968557357788086, + "rewards/rejected": -15.642333984375, + "step": 9580 + }, + { + "epoch": 0.57, + "learning_rate": 4.875968755909651e-06, + "logits/chosen": -2.317688465118408, + "logits/rejected": -1.6480190753936768, + "logps/chosen": -361.68829345703125, + "logps/rejected": -1458.328857421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9470102787017822, + "rewards/margins": 11.199178695678711, + "rewards/rejected": -14.14618968963623, + "step": 9590 + }, + { + "epoch": 0.57, + "learning_rate": 4.8754286124943885e-06, + "logits/chosen": -2.330268383026123, + "logits/rejected": -1.634772539138794, + "logps/chosen": -371.482666015625, + "logps/rejected": -1580.1578369140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.004931688308716, + "rewards/margins": 12.371018409729004, + "rewards/rejected": -15.375950813293457, + "step": 9600 + }, + { + "epoch": 0.57, + "learning_rate": 4.874887325547822e-06, + "logits/chosen": -2.3526101112365723, + "logits/rejected": -1.7061420679092407, + "logps/chosen": -358.02685546875, + "logps/rejected": -1540.4295654296875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8843259811401367, + "rewards/margins": 12.08558464050293, + "rewards/rejected": -14.969911575317383, + "step": 9610 + }, + { + "epoch": 0.57, + "learning_rate": 4.874344895330528e-06, + "logits/chosen": -2.3022735118865967, + "logits/rejected": -1.6766659021377563, + "logps/chosen": -367.90216064453125, + "logps/rejected": -1544.2366943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.943042755126953, + "rewards/margins": 12.049722671508789, + "rewards/rejected": -14.992765426635742, + "step": 9620 + }, + { + "epoch": 0.57, + "learning_rate": 4.873801322103632e-06, + "logits/chosen": -2.295130729675293, + "logits/rejected": -1.7196531295776367, + "logps/chosen": -381.6406555175781, + "logps/rejected": -1671.3160400390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.142312526702881, + "rewards/margins": 13.124417304992676, + "rewards/rejected": -16.2667293548584, + "step": 9630 + }, + { + "epoch": 0.57, + "learning_rate": 4.873256606128809e-06, + "logits/chosen": -2.2870893478393555, + "logits/rejected": -1.5744203329086304, + "logps/chosen": -378.1920471191406, + "logps/rejected": -1657.543701171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.0898327827453613, + "rewards/margins": 13.041587829589844, + "rewards/rejected": -16.13142204284668, + "step": 9640 + }, + { + "epoch": 0.58, + "learning_rate": 4.872710747668286e-06, + "logits/chosen": -2.27610182762146, + "logits/rejected": -1.6383521556854248, + "logps/chosen": -388.7374267578125, + "logps/rejected": -1665.726318359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.167288303375244, + "rewards/margins": 13.040019035339355, + "rewards/rejected": -16.20730972290039, + "step": 9650 + }, + { + "epoch": 0.58, + "learning_rate": 4.872163746984839e-06, + "logits/chosen": -2.350041389465332, + "logits/rejected": -1.7389014959335327, + "logps/chosen": -385.111083984375, + "logps/rejected": -1612.0074462890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.170299530029297, + "rewards/margins": 12.508148193359375, + "rewards/rejected": -15.678448677062988, + "step": 9660 + }, + { + "epoch": 0.58, + "learning_rate": 4.871615604341796e-06, + "logits/chosen": -2.336073398590088, + "logits/rejected": -1.6882587671279907, + "logps/chosen": -390.6451110839844, + "logps/rejected": -1562.4796142578125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1452720165252686, + "rewards/margins": 12.033903121948242, + "rewards/rejected": -15.179174423217773, + "step": 9670 + }, + { + "epoch": 0.58, + "learning_rate": 4.8710663200030295e-06, + "logits/chosen": -2.2882628440856934, + "logits/rejected": -1.5934340953826904, + "logps/chosen": -384.35931396484375, + "logps/rejected": -1698.881591796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1444833278656006, + "rewards/margins": 13.383455276489258, + "rewards/rejected": -16.527938842773438, + "step": 9680 + }, + { + "epoch": 0.58, + "learning_rate": 4.8705158942329676e-06, + "logits/chosen": -2.325416326522827, + "logits/rejected": -1.7027803659439087, + "logps/chosen": -394.05242919921875, + "logps/rejected": -1560.640380859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2070670127868652, + "rewards/margins": 11.944730758666992, + "rewards/rejected": -15.1517972946167, + "step": 9690 + }, + { + "epoch": 0.58, + "learning_rate": 4.869964327296585e-06, + "logits/chosen": -2.3536763191223145, + "logits/rejected": -1.6338655948638916, + "logps/chosen": -387.1971435546875, + "logps/rejected": -1595.1439208984375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.13749623298645, + "rewards/margins": 12.379084587097168, + "rewards/rejected": -15.516581535339355, + "step": 9700 + }, + { + "epoch": 0.58, + "learning_rate": 4.869411619459405e-06, + "logits/chosen": -2.2667434215545654, + "logits/rejected": -1.411388635635376, + "logps/chosen": -383.4383239746094, + "logps/rejected": -1714.485107421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.111652374267578, + "rewards/margins": 13.584358215332031, + "rewards/rejected": -16.696012496948242, + "step": 9710 + }, + { + "epoch": 0.58, + "learning_rate": 4.8688577709875015e-06, + "logits/chosen": -2.3001608848571777, + "logits/rejected": -1.5940030813217163, + "logps/chosen": -377.70208740234375, + "logps/rejected": -1639.266357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1269919872283936, + "rewards/margins": 12.839136123657227, + "rewards/rejected": -15.966127395629883, + "step": 9720 + }, + { + "epoch": 0.58, + "learning_rate": 4.868302782147497e-06, + "logits/chosen": -2.309725284576416, + "logits/rejected": -1.612256646156311, + "logps/chosen": -396.6005859375, + "logps/rejected": -1629.217041015625, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2379837036132812, + "rewards/margins": 12.616031646728516, + "rewards/rejected": -15.854016304016113, + "step": 9730 + }, + { + "epoch": 0.58, + "learning_rate": 4.867746653206564e-06, + "logits/chosen": -2.3514211177825928, + "logits/rejected": -1.721853256225586, + "logps/chosen": -331.0345764160156, + "logps/rejected": -1501.3438720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.602943181991577, + "rewards/margins": 11.964862823486328, + "rewards/rejected": -14.5678071975708, + "step": 9740 + }, + { + "epoch": 0.58, + "learning_rate": 4.8671893844324215e-06, + "logits/chosen": -2.371375322341919, + "logits/rejected": -1.7758678197860718, + "logps/chosen": -317.69183349609375, + "logps/rejected": -1529.5576171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5025980472564697, + "rewards/margins": 12.339644432067871, + "rewards/rejected": -14.842244148254395, + "step": 9750 + }, + { + "epoch": 0.58, + "learning_rate": 4.86663097609334e-06, + "logits/chosen": -2.4010980129241943, + "logits/rejected": -1.7733519077301025, + "logps/chosen": -327.04669189453125, + "logps/rejected": -1497.9600830078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5364809036254883, + "rewards/margins": 11.986761093139648, + "rewards/rejected": -14.523241996765137, + "step": 9760 + }, + { + "epoch": 0.58, + "learning_rate": 4.866071428458136e-06, + "logits/chosen": -2.391141891479492, + "logits/rejected": -1.7254180908203125, + "logps/chosen": -328.54901123046875, + "logps/rejected": -1466.5294189453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5851070880889893, + "rewards/margins": 11.637004852294922, + "rewards/rejected": -14.222111701965332, + "step": 9770 + }, + { + "epoch": 0.58, + "learning_rate": 4.865510741796178e-06, + "logits/chosen": -2.338890552520752, + "logits/rejected": -1.6453098058700562, + "logps/chosen": -334.1186828613281, + "logps/rejected": -1529.055419921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5982728004455566, + "rewards/margins": 12.25158405303955, + "rewards/rejected": -14.84985637664795, + "step": 9780 + }, + { + "epoch": 0.58, + "learning_rate": 4.864948916377379e-06, + "logits/chosen": -2.351754665374756, + "logits/rejected": -1.6584587097167969, + "logps/chosen": -344.3735046386719, + "logps/rejected": -1583.5953369140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8027729988098145, + "rewards/margins": 12.598599433898926, + "rewards/rejected": -15.401372909545898, + "step": 9790 + }, + { + "epoch": 0.58, + "learning_rate": 4.8643859524722e-06, + "logits/chosen": -2.299443483352661, + "logits/rejected": -1.6576974391937256, + "logps/chosen": -353.61834716796875, + "logps/rejected": -1606.5152587890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.832767963409424, + "rewards/margins": 12.790574073791504, + "rewards/rejected": -15.623342514038086, + "step": 9800 + }, + { + "epoch": 0.58, + "learning_rate": 4.863821850351655e-06, + "logits/chosen": -2.4020934104919434, + "logits/rejected": -1.5724455118179321, + "logps/chosen": -345.77667236328125, + "logps/rejected": -1534.543212890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7939913272857666, + "rewards/margins": 12.102836608886719, + "rewards/rejected": -14.896827697753906, + "step": 9810 + }, + { + "epoch": 0.59, + "learning_rate": 4.863256610287301e-06, + "logits/chosen": -2.3092379570007324, + "logits/rejected": -1.605018973350525, + "logps/chosen": -359.61297607421875, + "logps/rejected": -1604.901611328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.8473477363586426, + "rewards/margins": 12.759544372558594, + "rewards/rejected": -15.606890678405762, + "step": 9820 + }, + { + "epoch": 0.59, + "learning_rate": 4.862690232551246e-06, + "logits/chosen": -2.361164093017578, + "logits/rejected": -1.538800597190857, + "logps/chosen": -348.7550048828125, + "logps/rejected": -1642.955078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7920846939086914, + "rewards/margins": 13.18958854675293, + "rewards/rejected": -15.981674194335938, + "step": 9830 + }, + { + "epoch": 0.59, + "learning_rate": 4.862122717416142e-06, + "logits/chosen": -2.2784175872802734, + "logits/rejected": -1.5250869989395142, + "logps/chosen": -383.6730651855469, + "logps/rejected": -1663.1865234375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1062276363372803, + "rewards/margins": 13.08294677734375, + "rewards/rejected": -16.18917465209961, + "step": 9840 + }, + { + "epoch": 0.59, + "learning_rate": 4.861554065155192e-06, + "logits/chosen": -2.266939401626587, + "logits/rejected": -1.5546085834503174, + "logps/chosen": -412.69464111328125, + "logps/rejected": -1633.615966796875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.42669677734375, + "rewards/margins": 12.46739387512207, + "rewards/rejected": -15.894088745117188, + "step": 9850 + }, + { + "epoch": 0.59, + "learning_rate": 4.860984276042146e-06, + "logits/chosen": -2.3296289443969727, + "logits/rejected": -1.499915361404419, + "logps/chosen": -403.8296813964844, + "logps/rejected": -1688.7779541015625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2878787517547607, + "rewards/margins": 13.164057731628418, + "rewards/rejected": -16.451934814453125, + "step": 9860 + }, + { + "epoch": 0.59, + "learning_rate": 4.860413350351299e-06, + "logits/chosen": -2.273385524749756, + "logits/rejected": -1.6406139135360718, + "logps/chosen": -433.3807067871094, + "logps/rejected": -1722.209716796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6259028911590576, + "rewards/margins": 13.15498161315918, + "rewards/rejected": -16.780881881713867, + "step": 9870 + }, + { + "epoch": 0.59, + "learning_rate": 4.859841288357496e-06, + "logits/chosen": -2.313106060028076, + "logits/rejected": -1.5650959014892578, + "logps/chosen": -427.267333984375, + "logps/rejected": -1705.982177734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.552624225616455, + "rewards/margins": 13.072929382324219, + "rewards/rejected": -16.625553131103516, + "step": 9880 + }, + { + "epoch": 0.59, + "learning_rate": 4.859268090336125e-06, + "logits/chosen": -2.313354969024658, + "logits/rejected": -1.4915016889572144, + "logps/chosen": -441.2470703125, + "logps/rejected": -1709.267822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7113285064697266, + "rewards/margins": 12.930192947387695, + "rewards/rejected": -16.641521453857422, + "step": 9890 + }, + { + "epoch": 0.59, + "learning_rate": 4.8586937565631265e-06, + "logits/chosen": -2.291609048843384, + "logits/rejected": -1.5757310390472412, + "logps/chosen": -435.40008544921875, + "logps/rejected": -1563.6767578125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.616590976715088, + "rewards/margins": 11.577680587768555, + "rewards/rejected": -15.194272994995117, + "step": 9900 + }, + { + "epoch": 0.59, + "learning_rate": 4.858118287314984e-06, + "logits/chosen": -2.306222438812256, + "logits/rejected": -1.685829758644104, + "logps/chosen": -441.7105407714844, + "logps/rejected": -1667.587646484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.700347900390625, + "rewards/margins": 12.54529094696045, + "rewards/rejected": -16.245637893676758, + "step": 9910 + }, + { + "epoch": 0.59, + "learning_rate": 4.857541682868727e-06, + "logits/chosen": -2.3124632835388184, + "logits/rejected": -1.5065261125564575, + "logps/chosen": -418.69830322265625, + "logps/rejected": -1584.959716796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4285309314727783, + "rewards/margins": 11.977086067199707, + "rewards/rejected": -15.405616760253906, + "step": 9920 + }, + { + "epoch": 0.59, + "learning_rate": 4.856963943501935e-06, + "logits/chosen": -2.3004913330078125, + "logits/rejected": -1.4776780605316162, + "logps/chosen": -433.50311279296875, + "logps/rejected": -1602.159912109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.578261137008667, + "rewards/margins": 11.9971923828125, + "rewards/rejected": -15.575454711914062, + "step": 9930 + }, + { + "epoch": 0.59, + "learning_rate": 4.856385069492731e-06, + "logits/chosen": -2.305677890777588, + "logits/rejected": -1.4151084423065186, + "logps/chosen": -469.8529357910156, + "logps/rejected": -1695.8245849609375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9903392791748047, + "rewards/margins": 12.527538299560547, + "rewards/rejected": -16.517879486083984, + "step": 9940 + }, + { + "epoch": 0.59, + "learning_rate": 4.855805061119783e-06, + "logits/chosen": -2.3499844074249268, + "logits/rejected": -1.583374261856079, + "logps/chosen": -454.5302734375, + "logps/rejected": -1753.8560791015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8433640003204346, + "rewards/margins": 13.258729934692383, + "rewards/rejected": -17.102094650268555, + "step": 9950 + }, + { + "epoch": 0.59, + "learning_rate": 4.85522391866231e-06, + "logits/chosen": -2.2709319591522217, + "logits/rejected": -1.525603175163269, + "logps/chosen": -479.5948181152344, + "logps/rejected": -1803.0755615234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.077481269836426, + "rewards/margins": 13.503247261047363, + "rewards/rejected": -17.580730438232422, + "step": 9960 + }, + { + "epoch": 0.59, + "learning_rate": 4.854641642400072e-06, + "logits/chosen": -2.3332152366638184, + "logits/rejected": -1.6344425678253174, + "logps/chosen": -477.22650146484375, + "logps/rejected": -1687.0875244140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.018692970275879, + "rewards/margins": 12.416613578796387, + "rewards/rejected": -16.435306549072266, + "step": 9970 + }, + { + "epoch": 0.6, + "learning_rate": 4.854058232613377e-06, + "logits/chosen": -2.2278342247009277, + "logits/rejected": -1.4650871753692627, + "logps/chosen": -474.5091247558594, + "logps/rejected": -1807.5751953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.019497871398926, + "rewards/margins": 13.614120483398438, + "rewards/rejected": -17.633617401123047, + "step": 9980 + }, + { + "epoch": 0.6, + "learning_rate": 4.85347368958308e-06, + "logits/chosen": -2.25191068649292, + "logits/rejected": -1.4371461868286133, + "logps/chosen": -455.2151794433594, + "logps/rejected": -1657.73828125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8645339012145996, + "rewards/margins": 12.277353286743164, + "rewards/rejected": -16.141887664794922, + "step": 9990 + }, + { + "epoch": 0.6, + "learning_rate": 4.852888013590578e-06, + "logits/chosen": -2.3679463863372803, + "logits/rejected": -1.529876470565796, + "logps/chosen": -469.40167236328125, + "logps/rejected": -1763.008544921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.958000659942627, + "rewards/margins": 13.208361625671387, + "rewards/rejected": -17.166362762451172, + "step": 10000 + }, + { + "epoch": 0.6, + "learning_rate": 4.852301204917817e-06, + "logits/chosen": -2.31992506980896, + "logits/rejected": -1.6072018146514893, + "logps/chosen": -466.525634765625, + "logps/rejected": -1719.9130859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.93127179145813, + "rewards/margins": 12.816929817199707, + "rewards/rejected": -16.74820327758789, + "step": 10010 + }, + { + "epoch": 0.6, + "learning_rate": 4.8517132638472845e-06, + "logits/chosen": -2.2832045555114746, + "logits/rejected": -1.5844765901565552, + "logps/chosen": -473.851318359375, + "logps/rejected": -1753.65234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.090781211853027, + "rewards/margins": 13.011868476867676, + "rewards/rejected": -17.102651596069336, + "step": 10020 + }, + { + "epoch": 0.6, + "learning_rate": 4.851124190662018e-06, + "logits/chosen": -2.253577470779419, + "logits/rejected": -1.596097707748413, + "logps/chosen": -475.50506591796875, + "logps/rejected": -1733.2347412109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0412702560424805, + "rewards/margins": 12.861485481262207, + "rewards/rejected": -16.902755737304688, + "step": 10030 + }, + { + "epoch": 0.6, + "learning_rate": 4.850533985645596e-06, + "logits/chosen": -2.3281216621398926, + "logits/rejected": -1.550875186920166, + "logps/chosen": -473.8526916503906, + "logps/rejected": -1710.7763671875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.038908958435059, + "rewards/margins": 12.612730026245117, + "rewards/rejected": -16.651639938354492, + "step": 10040 + }, + { + "epoch": 0.6, + "learning_rate": 4.849942649082143e-06, + "logits/chosen": -2.316848039627075, + "logits/rejected": -1.6924753189086914, + "logps/chosen": -456.60113525390625, + "logps/rejected": -1731.9547119140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.800999879837036, + "rewards/margins": 13.05566120147705, + "rewards/rejected": -16.85666275024414, + "step": 10050 + }, + { + "epoch": 0.6, + "learning_rate": 4.849350181256328e-06, + "logits/chosen": -2.2967631816864014, + "logits/rejected": -1.5331283807754517, + "logps/chosen": -446.1520080566406, + "logps/rejected": -1711.881591796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8036186695098877, + "rewards/margins": 12.87084674835205, + "rewards/rejected": -16.67446517944336, + "step": 10060 + }, + { + "epoch": 0.6, + "learning_rate": 4.848756582453367e-06, + "logits/chosen": -2.3733859062194824, + "logits/rejected": -1.6785045862197876, + "logps/chosen": -444.6415100097656, + "logps/rejected": -1665.856201171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7605717182159424, + "rewards/margins": 12.462240219116211, + "rewards/rejected": -16.22281265258789, + "step": 10070 + }, + { + "epoch": 0.6, + "learning_rate": 4.848161852959016e-06, + "logits/chosen": -2.2915256023406982, + "logits/rejected": -1.6250845193862915, + "logps/chosen": -458.7969665527344, + "logps/rejected": -1780.0556640625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868307590484619, + "rewards/margins": 13.48259162902832, + "rewards/rejected": -17.350900650024414, + "step": 10080 + }, + { + "epoch": 0.6, + "learning_rate": 4.84756599305958e-06, + "logits/chosen": -2.3065946102142334, + "logits/rejected": -1.490580677986145, + "logps/chosen": -466.52276611328125, + "logps/rejected": -1693.7955322265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9066154956817627, + "rewards/margins": 12.589859008789062, + "rewards/rejected": -16.49647331237793, + "step": 10090 + }, + { + "epoch": 0.6, + "learning_rate": 4.846969003041904e-06, + "logits/chosen": -2.3401432037353516, + "logits/rejected": -1.7592684030532837, + "logps/chosen": -387.288330078125, + "logps/rejected": -1567.8284912109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1910159587860107, + "rewards/margins": 12.050129890441895, + "rewards/rejected": -15.241145133972168, + "step": 10100 + }, + { + "epoch": 0.6, + "learning_rate": 4.84637088319338e-06, + "logits/chosen": -2.4184391498565674, + "logits/rejected": -1.7684799432754517, + "logps/chosen": -330.59210205078125, + "logps/rejected": -1426.1392822265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5901951789855957, + "rewards/margins": 11.237054824829102, + "rewards/rejected": -13.827247619628906, + "step": 10110 + }, + { + "epoch": 0.6, + "learning_rate": 4.845771633801943e-06, + "logits/chosen": -2.4472100734710693, + "logits/rejected": -1.8372148275375366, + "logps/chosen": -332.27093505859375, + "logps/rejected": -1404.7626953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.622218132019043, + "rewards/margins": 10.977821350097656, + "rewards/rejected": -13.600038528442383, + "step": 10120 + }, + { + "epoch": 0.6, + "learning_rate": 4.845171255156071e-06, + "logits/chosen": -2.4600298404693604, + "logits/rejected": -1.7609755992889404, + "logps/chosen": -323.5953063964844, + "logps/rejected": -1398.898193359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.5469110012054443, + "rewards/margins": 11.002888679504395, + "rewards/rejected": -13.549799919128418, + "step": 10130 + }, + { + "epoch": 0.6, + "learning_rate": 4.844569747544788e-06, + "logits/chosen": -2.4434409141540527, + "logits/rejected": -1.8402849435806274, + "logps/chosen": -348.03173828125, + "logps/rejected": -1337.024658203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7751474380493164, + "rewards/margins": 10.155153274536133, + "rewards/rejected": -12.93030071258545, + "step": 10140 + }, + { + "epoch": 0.61, + "learning_rate": 4.843967111257657e-06, + "logits/chosen": -2.5034077167510986, + "logits/rejected": -1.8496363162994385, + "logps/chosen": -333.05755615234375, + "logps/rejected": -1366.036865234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6412298679351807, + "rewards/margins": 10.578937530517578, + "rewards/rejected": -13.220166206359863, + "step": 10150 + }, + { + "epoch": 0.61, + "learning_rate": 4.84336334658479e-06, + "logits/chosen": -2.3950326442718506, + "logits/rejected": -1.7771482467651367, + "logps/chosen": -337.4697265625, + "logps/rejected": -1352.304443359375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.665102243423462, + "rewards/margins": 10.40674877166748, + "rewards/rejected": -13.071850776672363, + "step": 10160 + }, + { + "epoch": 0.61, + "learning_rate": 4.842758453816836e-06, + "logits/chosen": -2.4889121055603027, + "logits/rejected": -1.764366865158081, + "logps/chosen": -355.99102783203125, + "logps/rejected": -1457.259033203125, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.7994675636291504, + "rewards/margins": 11.337617874145508, + "rewards/rejected": -14.137086868286133, + "step": 10170 + }, + { + "epoch": 0.61, + "learning_rate": 4.842152433244993e-06, + "logits/chosen": -2.337820529937744, + "logits/rejected": -1.6166445016860962, + "logps/chosen": -405.42205810546875, + "logps/rejected": -1538.40283203125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3574180603027344, + "rewards/margins": 11.585280418395996, + "rewards/rejected": -14.94269847869873, + "step": 10180 + }, + { + "epoch": 0.61, + "learning_rate": 4.841545285160999e-06, + "logits/chosen": -2.3392021656036377, + "logits/rejected": -1.5345871448516846, + "logps/chosen": -421.6534729003906, + "logps/rejected": -1579.59521484375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.498281478881836, + "rewards/margins": 11.843964576721191, + "rewards/rejected": -15.342245101928711, + "step": 10190 + }, + { + "epoch": 0.61, + "learning_rate": 4.840937009857134e-06, + "logits/chosen": -2.431466579437256, + "logits/rejected": -1.7546300888061523, + "logps/chosen": -424.0138244628906, + "logps/rejected": -1567.0355224609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.493412494659424, + "rewards/margins": 11.734922409057617, + "rewards/rejected": -15.228334426879883, + "step": 10200 + }, + { + "epoch": 0.61, + "learning_rate": 4.840327607626222e-06, + "logits/chosen": -2.425391674041748, + "logits/rejected": -1.6900001764297485, + "logps/chosen": -412.4649963378906, + "logps/rejected": -1586.9127197265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.460069179534912, + "rewards/margins": 11.952142715454102, + "rewards/rejected": -15.412211418151855, + "step": 10210 + }, + { + "epoch": 0.61, + "learning_rate": 4.83971707876163e-06, + "logits/chosen": -2.3691442012786865, + "logits/rejected": -1.7089132070541382, + "logps/chosen": -402.077392578125, + "logps/rejected": -1523.695068359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.295611619949341, + "rewards/margins": 11.513331413269043, + "rewards/rejected": -14.808941841125488, + "step": 10220 + }, + { + "epoch": 0.61, + "learning_rate": 4.839105423557266e-06, + "logits/chosen": -2.4256014823913574, + "logits/rejected": -1.7248318195343018, + "logps/chosen": -416.3977966308594, + "logps/rejected": -1518.9757080078125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4559340476989746, + "rewards/margins": 11.29275131225586, + "rewards/rejected": -14.748684883117676, + "step": 10230 + }, + { + "epoch": 0.61, + "learning_rate": 4.838492642307582e-06, + "logits/chosen": -2.4299137592315674, + "logits/rejected": -1.8032993078231812, + "logps/chosen": -406.89056396484375, + "logps/rejected": -1589.9453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.305891513824463, + "rewards/margins": 12.145023345947266, + "rewards/rejected": -15.450912475585938, + "step": 10240 + }, + { + "epoch": 0.61, + "learning_rate": 4.837878735307568e-06, + "logits/chosen": -2.410402536392212, + "logits/rejected": -1.7604920864105225, + "logps/chosen": -423.964111328125, + "logps/rejected": -1531.781982421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.53564190864563, + "rewards/margins": 11.337638854980469, + "rewards/rejected": -14.873278617858887, + "step": 10250 + }, + { + "epoch": 0.61, + "learning_rate": 4.8372637028527615e-06, + "logits/chosen": -2.3629024028778076, + "logits/rejected": -1.7540137767791748, + "logps/chosen": -403.63214111328125, + "logps/rejected": -1510.798583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3658180236816406, + "rewards/margins": 11.297067642211914, + "rewards/rejected": -14.662884712219238, + "step": 10260 + }, + { + "epoch": 0.61, + "learning_rate": 4.836647545239238e-06, + "logits/chosen": -2.3919742107391357, + "logits/rejected": -1.6631660461425781, + "logps/chosen": -399.8009338378906, + "logps/rejected": -1603.7806396484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3589606285095215, + "rewards/margins": 12.233489990234375, + "rewards/rejected": -15.592450141906738, + "step": 10270 + }, + { + "epoch": 0.61, + "learning_rate": 4.836030262763617e-06, + "logits/chosen": -2.447124719619751, + "logits/rejected": -1.775372862815857, + "logps/chosen": -403.81036376953125, + "logps/rejected": -1516.9560546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3086230754852295, + "rewards/margins": 11.415384292602539, + "rewards/rejected": -14.724006652832031, + "step": 10280 + }, + { + "epoch": 0.61, + "learning_rate": 4.835411855723056e-06, + "logits/chosen": -2.388242244720459, + "logits/rejected": -1.6818279027938843, + "logps/chosen": -406.83880615234375, + "logps/rejected": -1558.1953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3902041912078857, + "rewards/margins": 11.751229286193848, + "rewards/rejected": -15.14143180847168, + "step": 10290 + }, + { + "epoch": 0.61, + "learning_rate": 4.834792324415258e-06, + "logits/chosen": -2.3939270973205566, + "logits/rejected": -1.6922829151153564, + "logps/chosen": -420.74554443359375, + "logps/rejected": -1554.173583984375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.488687515258789, + "rewards/margins": 11.619302749633789, + "rewards/rejected": -15.107989311218262, + "step": 10300 + }, + { + "epoch": 0.61, + "learning_rate": 4.834171669138465e-06, + "logits/chosen": -2.331195116043091, + "logits/rejected": -1.5520906448364258, + "logps/chosen": -440.03363037109375, + "logps/rejected": -1694.6156005859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6979739665985107, + "rewards/margins": 12.810748100280762, + "rewards/rejected": -16.508724212646484, + "step": 10310 + }, + { + "epoch": 0.62, + "learning_rate": 4.83354989019146e-06, + "logits/chosen": -2.389326572418213, + "logits/rejected": -1.646441102027893, + "logps/chosen": -484.1309509277344, + "logps/rejected": -1529.164794921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1588921546936035, + "rewards/margins": 10.689064979553223, + "rewards/rejected": -14.847956657409668, + "step": 10320 + }, + { + "epoch": 0.62, + "learning_rate": 4.832926987873568e-06, + "logits/chosen": -2.3943145275115967, + "logits/rejected": -1.6569563150405884, + "logps/chosen": -485.146484375, + "logps/rejected": -1663.623779296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.068404197692871, + "rewards/margins": 12.132455825805664, + "rewards/rejected": -16.20085906982422, + "step": 10330 + }, + { + "epoch": 0.62, + "learning_rate": 4.832302962484653e-06, + "logits/chosen": -2.3815627098083496, + "logits/rejected": -1.6946948766708374, + "logps/chosen": -438.61004638671875, + "logps/rejected": -1619.345458984375, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6708312034606934, + "rewards/margins": 12.082921981811523, + "rewards/rejected": -15.753750801086426, + "step": 10340 + }, + { + "epoch": 0.62, + "learning_rate": 4.831677814325122e-06, + "logits/chosen": -2.359438180923462, + "logits/rejected": -1.520464301109314, + "logps/chosen": -409.37939453125, + "logps/rejected": -1679.4814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3877663612365723, + "rewards/margins": 12.956100463867188, + "rewards/rejected": -16.3438663482666, + "step": 10350 + }, + { + "epoch": 0.62, + "learning_rate": 4.83105154369592e-06, + "logits/chosen": -2.347923994064331, + "logits/rejected": -1.642067313194275, + "logps/chosen": -401.6037292480469, + "logps/rejected": -1658.721923828125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3122801780700684, + "rewards/margins": 12.839370727539062, + "rewards/rejected": -16.15165138244629, + "step": 10360 + }, + { + "epoch": 0.62, + "learning_rate": 4.830424150898536e-06, + "logits/chosen": -2.4089293479919434, + "logits/rejected": -1.8063852787017822, + "logps/chosen": -407.0728759765625, + "logps/rejected": -1680.395751953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.405064105987549, + "rewards/margins": 12.949975967407227, + "rewards/rejected": -16.355037689208984, + "step": 10370 + }, + { + "epoch": 0.62, + "learning_rate": 4.8297956362349955e-06, + "logits/chosen": -2.3850860595703125, + "logits/rejected": -1.6219425201416016, + "logps/chosen": -472.61480712890625, + "logps/rejected": -1823.168701171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.03763484954834, + "rewards/margins": 13.751856803894043, + "rewards/rejected": -17.789493560791016, + "step": 10380 + }, + { + "epoch": 0.62, + "learning_rate": 4.8291660000078645e-06, + "logits/chosen": -2.386605739593506, + "logits/rejected": -1.6353731155395508, + "logps/chosen": -468.43328857421875, + "logps/rejected": -1725.983154296875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9814724922180176, + "rewards/margins": 12.836883544921875, + "rewards/rejected": -16.818355560302734, + "step": 10390 + }, + { + "epoch": 0.62, + "learning_rate": 4.828535242520251e-06, + "logits/chosen": -2.326812982559204, + "logits/rejected": -1.6147968769073486, + "logps/chosen": -493.498046875, + "logps/rejected": -1880.475830078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.250637531280518, + "rewards/margins": 14.105514526367188, + "rewards/rejected": -18.356151580810547, + "step": 10400 + }, + { + "epoch": 0.62, + "learning_rate": 4.8279033640758026e-06, + "logits/chosen": -2.334757089614868, + "logits/rejected": -1.6100568771362305, + "logps/chosen": -520.9363403320312, + "logps/rejected": -1734.78515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.537538051605225, + "rewards/margins": 12.366394996643066, + "rewards/rejected": -16.903932571411133, + "step": 10410 + }, + { + "epoch": 0.62, + "learning_rate": 4.8272703649787025e-06, + "logits/chosen": -2.285064935684204, + "logits/rejected": -1.6191980838775635, + "logps/chosen": -550.0294189453125, + "logps/rejected": -1853.9564208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.809708118438721, + "rewards/margins": 13.280553817749023, + "rewards/rejected": -18.09026336669922, + "step": 10420 + }, + { + "epoch": 0.62, + "learning_rate": 4.82663624553368e-06, + "logits/chosen": -2.288691997528076, + "logits/rejected": -1.3741743564605713, + "logps/chosen": -537.1492919921875, + "logps/rejected": -1870.395751953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.652230262756348, + "rewards/margins": 13.596692085266113, + "rewards/rejected": -18.24892234802246, + "step": 10430 + }, + { + "epoch": 0.62, + "learning_rate": 4.826001006045997e-06, + "logits/chosen": -2.277972936630249, + "logits/rejected": -1.5154342651367188, + "logps/chosen": -528.5621337890625, + "logps/rejected": -1802.9462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.540243148803711, + "rewards/margins": 13.051379203796387, + "rewards/rejected": -17.591623306274414, + "step": 10440 + }, + { + "epoch": 0.62, + "learning_rate": 4.8253646468214605e-06, + "logits/chosen": -2.3220441341400146, + "logits/rejected": -1.634585976600647, + "logps/chosen": -515.4275512695312, + "logps/rejected": -1920.3470458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3906354904174805, + "rewards/margins": 14.369260787963867, + "rewards/rejected": -18.759899139404297, + "step": 10450 + }, + { + "epoch": 0.62, + "learning_rate": 4.824727168166412e-06, + "logits/chosen": -2.3321425914764404, + "logits/rejected": -1.5589665174484253, + "logps/chosen": -467.23419189453125, + "logps/rejected": -1700.685791015625, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.968186855316162, + "rewards/margins": 12.597208023071289, + "rewards/rejected": -16.56539535522461, + "step": 10460 + }, + { + "epoch": 0.62, + "learning_rate": 4.824088570387735e-06, + "logits/chosen": -2.3666763305664062, + "logits/rejected": -1.5936152935028076, + "logps/chosen": -445.68463134765625, + "logps/rejected": -1639.165283203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.741741895675659, + "rewards/margins": 12.212461471557617, + "rewards/rejected": -15.954205513000488, + "step": 10470 + }, + { + "epoch": 0.62, + "learning_rate": 4.8234488537928494e-06, + "logits/chosen": -2.442002773284912, + "logits/rejected": -1.8327852487564087, + "logps/chosen": -440.7886657714844, + "logps/rejected": -1636.8863525390625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.706958293914795, + "rewards/margins": 12.229646682739258, + "rewards/rejected": -15.936605453491211, + "step": 10480 + }, + { + "epoch": 0.63, + "learning_rate": 4.822808018689716e-06, + "logits/chosen": -2.3741378784179688, + "logits/rejected": -1.8274452686309814, + "logps/chosen": -439.83111572265625, + "logps/rejected": -1732.6029052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.698521852493286, + "rewards/margins": 13.17846965789795, + "rewards/rejected": -16.876991271972656, + "step": 10490 + }, + { + "epoch": 0.63, + "learning_rate": 4.822166065386832e-06, + "logits/chosen": -2.3552584648132324, + "logits/rejected": -1.7586654424667358, + "logps/chosen": -452.31927490234375, + "logps/rejected": -1758.6083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7766194343566895, + "rewards/margins": 13.359225273132324, + "rewards/rejected": -17.135845184326172, + "step": 10500 + }, + { + "epoch": 0.63, + "learning_rate": 4.821522994193233e-06, + "logits/chosen": -2.392125129699707, + "logits/rejected": -1.644513726234436, + "logps/chosen": -427.2276306152344, + "logps/rejected": -1725.9547119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.665133237838745, + "rewards/margins": 13.152729988098145, + "rewards/rejected": -16.8178653717041, + "step": 10510 + }, + { + "epoch": 0.63, + "learning_rate": 4.820878805418494e-06, + "logits/chosen": -2.3738455772399902, + "logits/rejected": -1.7174644470214844, + "logps/chosen": -440.838134765625, + "logps/rejected": -1650.5198974609375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6799590587615967, + "rewards/margins": 12.372565269470215, + "rewards/rejected": -16.05252456665039, + "step": 10520 + }, + { + "epoch": 0.63, + "learning_rate": 4.820233499372728e-06, + "logits/chosen": -2.41011381149292, + "logits/rejected": -1.722967505455017, + "logps/chosen": -442.35516357421875, + "logps/rejected": -1593.0517578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6567656993865967, + "rewards/margins": 11.841913223266602, + "rewards/rejected": -15.498678207397461, + "step": 10530 + }, + { + "epoch": 0.63, + "learning_rate": 4.819587076366585e-06, + "logits/chosen": -2.422715902328491, + "logits/rejected": -1.5843665599822998, + "logps/chosen": -422.5785217285156, + "logps/rejected": -1741.5777587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5665767192840576, + "rewards/margins": 13.402186393737793, + "rewards/rejected": -16.968761444091797, + "step": 10540 + }, + { + "epoch": 0.63, + "learning_rate": 4.8189395367112536e-06, + "logits/chosen": -2.3644866943359375, + "logits/rejected": -1.7805778980255127, + "logps/chosen": -431.19500732421875, + "logps/rejected": -1651.44140625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6359496116638184, + "rewards/margins": 12.432806015014648, + "rewards/rejected": -16.068756103515625, + "step": 10550 + }, + { + "epoch": 0.63, + "learning_rate": 4.8182908807184585e-06, + "logits/chosen": -2.4345521926879883, + "logits/rejected": -1.7543081045150757, + "logps/chosen": -450.5582580566406, + "logps/rejected": -1619.9764404296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7654616832733154, + "rewards/margins": 11.995326042175293, + "rewards/rejected": -15.760787963867188, + "step": 10560 + }, + { + "epoch": 0.63, + "learning_rate": 4.817641108700462e-06, + "logits/chosen": -2.3186638355255127, + "logits/rejected": -1.6900421380996704, + "logps/chosen": -453.237060546875, + "logps/rejected": -1823.6851806640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8061580657958984, + "rewards/margins": 13.974014282226562, + "rewards/rejected": -17.780170440673828, + "step": 10570 + }, + { + "epoch": 0.63, + "learning_rate": 4.816990220970066e-06, + "logits/chosen": -2.3311851024627686, + "logits/rejected": -1.6597343683242798, + "logps/chosen": -413.97222900390625, + "logps/rejected": -1627.908935546875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4992756843566895, + "rewards/margins": 12.351863861083984, + "rewards/rejected": -15.851140975952148, + "step": 10580 + }, + { + "epoch": 0.63, + "learning_rate": 4.816338217840607e-06, + "logits/chosen": -2.424058437347412, + "logits/rejected": -1.7421108484268188, + "logps/chosen": -378.2198181152344, + "logps/rejected": -1507.832275390625, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.135721445083618, + "rewards/margins": 11.497713088989258, + "rewards/rejected": -14.633435249328613, + "step": 10590 + }, + { + "epoch": 0.63, + "learning_rate": 4.8156850996259605e-06, + "logits/chosen": -2.4638330936431885, + "logits/rejected": -1.960537314414978, + "logps/chosen": -363.4701843261719, + "logps/rejected": -1360.508544921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9542555809020996, + "rewards/margins": 10.200055122375488, + "rewards/rejected": -13.154312133789062, + "step": 10600 + }, + { + "epoch": 0.63, + "learning_rate": 4.815030866640535e-06, + "logits/chosen": -2.4479687213897705, + "logits/rejected": -1.901323676109314, + "logps/chosen": -358.86297607421875, + "logps/rejected": -1353.341064453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.914776563644409, + "rewards/margins": 10.172582626342773, + "rewards/rejected": -13.087359428405762, + "step": 10610 + }, + { + "epoch": 0.63, + "learning_rate": 4.814375519199281e-06, + "logits/chosen": -2.4505529403686523, + "logits/rejected": -1.9368374347686768, + "logps/chosen": -358.7691955566406, + "logps/rejected": -1371.4451904296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.92616605758667, + "rewards/margins": 10.341920852661133, + "rewards/rejected": -13.268086433410645, + "step": 10620 + }, + { + "epoch": 0.63, + "learning_rate": 4.813719057617681e-06, + "logits/chosen": -2.4287517070770264, + "logits/rejected": -1.7484779357910156, + "logps/chosen": -362.61236572265625, + "logps/rejected": -1416.947998046875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.96405291557312, + "rewards/margins": 10.73793888092041, + "rewards/rejected": -13.701990127563477, + "step": 10630 + }, + { + "epoch": 0.63, + "learning_rate": 4.813061482211756e-06, + "logits/chosen": -2.3969686031341553, + "logits/rejected": -1.7886526584625244, + "logps/chosen": -364.1539001464844, + "logps/rejected": -1336.5904541015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.9386534690856934, + "rewards/margins": 9.983973503112793, + "rewards/rejected": -12.922627449035645, + "step": 10640 + }, + { + "epoch": 0.64, + "learning_rate": 4.812402793298063e-06, + "logits/chosen": -2.484844446182251, + "logits/rejected": -1.9941890239715576, + "logps/chosen": -378.67626953125, + "logps/rejected": -1410.861328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.061366558074951, + "rewards/margins": 10.585947036743164, + "rewards/rejected": -13.647314071655273, + "step": 10650 + }, + { + "epoch": 0.64, + "learning_rate": 4.811742991193696e-06, + "logits/chosen": -2.489816188812256, + "logits/rejected": -1.8491714000701904, + "logps/chosen": -373.17706298828125, + "logps/rejected": -1422.6107177734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.998322010040283, + "rewards/margins": 10.78022575378418, + "rewards/rejected": -13.778547286987305, + "step": 10660 + }, + { + "epoch": 0.64, + "learning_rate": 4.811082076216282e-06, + "logits/chosen": -2.434635639190674, + "logits/rejected": -1.8998874425888062, + "logps/chosen": -370.3057556152344, + "logps/rejected": -1358.1097412109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.01652455329895, + "rewards/margins": 10.110391616821289, + "rewards/rejected": -13.126916885375977, + "step": 10670 + }, + { + "epoch": 0.64, + "learning_rate": 4.810420048683985e-06, + "logits/chosen": -2.4950270652770996, + "logits/rejected": -1.8343188762664795, + "logps/chosen": -378.42681884765625, + "logps/rejected": -1395.0272216796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.126363754272461, + "rewards/margins": 10.380609512329102, + "rewards/rejected": -13.506973266601562, + "step": 10680 + }, + { + "epoch": 0.64, + "learning_rate": 4.809756908915506e-06, + "logits/chosen": -2.4856302738189697, + "logits/rejected": -1.9295085668563843, + "logps/chosen": -372.24700927734375, + "logps/rejected": -1355.0445556640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.058403968811035, + "rewards/margins": 10.056503295898438, + "rewards/rejected": -13.114906311035156, + "step": 10690 + }, + { + "epoch": 0.64, + "learning_rate": 4.8090926572300814e-06, + "logits/chosen": -2.4835004806518555, + "logits/rejected": -1.9360570907592773, + "logps/chosen": -382.4851989746094, + "logps/rejected": -1435.0516357421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.1162006855010986, + "rewards/margins": 10.78503704071045, + "rewards/rejected": -13.901237487792969, + "step": 10700 + }, + { + "epoch": 0.64, + "learning_rate": 4.808427293947481e-06, + "logits/chosen": -2.3993053436279297, + "logits/rejected": -1.7879343032836914, + "logps/chosen": -392.18499755859375, + "logps/rejected": -1498.912841796875, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2353806495666504, + "rewards/margins": 11.307134628295898, + "rewards/rejected": -14.542515754699707, + "step": 10710 + }, + { + "epoch": 0.64, + "learning_rate": 4.807760819388011e-06, + "logits/chosen": -2.4167122840881348, + "logits/rejected": -1.6986404657363892, + "logps/chosen": -445.84490966796875, + "logps/rejected": -1660.4017333984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7174277305603027, + "rewards/margins": 12.45008659362793, + "rewards/rejected": -16.16751480102539, + "step": 10720 + }, + { + "epoch": 0.64, + "learning_rate": 4.8070932338725104e-06, + "logits/chosen": -2.424090623855591, + "logits/rejected": -1.735875129699707, + "logps/chosen": -466.07916259765625, + "logps/rejected": -1687.2701416015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9795804023742676, + "rewards/margins": 12.44456672668457, + "rewards/rejected": -16.424148559570312, + "step": 10730 + }, + { + "epoch": 0.64, + "learning_rate": 4.806424537722359e-06, + "logits/chosen": -2.3999056816101074, + "logits/rejected": -1.6302611827850342, + "logps/chosen": -475.02618408203125, + "logps/rejected": -1633.0286865234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9988512992858887, + "rewards/margins": 11.889790534973145, + "rewards/rejected": -15.888644218444824, + "step": 10740 + }, + { + "epoch": 0.64, + "learning_rate": 4.805754731259462e-06, + "logits/chosen": -2.380183219909668, + "logits/rejected": -1.6425431966781616, + "logps/chosen": -462.166259765625, + "logps/rejected": -1703.831787109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.891911029815674, + "rewards/margins": 12.703222274780273, + "rewards/rejected": -16.595130920410156, + "step": 10750 + }, + { + "epoch": 0.64, + "learning_rate": 4.805083814806268e-06, + "logits/chosen": -2.4024760723114014, + "logits/rejected": -1.6329338550567627, + "logps/chosen": -474.11309814453125, + "logps/rejected": -1771.0625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9499411582946777, + "rewards/margins": 13.323297500610352, + "rewards/rejected": -17.273237228393555, + "step": 10760 + }, + { + "epoch": 0.64, + "learning_rate": 4.804411788685755e-06, + "logits/chosen": -2.4167866706848145, + "logits/rejected": -1.749985933303833, + "logps/chosen": -474.93780517578125, + "logps/rejected": -1732.588623046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9755377769470215, + "rewards/margins": 12.896550178527832, + "rewards/rejected": -16.872089385986328, + "step": 10770 + }, + { + "epoch": 0.64, + "learning_rate": 4.803738653221436e-06, + "logits/chosen": -2.3928921222686768, + "logits/rejected": -1.6314500570297241, + "logps/chosen": -485.0201110839844, + "logps/rejected": -1755.812255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.119322299957275, + "rewards/margins": 12.992897033691406, + "rewards/rejected": -17.112218856811523, + "step": 10780 + }, + { + "epoch": 0.64, + "learning_rate": 4.8030644087373586e-06, + "logits/chosen": -2.4087977409362793, + "logits/rejected": -1.6895554065704346, + "logps/chosen": -508.90130615234375, + "logps/rejected": -1799.7301025390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.36983060836792, + "rewards/margins": 13.1942138671875, + "rewards/rejected": -17.56404685974121, + "step": 10790 + }, + { + "epoch": 0.64, + "learning_rate": 4.802389055558105e-06, + "logits/chosen": -2.4139175415039062, + "logits/rejected": -1.6875712871551514, + "logps/chosen": -484.192138671875, + "logps/rejected": -1629.8021240234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104156494140625, + "rewards/margins": 11.754647254943848, + "rewards/rejected": -15.858804702758789, + "step": 10800 + }, + { + "epoch": 0.64, + "learning_rate": 4.801712594008789e-06, + "logits/chosen": -2.471346855163574, + "logits/rejected": -1.7936718463897705, + "logps/chosen": -477.7604064941406, + "logps/rejected": -1794.0582275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.052040100097656, + "rewards/margins": 13.45250129699707, + "rewards/rejected": -17.504541397094727, + "step": 10810 + }, + { + "epoch": 0.65, + "learning_rate": 4.801035024415058e-06, + "logits/chosen": -2.443294048309326, + "logits/rejected": -1.5765907764434814, + "logps/chosen": -479.3494567871094, + "logps/rejected": -1731.2728271484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.111283302307129, + "rewards/margins": 12.767311096191406, + "rewards/rejected": -16.87859344482422, + "step": 10820 + }, + { + "epoch": 0.65, + "learning_rate": 4.8003563471030974e-06, + "logits/chosen": -2.335731029510498, + "logits/rejected": -1.6924642324447632, + "logps/chosen": -469.7325744628906, + "logps/rejected": -1559.031494140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.004734992980957, + "rewards/margins": 11.128767013549805, + "rewards/rejected": -15.133502006530762, + "step": 10830 + }, + { + "epoch": 0.65, + "learning_rate": 4.79967656239962e-06, + "logits/chosen": -2.40045428276062, + "logits/rejected": -1.6797945499420166, + "logps/chosen": -478.454345703125, + "logps/rejected": -1600.3480224609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.060263633728027, + "rewards/margins": 11.51141357421875, + "rewards/rejected": -15.571676254272461, + "step": 10840 + }, + { + "epoch": 0.65, + "learning_rate": 4.7989956706318754e-06, + "logits/chosen": -2.36832332611084, + "logits/rejected": -1.6573588848114014, + "logps/chosen": -475.73699951171875, + "logps/rejected": -1783.6314697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.061291694641113, + "rewards/margins": 13.32170295715332, + "rewards/rejected": -17.382991790771484, + "step": 10850 + }, + { + "epoch": 0.65, + "learning_rate": 4.7983136721276435e-06, + "logits/chosen": -2.3840110301971436, + "logits/rejected": -1.6780433654785156, + "logps/chosen": -468.4017639160156, + "logps/rejected": -1701.1923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0172038078308105, + "rewards/margins": 12.55692195892334, + "rewards/rejected": -16.574129104614258, + "step": 10860 + }, + { + "epoch": 0.65, + "learning_rate": 4.79763056721524e-06, + "logits/chosen": -2.4315314292907715, + "logits/rejected": -1.6236121654510498, + "logps/chosen": -470.40594482421875, + "logps/rejected": -1820.8499755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.062841892242432, + "rewards/margins": 13.699935913085938, + "rewards/rejected": -17.76277732849121, + "step": 10870 + }, + { + "epoch": 0.65, + "learning_rate": 4.79694635622351e-06, + "logits/chosen": -2.3756027221679688, + "logits/rejected": -1.7620656490325928, + "logps/chosen": -471.645263671875, + "logps/rejected": -1707.468505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.070831298828125, + "rewards/margins": 12.549368858337402, + "rewards/rejected": -16.62019920349121, + "step": 10880 + }, + { + "epoch": 0.65, + "learning_rate": 4.796261039481833e-06, + "logits/chosen": -2.3665077686309814, + "logits/rejected": -1.6557515859603882, + "logps/chosen": -476.62835693359375, + "logps/rejected": -1742.8365478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.025222301483154, + "rewards/margins": 12.962862968444824, + "rewards/rejected": -16.988086700439453, + "step": 10890 + }, + { + "epoch": 0.65, + "learning_rate": 4.795574617320122e-06, + "logits/chosen": -2.3830885887145996, + "logits/rejected": -1.696171522140503, + "logps/chosen": -471.7518615722656, + "logps/rejected": -1761.0286865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.062018394470215, + "rewards/margins": 13.111948013305664, + "rewards/rejected": -17.173965454101562, + "step": 10900 + }, + { + "epoch": 0.65, + "learning_rate": 4.794887090068819e-06, + "logits/chosen": -2.455639362335205, + "logits/rejected": -1.715903639793396, + "logps/chosen": -470.38397216796875, + "logps/rejected": -1768.4111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.935781955718994, + "rewards/margins": 13.294756889343262, + "rewards/rejected": -17.230539321899414, + "step": 10910 + }, + { + "epoch": 0.65, + "learning_rate": 4.7941984580589e-06, + "logits/chosen": -2.4070584774017334, + "logits/rejected": -1.7098388671875, + "logps/chosen": -478.38970947265625, + "logps/rejected": -1675.5306396484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.063201904296875, + "rewards/margins": 12.256269454956055, + "rewards/rejected": -16.31947135925293, + "step": 10920 + }, + { + "epoch": 0.65, + "learning_rate": 4.793508721621873e-06, + "logits/chosen": -2.3856096267700195, + "logits/rejected": -1.7090904712677002, + "logps/chosen": -489.3643493652344, + "logps/rejected": -1790.7568359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.186953544616699, + "rewards/margins": 13.28278636932373, + "rewards/rejected": -17.46973991394043, + "step": 10930 + }, + { + "epoch": 0.65, + "learning_rate": 4.792817881089776e-06, + "logits/chosen": -2.4146265983581543, + "logits/rejected": -1.714914083480835, + "logps/chosen": -461.8384704589844, + "logps/rejected": -1759.571044921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9475905895233154, + "rewards/margins": 13.20305061340332, + "rewards/rejected": -17.1506404876709, + "step": 10940 + }, + { + "epoch": 0.65, + "learning_rate": 4.7921259367951804e-06, + "logits/chosen": -2.3841519355773926, + "logits/rejected": -1.64617121219635, + "logps/chosen": -466.982666015625, + "logps/rejected": -1757.746337890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9655158519744873, + "rewards/margins": 13.154589653015137, + "rewards/rejected": -17.120107650756836, + "step": 10950 + }, + { + "epoch": 0.65, + "learning_rate": 4.791432889071188e-06, + "logits/chosen": -2.3848910331726074, + "logits/rejected": -1.595414400100708, + "logps/chosen": -479.24432373046875, + "logps/rejected": -1822.73828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.053755283355713, + "rewards/margins": 13.7352294921875, + "rewards/rejected": -17.788986206054688, + "step": 10960 + }, + { + "epoch": 0.65, + "learning_rate": 4.790738738251433e-06, + "logits/chosen": -2.3665502071380615, + "logits/rejected": -1.7311710119247437, + "logps/chosen": -481.85504150390625, + "logps/rejected": -1635.907470703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.055682182312012, + "rewards/margins": 11.855097770690918, + "rewards/rejected": -15.910778999328613, + "step": 10970 + }, + { + "epoch": 0.65, + "learning_rate": 4.790043484670077e-06, + "logits/chosen": -2.411581039428711, + "logits/rejected": -1.723153829574585, + "logps/chosen": -469.2854919433594, + "logps/rejected": -1734.703857421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9354655742645264, + "rewards/margins": 12.970283508300781, + "rewards/rejected": -16.90574836730957, + "step": 10980 + }, + { + "epoch": 0.66, + "learning_rate": 4.789347128661818e-06, + "logits/chosen": -2.4055373668670654, + "logits/rejected": -1.6711671352386475, + "logps/chosen": -480.6114196777344, + "logps/rejected": -1717.4544677734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.100053310394287, + "rewards/margins": 12.647298812866211, + "rewards/rejected": -16.74735450744629, + "step": 10990 + }, + { + "epoch": 0.66, + "learning_rate": 4.78864967056188e-06, + "logits/chosen": -2.3720622062683105, + "logits/rejected": -1.663785696029663, + "logps/chosen": -470.90057373046875, + "logps/rejected": -1782.051513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.02803373336792, + "rewards/margins": 13.337725639343262, + "rewards/rejected": -17.36575698852539, + "step": 11000 + }, + { + "epoch": 0.66, + "learning_rate": 4.787951110706019e-06, + "logits/chosen": -2.402211904525757, + "logits/rejected": -1.7067632675170898, + "logps/chosen": -483.69268798828125, + "logps/rejected": -1821.269287109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.101792335510254, + "rewards/margins": 13.659643173217773, + "rewards/rejected": -17.76143455505371, + "step": 11010 + }, + { + "epoch": 0.66, + "learning_rate": 4.787251449430524e-06, + "logits/chosen": -2.373643159866333, + "logits/rejected": -1.6885570287704468, + "logps/chosen": -483.84283447265625, + "logps/rejected": -1721.71484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1190595626831055, + "rewards/margins": 12.644073486328125, + "rewards/rejected": -16.763134002685547, + "step": 11020 + }, + { + "epoch": 0.66, + "learning_rate": 4.78655068707221e-06, + "logits/chosen": -2.4007813930511475, + "logits/rejected": -1.6634540557861328, + "logps/chosen": -486.077392578125, + "logps/rejected": -1772.9068603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.12137508392334, + "rewards/margins": 13.161947250366211, + "rewards/rejected": -17.283323287963867, + "step": 11030 + }, + { + "epoch": 0.66, + "learning_rate": 4.785848823968424e-06, + "logits/chosen": -2.35125994682312, + "logits/rejected": -1.711793303489685, + "logps/chosen": -497.80072021484375, + "logps/rejected": -1730.226806640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.221961498260498, + "rewards/margins": 12.62718391418457, + "rewards/rejected": -16.849145889282227, + "step": 11040 + }, + { + "epoch": 0.66, + "learning_rate": 4.785145860457043e-06, + "logits/chosen": -2.398834705352783, + "logits/rejected": -1.675968885421753, + "logps/chosen": -470.303466796875, + "logps/rejected": -1876.0146484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.966952085494995, + "rewards/margins": 14.342222213745117, + "rewards/rejected": -18.309175491333008, + "step": 11050 + }, + { + "epoch": 0.66, + "learning_rate": 4.784441796876475e-06, + "logits/chosen": -2.3945345878601074, + "logits/rejected": -1.7154890298843384, + "logps/chosen": -471.35565185546875, + "logps/rejected": -1656.666748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0002899169921875, + "rewards/margins": 12.139408111572266, + "rewards/rejected": -16.139698028564453, + "step": 11060 + }, + { + "epoch": 0.66, + "learning_rate": 4.783736633565654e-06, + "logits/chosen": -2.4120919704437256, + "logits/rejected": -1.5458683967590332, + "logps/chosen": -486.00341796875, + "logps/rejected": -1781.938232421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.12947940826416, + "rewards/margins": 13.233192443847656, + "rewards/rejected": -17.362672805786133, + "step": 11070 + }, + { + "epoch": 0.66, + "learning_rate": 4.783030370864047e-06, + "logits/chosen": -2.3877146244049072, + "logits/rejected": -1.6002957820892334, + "logps/chosen": -479.8301696777344, + "logps/rejected": -1798.423583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.075800895690918, + "rewards/margins": 13.448709487915039, + "rewards/rejected": -17.524511337280273, + "step": 11080 + }, + { + "epoch": 0.66, + "learning_rate": 4.782323009111649e-06, + "logits/chosen": -2.4369895458221436, + "logits/rejected": -1.7509247064590454, + "logps/chosen": -492.02093505859375, + "logps/rejected": -1747.539794921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.172533988952637, + "rewards/margins": 12.852145195007324, + "rewards/rejected": -17.024677276611328, + "step": 11090 + }, + { + "epoch": 0.66, + "learning_rate": 4.781614548648983e-06, + "logits/chosen": -2.3867292404174805, + "logits/rejected": -1.6485340595245361, + "logps/chosen": -477.9795837402344, + "logps/rejected": -1757.068603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.079047203063965, + "rewards/margins": 13.05724048614502, + "rewards/rejected": -17.13628578186035, + "step": 11100 + }, + { + "epoch": 0.66, + "learning_rate": 4.780904989817103e-06, + "logits/chosen": -2.4053516387939453, + "logits/rejected": -1.777919054031372, + "logps/chosen": -491.32476806640625, + "logps/rejected": -1750.979736328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.16312313079834, + "rewards/margins": 12.910249710083008, + "rewards/rejected": -17.07337188720703, + "step": 11110 + }, + { + "epoch": 0.66, + "learning_rate": 4.780194332957589e-06, + "logits/chosen": -2.3639209270477295, + "logits/rejected": -1.6596457958221436, + "logps/chosen": -483.55548095703125, + "logps/rejected": -1765.512939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135526657104492, + "rewards/margins": 13.075935363769531, + "rewards/rejected": -17.211462020874023, + "step": 11120 + }, + { + "epoch": 0.66, + "learning_rate": 4.779482578412553e-06, + "logits/chosen": -2.4413161277770996, + "logits/rejected": -1.6614997386932373, + "logps/chosen": -485.2254333496094, + "logps/rejected": -1820.990234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.116025447845459, + "rewards/margins": 13.643879890441895, + "rewards/rejected": -17.759906768798828, + "step": 11130 + }, + { + "epoch": 0.66, + "learning_rate": 4.778769726524632e-06, + "logits/chosen": -2.400456428527832, + "logits/rejected": -1.6365245580673218, + "logps/chosen": -495.5403747558594, + "logps/rejected": -1728.3043212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.233815670013428, + "rewards/margins": 12.598709106445312, + "rewards/rejected": -16.8325252532959, + "step": 11140 + }, + { + "epoch": 0.66, + "learning_rate": 4.778055777636994e-06, + "logits/chosen": -2.3885700702667236, + "logits/rejected": -1.6206862926483154, + "logps/chosen": -470.1903381347656, + "logps/rejected": -1700.276611328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.015231132507324, + "rewards/margins": 12.541441917419434, + "rewards/rejected": -16.55667495727539, + "step": 11150 + }, + { + "epoch": 0.67, + "learning_rate": 4.7773407320933345e-06, + "logits/chosen": -2.341071844100952, + "logits/rejected": -1.5821607112884521, + "logps/chosen": -501.5738220214844, + "logps/rejected": -1764.1099853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.282618045806885, + "rewards/margins": 12.90587043762207, + "rewards/rejected": -17.18848991394043, + "step": 11160 + }, + { + "epoch": 0.67, + "learning_rate": 4.7766245902378746e-06, + "logits/chosen": -2.405628204345703, + "logits/rejected": -1.6901309490203857, + "logps/chosen": -484.613525390625, + "logps/rejected": -1860.5576171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117055416107178, + "rewards/margins": 14.049810409545898, + "rewards/rejected": -18.166866302490234, + "step": 11170 + }, + { + "epoch": 0.67, + "learning_rate": 4.775907352415367e-06, + "logits/chosen": -2.390310764312744, + "logits/rejected": -1.6379791498184204, + "logps/chosen": -502.56427001953125, + "logps/rejected": -1819.715087890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.275322437286377, + "rewards/margins": 13.483843803405762, + "rewards/rejected": -17.759166717529297, + "step": 11180 + }, + { + "epoch": 0.67, + "learning_rate": 4.775189018971088e-06, + "logits/chosen": -2.3010413646698, + "logits/rejected": -1.507775068283081, + "logps/chosen": -553.9208984375, + "logps/rejected": -1742.3023681640625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.799193382263184, + "rewards/margins": 12.174764633178711, + "rewards/rejected": -16.973957061767578, + "step": 11190 + }, + { + "epoch": 0.67, + "learning_rate": 4.774469590250845e-06, + "logits/chosen": -2.276634693145752, + "logits/rejected": -1.623764991760254, + "logps/chosen": -529.0465087890625, + "logps/rejected": -1817.975830078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.552394390106201, + "rewards/margins": 13.190338134765625, + "rewards/rejected": -17.742733001708984, + "step": 11200 + }, + { + "epoch": 0.67, + "learning_rate": 4.77374906660097e-06, + "logits/chosen": -2.3602230548858643, + "logits/rejected": -1.6185203790664673, + "logps/chosen": -507.46533203125, + "logps/rejected": -1753.614013671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.438468933105469, + "rewards/margins": 12.663567543029785, + "rewards/rejected": -17.102035522460938, + "step": 11210 + }, + { + "epoch": 0.67, + "learning_rate": 4.773027448368323e-06, + "logits/chosen": -2.4209611415863037, + "logits/rejected": -1.6464436054229736, + "logps/chosen": -522.66552734375, + "logps/rejected": -1782.440673828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.499823093414307, + "rewards/margins": 12.87523078918457, + "rewards/rejected": -17.37505531311035, + "step": 11220 + }, + { + "epoch": 0.67, + "learning_rate": 4.772304735900292e-06, + "logits/chosen": -2.4402260780334473, + "logits/rejected": -1.7294301986694336, + "logps/chosen": -540.256103515625, + "logps/rejected": -1860.7034912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.683165550231934, + "rewards/margins": 13.485310554504395, + "rewards/rejected": -18.16847801208496, + "step": 11230 + }, + { + "epoch": 0.67, + "learning_rate": 4.77158092954479e-06, + "logits/chosen": -2.3789525032043457, + "logits/rejected": -1.6117260456085205, + "logps/chosen": -534.08740234375, + "logps/rejected": -1738.7718505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.583670139312744, + "rewards/margins": 12.362726211547852, + "rewards/rejected": -16.946395874023438, + "step": 11240 + }, + { + "epoch": 0.67, + "learning_rate": 4.770856029650257e-06, + "logits/chosen": -2.3473048210144043, + "logits/rejected": -1.6396753787994385, + "logps/chosen": -523.8961181640625, + "logps/rejected": -1745.4652099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.602988243103027, + "rewards/margins": 12.415270805358887, + "rewards/rejected": -17.018260955810547, + "step": 11250 + }, + { + "epoch": 0.67, + "learning_rate": 4.770130036565661e-06, + "logits/chosen": -2.3774609565734863, + "logits/rejected": -1.5284485816955566, + "logps/chosen": -530.5358276367188, + "logps/rejected": -1785.581298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.605923652648926, + "rewards/margins": 12.806302070617676, + "rewards/rejected": -17.4122257232666, + "step": 11260 + }, + { + "epoch": 0.67, + "learning_rate": 4.769402950640494e-06, + "logits/chosen": -2.3770947456359863, + "logits/rejected": -1.6242234706878662, + "logps/chosen": -546.1591796875, + "logps/rejected": -1839.656494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.684296131134033, + "rewards/margins": 13.258468627929688, + "rewards/rejected": -17.942764282226562, + "step": 11270 + }, + { + "epoch": 0.67, + "learning_rate": 4.768674772224775e-06, + "logits/chosen": -2.333592414855957, + "logits/rejected": -1.4681684970855713, + "logps/chosen": -533.7803955078125, + "logps/rejected": -1936.9234619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.564316272735596, + "rewards/margins": 14.351163864135742, + "rewards/rejected": -18.915481567382812, + "step": 11280 + }, + { + "epoch": 0.67, + "learning_rate": 4.7679455016690505e-06, + "logits/chosen": -2.3788349628448486, + "logits/rejected": -1.699690818786621, + "logps/chosen": -532.643798828125, + "logps/rejected": -1809.130126953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6271772384643555, + "rewards/margins": 13.038714408874512, + "rewards/rejected": -17.665891647338867, + "step": 11290 + }, + { + "epoch": 0.67, + "learning_rate": 4.767215139324389e-06, + "logits/chosen": -2.371720790863037, + "logits/rejected": -1.5835331678390503, + "logps/chosen": -532.1543579101562, + "logps/rejected": -1828.1839599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.631621360778809, + "rewards/margins": 13.198695182800293, + "rewards/rejected": -17.8303165435791, + "step": 11300 + }, + { + "epoch": 0.67, + "learning_rate": 4.766483685542389e-06, + "logits/chosen": -2.3495535850524902, + "logits/rejected": -1.5643807649612427, + "logps/chosen": -549.4993896484375, + "logps/rejected": -1805.1138916015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8024468421936035, + "rewards/margins": 12.811836242675781, + "rewards/rejected": -17.614282608032227, + "step": 11310 + }, + { + "epoch": 0.68, + "learning_rate": 4.765751140675172e-06, + "logits/chosen": -2.362119197845459, + "logits/rejected": -1.5732431411743164, + "logps/chosen": -542.1395874023438, + "logps/rejected": -1881.1351318359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.693280220031738, + "rewards/margins": 13.671399116516113, + "rewards/rejected": -18.36467933654785, + "step": 11320 + }, + { + "epoch": 0.68, + "learning_rate": 4.765017505075385e-06, + "logits/chosen": -2.324275016784668, + "logits/rejected": -1.5061105489730835, + "logps/chosen": -543.86767578125, + "logps/rejected": -1804.549560546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7442626953125, + "rewards/margins": 12.870135307312012, + "rewards/rejected": -17.614398956298828, + "step": 11330 + }, + { + "epoch": 0.68, + "learning_rate": 4.764282779096199e-06, + "logits/chosen": -2.3527109622955322, + "logits/rejected": -1.634735345840454, + "logps/chosen": -528.4219360351562, + "logps/rejected": -1752.4595947265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.54738712310791, + "rewards/margins": 12.53050708770752, + "rewards/rejected": -17.077892303466797, + "step": 11340 + }, + { + "epoch": 0.68, + "learning_rate": 4.763546963091313e-06, + "logits/chosen": -2.3855514526367188, + "logits/rejected": -1.6511262655258179, + "logps/chosen": -527.1305541992188, + "logps/rejected": -1777.503173828125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.527438163757324, + "rewards/margins": 12.815763473510742, + "rewards/rejected": -17.34320068359375, + "step": 11350 + }, + { + "epoch": 0.68, + "learning_rate": 4.762810057414947e-06, + "logits/chosen": -2.2824103832244873, + "logits/rejected": -1.6729905605316162, + "logps/chosen": -631.6561889648438, + "logps/rejected": -1984.9779052734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.635398864746094, + "rewards/margins": 13.773590087890625, + "rewards/rejected": -19.408987045288086, + "step": 11360 + }, + { + "epoch": 0.68, + "learning_rate": 4.762072062421849e-06, + "logits/chosen": -2.3817358016967773, + "logits/rejected": -1.672959566116333, + "logps/chosen": -681.3734130859375, + "logps/rejected": -1926.8841552734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0642476081848145, + "rewards/margins": 12.765110969543457, + "rewards/rejected": -18.829357147216797, + "step": 11370 + }, + { + "epoch": 0.68, + "learning_rate": 4.761332978467288e-06, + "logits/chosen": -2.366304636001587, + "logits/rejected": -1.5817081928253174, + "logps/chosen": -745.6350708007812, + "logps/rejected": -1915.262939453125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.71566104888916, + "rewards/margins": 11.985700607299805, + "rewards/rejected": -18.70136260986328, + "step": 11380 + }, + { + "epoch": 0.68, + "learning_rate": 4.760592805907062e-06, + "logits/chosen": -2.281949520111084, + "logits/rejected": -1.4942142963409424, + "logps/chosen": -1101.4459228515625, + "logps/rejected": -2350.949951171875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.330012321472168, + "rewards/margins": 12.725725173950195, + "rewards/rejected": -23.055742263793945, + "step": 11390 + }, + { + "epoch": 0.68, + "learning_rate": 4.759851545097486e-06, + "logits/chosen": -2.2660880088806152, + "logits/rejected": -1.246002435684204, + "logps/chosen": -1183.12841796875, + "logps/rejected": -2470.151123046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.112754821777344, + "rewards/margins": 13.149347305297852, + "rewards/rejected": -24.262102127075195, + "step": 11400 + }, + { + "epoch": 0.68, + "learning_rate": 4.759109196395406e-06, + "logits/chosen": -2.275607109069824, + "logits/rejected": -1.3746263980865479, + "logps/chosen": -1039.9368896484375, + "logps/rejected": -2230.408935546875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.707905769348145, + "rewards/margins": 12.143758773803711, + "rewards/rejected": -21.85166358947754, + "step": 11410 + }, + { + "epoch": 0.68, + "learning_rate": 4.758365760158187e-06, + "logits/chosen": -2.371828079223633, + "logits/rejected": -1.5970077514648438, + "logps/chosen": -673.7218017578125, + "logps/rejected": -2022.1361083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028324604034424, + "rewards/margins": 13.744590759277344, + "rewards/rejected": -19.77291488647461, + "step": 11420 + }, + { + "epoch": 0.68, + "learning_rate": 4.75762123674372e-06, + "logits/chosen": -2.4293885231018066, + "logits/rejected": -1.6689815521240234, + "logps/chosen": -583.9432373046875, + "logps/rejected": -1925.6868896484375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.11368465423584, + "rewards/margins": 13.692182540893555, + "rewards/rejected": -18.805866241455078, + "step": 11430 + }, + { + "epoch": 0.68, + "learning_rate": 4.756875626510419e-06, + "logits/chosen": -2.44722580909729, + "logits/rejected": -1.8556969165802002, + "logps/chosen": -532.3082275390625, + "logps/rejected": -1787.7064208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.638726711273193, + "rewards/margins": 12.797677993774414, + "rewards/rejected": -17.436405181884766, + "step": 11440 + }, + { + "epoch": 0.68, + "learning_rate": 4.756128929817219e-06, + "logits/chosen": -2.4094948768615723, + "logits/rejected": -1.7311744689941406, + "logps/chosen": -530.28955078125, + "logps/rejected": -1743.2698974609375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.601834297180176, + "rewards/margins": 12.394760131835938, + "rewards/rejected": -16.996593475341797, + "step": 11450 + }, + { + "epoch": 0.68, + "learning_rate": 4.755381147023582e-06, + "logits/chosen": -2.4512524604797363, + "logits/rejected": -1.807507872581482, + "logps/chosen": -530.267578125, + "logps/rejected": -1792.203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5720906257629395, + "rewards/margins": 12.902212142944336, + "rewards/rejected": -17.474302291870117, + "step": 11460 + }, + { + "epoch": 0.68, + "learning_rate": 4.754632278489489e-06, + "logits/chosen": -2.4319379329681396, + "logits/rejected": -1.842974066734314, + "logps/chosen": -540.2233276367188, + "logps/rejected": -1736.3255615234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.658278942108154, + "rewards/margins": 12.274105072021484, + "rewards/rejected": -16.932384490966797, + "step": 11470 + }, + { + "epoch": 0.68, + "learning_rate": 4.753882324575447e-06, + "logits/chosen": -2.437544584274292, + "logits/rejected": -1.7368974685668945, + "logps/chosen": -532.0233154296875, + "logps/rejected": -1800.3697509765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.616508960723877, + "rewards/margins": 12.933300971984863, + "rewards/rejected": -17.549808502197266, + "step": 11480 + }, + { + "epoch": 0.69, + "learning_rate": 4.7531312856424814e-06, + "logits/chosen": -2.3994462490081787, + "logits/rejected": -1.7582452297210693, + "logps/chosen": -551.3986206054688, + "logps/rejected": -1761.537109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.818617820739746, + "rewards/margins": 12.342573165893555, + "rewards/rejected": -17.161191940307617, + "step": 11490 + }, + { + "epoch": 0.69, + "learning_rate": 4.752379162052145e-06, + "logits/chosen": -2.441347599029541, + "logits/rejected": -1.7678686380386353, + "logps/chosen": -497.3070373535156, + "logps/rejected": -1772.4595947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.252627372741699, + "rewards/margins": 13.032785415649414, + "rewards/rejected": -17.285411834716797, + "step": 11500 + }, + { + "epoch": 0.69, + "learning_rate": 4.7516259541665075e-06, + "logits/chosen": -2.379274368286133, + "logits/rejected": -1.8978132009506226, + "logps/chosen": -486.3775329589844, + "logps/rejected": -1649.887939453125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.182669639587402, + "rewards/margins": 11.854853630065918, + "rewards/rejected": -16.03752326965332, + "step": 11510 + }, + { + "epoch": 0.69, + "learning_rate": 4.750871662348164e-06, + "logits/chosen": -2.421830892562866, + "logits/rejected": -1.8869349956512451, + "logps/chosen": -474.0227966308594, + "logps/rejected": -1593.9188232421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.029313564300537, + "rewards/margins": 11.456428527832031, + "rewards/rejected": -15.485742568969727, + "step": 11520 + }, + { + "epoch": 0.69, + "learning_rate": 4.750116286960233e-06, + "logits/chosen": -2.4029746055603027, + "logits/rejected": -1.6369459629058838, + "logps/chosen": -471.28338623046875, + "logps/rejected": -1622.7650146484375, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.005527496337891, + "rewards/margins": 11.786818504333496, + "rewards/rejected": -15.792346000671387, + "step": 11530 + }, + { + "epoch": 0.69, + "learning_rate": 4.749359828366349e-06, + "logits/chosen": -2.507918357849121, + "logits/rejected": -1.9156110286712646, + "logps/chosen": -546.458251953125, + "logps/rejected": -1718.5260009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.706284523010254, + "rewards/margins": 12.025457382202148, + "rewards/rejected": -16.731740951538086, + "step": 11540 + }, + { + "epoch": 0.69, + "learning_rate": 4.748602286930671e-06, + "logits/chosen": -2.501009702682495, + "logits/rejected": -1.969892144203186, + "logps/chosen": -618.0504150390625, + "logps/rejected": -1698.526611328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53867244720459, + "rewards/margins": 11.008405685424805, + "rewards/rejected": -16.547077178955078, + "step": 11550 + }, + { + "epoch": 0.69, + "learning_rate": 4.747843663017883e-06, + "logits/chosen": -2.50044322013855, + "logits/rejected": -2.0029003620147705, + "logps/chosen": -628.7897338867188, + "logps/rejected": -1799.037353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.604464530944824, + "rewards/margins": 11.943315505981445, + "rewards/rejected": -17.547779083251953, + "step": 11560 + }, + { + "epoch": 0.69, + "learning_rate": 4.747083956993182e-06, + "logits/chosen": -2.470590591430664, + "logits/rejected": -1.9461443424224854, + "logps/chosen": -635.8862915039062, + "logps/rejected": -1686.0693359375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.661951541900635, + "rewards/margins": 10.74977970123291, + "rewards/rejected": -16.41172981262207, + "step": 11570 + }, + { + "epoch": 0.69, + "learning_rate": 4.746323169222295e-06, + "logits/chosen": -2.4455771446228027, + "logits/rejected": -1.730756163597107, + "logps/chosen": -617.3209228515625, + "logps/rejected": -1863.387939453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392513275146484, + "rewards/margins": 12.790299415588379, + "rewards/rejected": -18.182811737060547, + "step": 11580 + }, + { + "epoch": 0.69, + "learning_rate": 4.745561300071461e-06, + "logits/chosen": -2.3294837474823, + "logits/rejected": -1.583353042602539, + "logps/chosen": -604.0313720703125, + "logps/rejected": -1954.5230712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3067450523376465, + "rewards/margins": 13.797152519226074, + "rewards/rejected": -19.103899002075195, + "step": 11590 + }, + { + "epoch": 0.69, + "learning_rate": 4.7447983499074464e-06, + "logits/chosen": -2.3440170288085938, + "logits/rejected": -1.6081459522247314, + "logps/chosen": -621.1390991210938, + "logps/rejected": -1882.5872802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.469862937927246, + "rewards/margins": 12.904741287231445, + "rewards/rejected": -18.374603271484375, + "step": 11600 + }, + { + "epoch": 0.69, + "learning_rate": 4.744034319097536e-06, + "logits/chosen": -2.3593573570251465, + "logits/rejected": -1.6917842626571655, + "logps/chosen": -608.2244262695312, + "logps/rejected": -1927.219482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.337443828582764, + "rewards/margins": 13.500524520874023, + "rewards/rejected": -18.837970733642578, + "step": 11610 + }, + { + "epoch": 0.69, + "learning_rate": 4.743269208009532e-06, + "logits/chosen": -2.3728275299072266, + "logits/rejected": -1.770437479019165, + "logps/chosen": -585.658447265625, + "logps/rejected": -1952.499267578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.14301872253418, + "rewards/margins": 13.955853462219238, + "rewards/rejected": -19.098873138427734, + "step": 11620 + }, + { + "epoch": 0.69, + "learning_rate": 4.7425030170117595e-06, + "logits/chosen": -2.344207286834717, + "logits/rejected": -1.7068755626678467, + "logps/chosen": -630.8598022460938, + "logps/rejected": -1885.8070068359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.623367786407471, + "rewards/margins": 12.79155158996582, + "rewards/rejected": -18.414920806884766, + "step": 11630 + }, + { + "epoch": 0.69, + "learning_rate": 4.741735746473063e-06, + "logits/chosen": -2.3727569580078125, + "logits/rejected": -1.751960039138794, + "logps/chosen": -641.791259765625, + "logps/rejected": -1955.4908447265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.723276615142822, + "rewards/margins": 13.393701553344727, + "rewards/rejected": -19.116973876953125, + "step": 11640 + }, + { + "epoch": 0.69, + "learning_rate": 4.740967396762808e-06, + "logits/chosen": -2.3676507472991943, + "logits/rejected": -1.7702627182006836, + "logps/chosen": -622.2339477539062, + "logps/rejected": -1911.1868896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5294508934021, + "rewards/margins": 13.139375686645508, + "rewards/rejected": -18.6688232421875, + "step": 11650 + }, + { + "epoch": 0.7, + "learning_rate": 4.740197968250876e-06, + "logits/chosen": -2.410451650619507, + "logits/rejected": -1.7898962497711182, + "logps/chosen": -608.1883544921875, + "logps/rejected": -1993.771240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.335497856140137, + "rewards/margins": 14.156991958618164, + "rewards/rejected": -19.492488861083984, + "step": 11660 + }, + { + "epoch": 0.7, + "learning_rate": 4.739427461307671e-06, + "logits/chosen": -2.356825590133667, + "logits/rejected": -1.6896921396255493, + "logps/chosen": -662.5484619140625, + "logps/rejected": -1893.7347412109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.878873825073242, + "rewards/margins": 12.601705551147461, + "rewards/rejected": -18.480579376220703, + "step": 11670 + }, + { + "epoch": 0.7, + "learning_rate": 4.738655876304115e-06, + "logits/chosen": -2.3583531379699707, + "logits/rejected": -1.6632139682769775, + "logps/chosen": -667.723388671875, + "logps/rejected": -1729.2562255859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940506458282471, + "rewards/margins": 10.906320571899414, + "rewards/rejected": -16.84682846069336, + "step": 11680 + }, + { + "epoch": 0.7, + "learning_rate": 4.737883213611647e-06, + "logits/chosen": -2.422558307647705, + "logits/rejected": -1.750838041305542, + "logps/chosen": -672.1478271484375, + "logps/rejected": -1793.7281494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0204243659973145, + "rewards/margins": 11.467691421508789, + "rewards/rejected": -17.488115310668945, + "step": 11690 + }, + { + "epoch": 0.7, + "learning_rate": 4.73710947360223e-06, + "logits/chosen": -2.398498296737671, + "logits/rejected": -1.877930998802185, + "logps/chosen": -663.457275390625, + "logps/rejected": -1725.5172119140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918820381164551, + "rewards/margins": 10.887483596801758, + "rewards/rejected": -16.806303024291992, + "step": 11700 + }, + { + "epoch": 0.7, + "learning_rate": 4.73633465664834e-06, + "logits/chosen": -2.380587339401245, + "logits/rejected": -1.6667888164520264, + "logps/chosen": -665.8668823242188, + "logps/rejected": -1745.58984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.970932483673096, + "rewards/margins": 11.02383804321289, + "rewards/rejected": -16.99477195739746, + "step": 11710 + }, + { + "epoch": 0.7, + "learning_rate": 4.735558763122976e-06, + "logits/chosen": -2.369398832321167, + "logits/rejected": -1.7471431493759155, + "logps/chosen": -675.8654174804688, + "logps/rejected": -1745.1558837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.021617889404297, + "rewards/margins": 10.986917495727539, + "rewards/rejected": -17.008533477783203, + "step": 11720 + }, + { + "epoch": 0.7, + "learning_rate": 4.734781793399651e-06, + "logits/chosen": -2.3349790573120117, + "logits/rejected": -1.6057937145233154, + "logps/chosen": -689.8077392578125, + "logps/rejected": -1761.786865234375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.147824287414551, + "rewards/margins": 11.01547908782959, + "rewards/rejected": -17.163305282592773, + "step": 11730 + }, + { + "epoch": 0.7, + "learning_rate": 4.7340037478524e-06, + "logits/chosen": -2.3615808486938477, + "logits/rejected": -1.6443736553192139, + "logps/chosen": -696.3763427734375, + "logps/rejected": -1806.254150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.224556922912598, + "rewards/margins": 11.37768268585205, + "rewards/rejected": -17.602237701416016, + "step": 11740 + }, + { + "epoch": 0.7, + "learning_rate": 4.733224626855773e-06, + "logits/chosen": -2.355149269104004, + "logits/rejected": -1.7286288738250732, + "logps/chosen": -691.6626586914062, + "logps/rejected": -1818.9906005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2354416847229, + "rewards/margins": 11.503756523132324, + "rewards/rejected": -17.739198684692383, + "step": 11750 + }, + { + "epoch": 0.7, + "learning_rate": 4.732444430784838e-06, + "logits/chosen": -2.3314859867095947, + "logits/rejected": -1.5200669765472412, + "logps/chosen": -714.040283203125, + "logps/rejected": -1830.77734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.403080940246582, + "rewards/margins": 11.460351943969727, + "rewards/rejected": -17.863433837890625, + "step": 11760 + }, + { + "epoch": 0.7, + "learning_rate": 4.731663160015184e-06, + "logits/chosen": -2.3756816387176514, + "logits/rejected": -1.5640971660614014, + "logps/chosen": -687.1038208007812, + "logps/rejected": -1794.375732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.134788513183594, + "rewards/margins": 11.368268013000488, + "rewards/rejected": -17.503055572509766, + "step": 11770 + }, + { + "epoch": 0.7, + "learning_rate": 4.730880814922913e-06, + "logits/chosen": -2.386920213699341, + "logits/rejected": -1.7106773853302002, + "logps/chosen": -711.1305541992188, + "logps/rejected": -1804.9156494140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.417119026184082, + "rewards/margins": 11.176864624023438, + "rewards/rejected": -17.593982696533203, + "step": 11780 + }, + { + "epoch": 0.7, + "learning_rate": 4.730097395884645e-06, + "logits/chosen": -2.3176703453063965, + "logits/rejected": -1.5261887311935425, + "logps/chosen": -682.523681640625, + "logps/rejected": -1721.7777099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.188653469085693, + "rewards/margins": 10.593133926391602, + "rewards/rejected": -16.781789779663086, + "step": 11790 + }, + { + "epoch": 0.7, + "learning_rate": 4.72931290327752e-06, + "logits/chosen": -2.3572001457214355, + "logits/rejected": -1.631813645362854, + "logps/chosen": -713.184326171875, + "logps/rejected": -1809.051025390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.442198753356934, + "rewards/margins": 11.204842567443848, + "rewards/rejected": -17.64704132080078, + "step": 11800 + }, + { + "epoch": 0.7, + "learning_rate": 4.728527337479191e-06, + "logits/chosen": -2.3578341007232666, + "logits/rejected": -1.5884106159210205, + "logps/chosen": -683.0955810546875, + "logps/rejected": -1785.9384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.157341957092285, + "rewards/margins": 11.250717163085938, + "rewards/rejected": -17.40806007385254, + "step": 11810 + }, + { + "epoch": 0.7, + "learning_rate": 4.727740698867831e-06, + "logits/chosen": -2.3442890644073486, + "logits/rejected": -1.5933481454849243, + "logps/chosen": -690.6375732421875, + "logps/rejected": -1837.454833984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2038445472717285, + "rewards/margins": 11.722168922424316, + "rewards/rejected": -17.926013946533203, + "step": 11820 + }, + { + "epoch": 0.71, + "learning_rate": 4.726952987822126e-06, + "logits/chosen": -2.3648362159729004, + "logits/rejected": -1.662769079208374, + "logps/chosen": -710.578125, + "logps/rejected": -1889.4573974609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.377779006958008, + "rewards/margins": 12.07122802734375, + "rewards/rejected": -18.449007034301758, + "step": 11830 + }, + { + "epoch": 0.71, + "learning_rate": 4.726164204721282e-06, + "logits/chosen": -2.327599048614502, + "logits/rejected": -1.6004047393798828, + "logps/chosen": -705.5100708007812, + "logps/rejected": -1881.9957275390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.348166465759277, + "rewards/margins": 12.034749984741211, + "rewards/rejected": -18.382915496826172, + "step": 11840 + }, + { + "epoch": 0.71, + "learning_rate": 4.725374349945019e-06, + "logits/chosen": -2.3459200859069824, + "logits/rejected": -1.5952787399291992, + "logps/chosen": -670.1466064453125, + "logps/rejected": -1774.0816650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9853925704956055, + "rewards/margins": 11.324291229248047, + "rewards/rejected": -17.309682846069336, + "step": 11850 + }, + { + "epoch": 0.71, + "learning_rate": 4.724583423873571e-06, + "logits/chosen": -2.363737106323242, + "logits/rejected": -1.726508378982544, + "logps/chosen": -666.8216552734375, + "logps/rejected": -1739.2728271484375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953989505767822, + "rewards/margins": 11.000299453735352, + "rewards/rejected": -16.954288482666016, + "step": 11860 + }, + { + "epoch": 0.71, + "learning_rate": 4.723791426887691e-06, + "logits/chosen": -2.3894495964050293, + "logits/rejected": -1.7147353887557983, + "logps/chosen": -651.2884521484375, + "logps/rejected": -1703.059814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.836228370666504, + "rewards/margins": 10.750777244567871, + "rewards/rejected": -16.587005615234375, + "step": 11870 + }, + { + "epoch": 0.71, + "learning_rate": 4.7229983593686465e-06, + "logits/chosen": -2.4370384216308594, + "logits/rejected": -1.8875458240509033, + "logps/chosen": -672.81494140625, + "logps/rejected": -1908.0166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.984147548675537, + "rewards/margins": 12.663236618041992, + "rewards/rejected": -18.647384643554688, + "step": 11880 + }, + { + "epoch": 0.71, + "learning_rate": 4.722204221698221e-06, + "logits/chosen": -2.3905022144317627, + "logits/rejected": -1.7580293416976929, + "logps/chosen": -645.8302612304688, + "logps/rejected": -1857.6246337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734655857086182, + "rewards/margins": 12.385188102722168, + "rewards/rejected": -18.11984634399414, + "step": 11890 + }, + { + "epoch": 0.71, + "learning_rate": 4.721409014258711e-06, + "logits/chosen": -2.3604018688201904, + "logits/rejected": -1.8143632411956787, + "logps/chosen": -632.2434692382812, + "logps/rejected": -1834.9029541015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.708359241485596, + "rewards/margins": 12.18109130859375, + "rewards/rejected": -17.88945198059082, + "step": 11900 + }, + { + "epoch": 0.71, + "learning_rate": 4.72061273743293e-06, + "logits/chosen": -2.3680760860443115, + "logits/rejected": -1.6645110845565796, + "logps/chosen": -664.589111328125, + "logps/rejected": -1815.1907958984375, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.896126747131348, + "rewards/margins": 11.818241119384766, + "rewards/rejected": -17.71436882019043, + "step": 11910 + }, + { + "epoch": 0.71, + "learning_rate": 4.719815391604205e-06, + "logits/chosen": -2.3046693801879883, + "logits/rejected": -1.4630621671676636, + "logps/chosen": -837.7928466796875, + "logps/rejected": -1953.1103515625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.673397064208984, + "rewards/margins": 11.419981002807617, + "rewards/rejected": -19.0933780670166, + "step": 11920 + }, + { + "epoch": 0.71, + "learning_rate": 4.719016977156379e-06, + "logits/chosen": -2.266857624053955, + "logits/rejected": -1.488539695739746, + "logps/chosen": -906.0813598632812, + "logps/rejected": -2069.653076171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.368337631225586, + "rewards/margins": 11.8805513381958, + "rewards/rejected": -20.24888801574707, + "step": 11930 + }, + { + "epoch": 0.71, + "learning_rate": 4.718217494473809e-06, + "logits/chosen": -2.3214917182922363, + "logits/rejected": -1.40349280834198, + "logps/chosen": -914.6154174804688, + "logps/rejected": -2078.509521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.406818389892578, + "rewards/margins": 11.935511589050293, + "rewards/rejected": -20.34233283996582, + "step": 11940 + }, + { + "epoch": 0.71, + "learning_rate": 4.717416943941365e-06, + "logits/chosen": -2.2993083000183105, + "logits/rejected": -1.5245517492294312, + "logps/chosen": -952.1868896484375, + "logps/rejected": -2172.086181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.856409072875977, + "rewards/margins": 12.431641578674316, + "rewards/rejected": -21.288049697875977, + "step": 11950 + }, + { + "epoch": 0.71, + "learning_rate": 4.716615325944433e-06, + "logits/chosen": -2.2939164638519287, + "logits/rejected": -1.4728368520736694, + "logps/chosen": -931.0286865234375, + "logps/rejected": -2122.883544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.54122543334961, + "rewards/margins": 12.24241828918457, + "rewards/rejected": -20.783641815185547, + "step": 11960 + }, + { + "epoch": 0.71, + "learning_rate": 4.715812640868911e-06, + "logits/chosen": -2.302877426147461, + "logits/rejected": -1.5327012538909912, + "logps/chosen": -907.2630615234375, + "logps/rejected": -2107.44580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.37143611907959, + "rewards/margins": 12.251383781433105, + "rewards/rejected": -20.622819900512695, + "step": 11970 + }, + { + "epoch": 0.71, + "learning_rate": 4.715008889101211e-06, + "logits/chosen": -2.2900218963623047, + "logits/rejected": -1.6225789785385132, + "logps/chosen": -907.3821411132812, + "logps/rejected": -2107.77587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.402095794677734, + "rewards/margins": 12.229564666748047, + "rewards/rejected": -20.631662368774414, + "step": 11980 + }, + { + "epoch": 0.71, + "learning_rate": 4.714204071028261e-06, + "logits/chosen": -2.303574562072754, + "logits/rejected": -1.6112396717071533, + "logps/chosen": -921.66015625, + "logps/rejected": -2086.43505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.54165267944336, + "rewards/margins": 11.880943298339844, + "rewards/rejected": -20.422595977783203, + "step": 11990 + }, + { + "epoch": 0.72, + "learning_rate": 4.7133981870375e-06, + "logits/chosen": -2.3040263652801514, + "logits/rejected": -1.4545491933822632, + "logps/chosen": -908.3512573242188, + "logps/rejected": -2075.034912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.361536026000977, + "rewards/margins": 11.950156211853027, + "rewards/rejected": -20.311691284179688, + "step": 12000 + }, + { + "epoch": 0.72, + "learning_rate": 4.712591237516879e-06, + "logits/chosen": -2.3056271076202393, + "logits/rejected": -1.4867618083953857, + "logps/chosen": -919.7911376953125, + "logps/rejected": -2098.61279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.52281379699707, + "rewards/margins": 12.019657135009766, + "rewards/rejected": -20.542470932006836, + "step": 12010 + }, + { + "epoch": 0.72, + "learning_rate": 4.711783222854865e-06, + "logits/chosen": -2.2948763370513916, + "logits/rejected": -1.6303613185882568, + "logps/chosen": -919.30078125, + "logps/rejected": -2011.5888671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.517538070678711, + "rewards/margins": 11.177942276000977, + "rewards/rejected": -19.695480346679688, + "step": 12020 + }, + { + "epoch": 0.72, + "learning_rate": 4.710974143440435e-06, + "logits/chosen": -2.3210318088531494, + "logits/rejected": -1.5268845558166504, + "logps/chosen": -954.45263671875, + "logps/rejected": -2098.197021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.755172729492188, + "rewards/margins": 11.795136451721191, + "rewards/rejected": -20.550308227539062, + "step": 12030 + }, + { + "epoch": 0.72, + "learning_rate": 4.710163999663081e-06, + "logits/chosen": -2.276252269744873, + "logits/rejected": -1.5514951944351196, + "logps/chosen": -875.5607299804688, + "logps/rejected": -2066.89697265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.059812545776367, + "rewards/margins": 12.175773620605469, + "rewards/rejected": -20.235584259033203, + "step": 12040 + }, + { + "epoch": 0.72, + "learning_rate": 4.709352791912806e-06, + "logits/chosen": -2.2544941902160645, + "logits/rejected": -1.4564272165298462, + "logps/chosen": -928.8663940429688, + "logps/rejected": -2103.74462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.565840721130371, + "rewards/margins": 12.039595603942871, + "rewards/rejected": -20.605438232421875, + "step": 12050 + }, + { + "epoch": 0.72, + "learning_rate": 4.708540520580125e-06, + "logits/chosen": -2.273247003555298, + "logits/rejected": -1.5010372400283813, + "logps/chosen": -923.6956176757812, + "logps/rejected": -2103.042236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.537571907043457, + "rewards/margins": 12.056132316589355, + "rewards/rejected": -20.593704223632812, + "step": 12060 + }, + { + "epoch": 0.72, + "learning_rate": 4.707727186056066e-06, + "logits/chosen": -2.3066563606262207, + "logits/rejected": -1.424364686012268, + "logps/chosen": -907.49755859375, + "logps/rejected": -2065.422607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.364471435546875, + "rewards/margins": 11.844647407531738, + "rewards/rejected": -20.209117889404297, + "step": 12070 + }, + { + "epoch": 0.72, + "learning_rate": 4.706912788732167e-06, + "logits/chosen": -2.277188777923584, + "logits/rejected": -1.4885280132293701, + "logps/chosen": -909.9312744140625, + "logps/rejected": -1949.9388427734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.433822631835938, + "rewards/margins": 10.644775390625, + "rewards/rejected": -19.078596115112305, + "step": 12080 + }, + { + "epoch": 0.72, + "learning_rate": 4.70609732900048e-06, + "logits/chosen": -2.3233721256256104, + "logits/rejected": -1.5987635850906372, + "logps/chosen": -898.720703125, + "logps/rejected": -2024.6468505859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.279882431030273, + "rewards/margins": 11.53213882446289, + "rewards/rejected": -19.812021255493164, + "step": 12090 + }, + { + "epoch": 0.72, + "learning_rate": 4.705280807253567e-06, + "logits/chosen": -2.272088050842285, + "logits/rejected": -1.4037643671035767, + "logps/chosen": -940.7891845703125, + "logps/rejected": -2118.513427734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.669404983520508, + "rewards/margins": 12.071748733520508, + "rewards/rejected": -20.74115562438965, + "step": 12100 + }, + { + "epoch": 0.72, + "learning_rate": 4.704463223884501e-06, + "logits/chosen": -2.35219407081604, + "logits/rejected": -1.5400075912475586, + "logps/chosen": -910.9401245117188, + "logps/rejected": -2201.9375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.269137382507324, + "rewards/margins": 13.29931926727295, + "rewards/rejected": -21.568456649780273, + "step": 12110 + }, + { + "epoch": 0.72, + "learning_rate": 4.703644579286867e-06, + "logits/chosen": -2.2657229900360107, + "logits/rejected": -1.4986718893051147, + "logps/chosen": -891.38720703125, + "logps/rejected": -2112.581298828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.238120079040527, + "rewards/margins": 12.453570365905762, + "rewards/rejected": -20.691692352294922, + "step": 12120 + }, + { + "epoch": 0.72, + "learning_rate": 4.702824873854761e-06, + "logits/chosen": -2.309971332550049, + "logits/rejected": -1.4879271984100342, + "logps/chosen": -882.0732421875, + "logps/rejected": -2167.3154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.070892333984375, + "rewards/margins": 13.146453857421875, + "rewards/rejected": -21.217344284057617, + "step": 12130 + }, + { + "epoch": 0.72, + "learning_rate": 4.702004107982789e-06, + "logits/chosen": -2.320103406906128, + "logits/rejected": -1.603650689125061, + "logps/chosen": -919.8590698242188, + "logps/rejected": -2129.2841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.461067199707031, + "rewards/margins": 12.39105224609375, + "rewards/rejected": -20.85211753845215, + "step": 12140 + }, + { + "epoch": 0.72, + "learning_rate": 4.701182282066068e-06, + "logits/chosen": -2.3372302055358887, + "logits/rejected": -1.58827805519104, + "logps/chosen": -880.3049926757812, + "logps/rejected": -2090.443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.07585620880127, + "rewards/margins": 12.386209487915039, + "rewards/rejected": -20.46206283569336, + "step": 12150 + }, + { + "epoch": 0.73, + "learning_rate": 4.700359396500223e-06, + "logits/chosen": -2.280048370361328, + "logits/rejected": -1.5264836549758911, + "logps/chosen": -869.9417724609375, + "logps/rejected": -2208.89013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.988421440124512, + "rewards/margins": 13.650492668151855, + "rewards/rejected": -21.638912200927734, + "step": 12160 + }, + { + "epoch": 0.73, + "learning_rate": 4.699535451681394e-06, + "logits/chosen": -2.394160032272339, + "logits/rejected": -1.6365734338760376, + "logps/chosen": -885.4385986328125, + "logps/rejected": -2079.51220703125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.173094749450684, + "rewards/margins": 12.17487621307373, + "rewards/rejected": -20.347972869873047, + "step": 12170 + }, + { + "epoch": 0.73, + "learning_rate": 4.698710448006226e-06, + "logits/chosen": -2.3361477851867676, + "logits/rejected": -1.5747175216674805, + "logps/chosen": -868.0623168945312, + "logps/rejected": -2060.40625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9037957191467285, + "rewards/margins": 12.253938674926758, + "rewards/rejected": -20.157733917236328, + "step": 12180 + }, + { + "epoch": 0.73, + "learning_rate": 4.697884385871877e-06, + "logits/chosen": -2.265840768814087, + "logits/rejected": -1.4896609783172607, + "logps/chosen": -890.8796997070312, + "logps/rejected": -2078.255859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.261404037475586, + "rewards/margins": 12.084268569946289, + "rewards/rejected": -20.345672607421875, + "step": 12190 + }, + { + "epoch": 0.73, + "learning_rate": 4.6970572656760125e-06, + "logits/chosen": -2.2479281425476074, + "logits/rejected": -1.5028667449951172, + "logps/chosen": -930.1590576171875, + "logps/rejected": -2096.31884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.628969192504883, + "rewards/margins": 11.890848159790039, + "rewards/rejected": -20.519817352294922, + "step": 12200 + }, + { + "epoch": 0.73, + "learning_rate": 4.696229087816808e-06, + "logits/chosen": -2.3236987590789795, + "logits/rejected": -1.6358238458633423, + "logps/chosen": -869.1468505859375, + "logps/rejected": -1979.481689453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.957434177398682, + "rewards/margins": 11.39643383026123, + "rewards/rejected": -19.35386848449707, + "step": 12210 + }, + { + "epoch": 0.73, + "learning_rate": 4.695399852692948e-06, + "logits/chosen": -2.297348976135254, + "logits/rejected": -1.4921989440917969, + "logps/chosen": -851.3814697265625, + "logps/rejected": -2081.83935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.797646999359131, + "rewards/margins": 12.58021354675293, + "rewards/rejected": -20.37786293029785, + "step": 12220 + }, + { + "epoch": 0.73, + "learning_rate": 4.694569560703628e-06, + "logits/chosen": -2.3105788230895996, + "logits/rejected": -1.5800487995147705, + "logps/chosen": -851.3812255859375, + "logps/rejected": -2017.3521728515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.858436584472656, + "rewards/margins": 11.862920761108398, + "rewards/rejected": -19.721357345581055, + "step": 12230 + }, + { + "epoch": 0.73, + "learning_rate": 4.693738212248549e-06, + "logits/chosen": -2.2796945571899414, + "logits/rejected": -1.53555428981781, + "logps/chosen": -889.50537109375, + "logps/rejected": -2068.42041015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.224008560180664, + "rewards/margins": 12.01783561706543, + "rewards/rejected": -20.241846084594727, + "step": 12240 + }, + { + "epoch": 0.73, + "learning_rate": 4.692905807727922e-06, + "logits/chosen": -2.3227059841156006, + "logits/rejected": -1.598939299583435, + "logps/chosen": -858.9454956054688, + "logps/rejected": -2014.315673828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.859617710113525, + "rewards/margins": 11.839181900024414, + "rewards/rejected": -19.698801040649414, + "step": 12250 + }, + { + "epoch": 0.73, + "learning_rate": 4.692072347542468e-06, + "logits/chosen": -2.3295671939849854, + "logits/rejected": -1.6889257431030273, + "logps/chosen": -850.5485229492188, + "logps/rejected": -2043.147705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.795111179351807, + "rewards/margins": 12.208375930786133, + "rewards/rejected": -20.003488540649414, + "step": 12260 + }, + { + "epoch": 0.73, + "learning_rate": 4.6912378320934134e-06, + "logits/chosen": -2.3316190242767334, + "logits/rejected": -1.5455907583236694, + "logps/chosen": -845.2976684570312, + "logps/rejected": -2045.748046875, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.730307579040527, + "rewards/margins": 12.288830757141113, + "rewards/rejected": -20.019140243530273, + "step": 12270 + }, + { + "epoch": 0.73, + "learning_rate": 4.690402261782494e-06, + "logits/chosen": -2.2682762145996094, + "logits/rejected": -1.4224581718444824, + "logps/chosen": -909.0734252929688, + "logps/rejected": -2145.618408203125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.377269744873047, + "rewards/margins": 12.627469062805176, + "rewards/rejected": -21.00473976135254, + "step": 12280 + }, + { + "epoch": 0.73, + "learning_rate": 4.689565637011955e-06, + "logits/chosen": -2.316528081893921, + "logits/rejected": -1.5826098918914795, + "logps/chosen": -873.6448364257812, + "logps/rejected": -2074.041748046875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.02174186706543, + "rewards/margins": 12.281771659851074, + "rewards/rejected": -20.30351448059082, + "step": 12290 + }, + { + "epoch": 0.73, + "learning_rate": 4.688727958184545e-06, + "logits/chosen": -2.3311378955841064, + "logits/rejected": -1.6398423910140991, + "logps/chosen": -816.47265625, + "logps/rejected": -1985.7578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.435278415679932, + "rewards/margins": 11.986404418945312, + "rewards/rejected": -19.42168426513672, + "step": 12300 + }, + { + "epoch": 0.73, + "learning_rate": 4.687889225703525e-06, + "logits/chosen": -2.3494880199432373, + "logits/rejected": -1.6022605895996094, + "logps/chosen": -759.7149658203125, + "logps/rejected": -1965.0494384765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.873079776763916, + "rewards/margins": 12.335451126098633, + "rewards/rejected": -19.208532333374023, + "step": 12310 + }, + { + "epoch": 0.73, + "learning_rate": 4.687049439972659e-06, + "logits/chosen": -2.3433825969696045, + "logits/rejected": -1.7165921926498413, + "logps/chosen": -764.0992431640625, + "logps/rejected": -2016.3411865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.937239646911621, + "rewards/margins": 12.77510929107666, + "rewards/rejected": -19.71234703063965, + "step": 12320 + }, + { + "epoch": 0.74, + "learning_rate": 4.68620860139622e-06, + "logits/chosen": -2.275442361831665, + "logits/rejected": -1.5513830184936523, + "logps/chosen": -758.8952026367188, + "logps/rejected": -2006.69140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.901664733886719, + "rewards/margins": 12.720331192016602, + "rewards/rejected": -19.621994018554688, + "step": 12330 + }, + { + "epoch": 0.74, + "learning_rate": 4.685366710378989e-06, + "logits/chosen": -2.352602243423462, + "logits/rejected": -1.6559762954711914, + "logps/chosen": -764.1458740234375, + "logps/rejected": -2065.477783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.947103977203369, + "rewards/margins": 13.25885009765625, + "rewards/rejected": -20.205955505371094, + "step": 12340 + }, + { + "epoch": 0.74, + "learning_rate": 4.684523767326251e-06, + "logits/chosen": -2.351867914199829, + "logits/rejected": -1.5803056955337524, + "logps/chosen": -774.601806640625, + "logps/rejected": -2075.493896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.120750427246094, + "rewards/margins": 13.193089485168457, + "rewards/rejected": -20.313838958740234, + "step": 12350 + }, + { + "epoch": 0.74, + "learning_rate": 4.683679772643799e-06, + "logits/chosen": -2.320723056793213, + "logits/rejected": -1.5816880464553833, + "logps/chosen": -785.35302734375, + "logps/rejected": -2052.81640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.178704261779785, + "rewards/margins": 12.903882026672363, + "rewards/rejected": -20.082584381103516, + "step": 12360 + }, + { + "epoch": 0.74, + "learning_rate": 4.682834726737933e-06, + "logits/chosen": -2.3117377758026123, + "logits/rejected": -1.4487828016281128, + "logps/chosen": -767.9998779296875, + "logps/rejected": -1973.953857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.979891300201416, + "rewards/margins": 12.323516845703125, + "rewards/rejected": -19.303409576416016, + "step": 12370 + }, + { + "epoch": 0.74, + "learning_rate": 4.6819886300154565e-06, + "logits/chosen": -2.3733949661254883, + "logits/rejected": -1.6435045003890991, + "logps/chosen": -754.1683349609375, + "logps/rejected": -1927.4111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8175249099731445, + "rewards/margins": 12.017876625061035, + "rewards/rejected": -18.83540153503418, + "step": 12380 + }, + { + "epoch": 0.74, + "learning_rate": 4.681141482883682e-06, + "logits/chosen": -2.3556790351867676, + "logits/rejected": -1.5832946300506592, + "logps/chosen": -731.4368286132812, + "logps/rejected": -1955.4410400390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5860276222229, + "rewards/margins": 12.529126167297363, + "rewards/rejected": -19.115154266357422, + "step": 12390 + }, + { + "epoch": 0.74, + "learning_rate": 4.6802932857504254e-06, + "logits/chosen": -2.3637640476226807, + "logits/rejected": -1.7131067514419556, + "logps/chosen": -742.413818359375, + "logps/rejected": -1970.041259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.70400857925415, + "rewards/margins": 12.561103820800781, + "rewards/rejected": -19.265111923217773, + "step": 12400 + }, + { + "epoch": 0.74, + "learning_rate": 4.679444039024008e-06, + "logits/chosen": -2.37764310836792, + "logits/rejected": -1.6903870105743408, + "logps/chosen": -746.6127319335938, + "logps/rejected": -2008.987548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.747740268707275, + "rewards/margins": 12.888299942016602, + "rewards/rejected": -19.636043548583984, + "step": 12410 + }, + { + "epoch": 0.74, + "learning_rate": 4.6785937431132596e-06, + "logits/chosen": -2.359809398651123, + "logits/rejected": -1.713903784751892, + "logps/chosen": -758.2705078125, + "logps/rejected": -1961.4951171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.866019248962402, + "rewards/margins": 12.30457878112793, + "rewards/rejected": -19.170597076416016, + "step": 12420 + }, + { + "epoch": 0.74, + "learning_rate": 4.67774239842751e-06, + "logits/chosen": -2.3513312339782715, + "logits/rejected": -1.7347164154052734, + "logps/chosen": -752.3549194335938, + "logps/rejected": -2062.84423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.799860954284668, + "rewards/margins": 13.373858451843262, + "rewards/rejected": -20.173721313476562, + "step": 12430 + }, + { + "epoch": 0.74, + "learning_rate": 4.676890005376598e-06, + "logits/chosen": -2.3508872985839844, + "logits/rejected": -1.6125166416168213, + "logps/chosen": -760.26123046875, + "logps/rejected": -1992.012939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8868513107299805, + "rewards/margins": 12.58372688293457, + "rewards/rejected": -19.470579147338867, + "step": 12440 + }, + { + "epoch": 0.74, + "learning_rate": 4.676036564370865e-06, + "logits/chosen": -2.358893871307373, + "logits/rejected": -1.5313081741333008, + "logps/chosen": -735.8331909179688, + "logps/rejected": -1910.8551025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6034088134765625, + "rewards/margins": 12.059374809265137, + "rewards/rejected": -18.662784576416016, + "step": 12450 + }, + { + "epoch": 0.74, + "learning_rate": 4.675182075821158e-06, + "logits/chosen": -2.3375556468963623, + "logits/rejected": -1.6216663122177124, + "logps/chosen": -778.0150146484375, + "logps/rejected": -1993.3863525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.089566707611084, + "rewards/margins": 12.395454406738281, + "rewards/rejected": -19.485021591186523, + "step": 12460 + }, + { + "epoch": 0.74, + "learning_rate": 4.674326540138826e-06, + "logits/chosen": -2.2992122173309326, + "logits/rejected": -1.476839303970337, + "logps/chosen": -774.1066284179688, + "logps/rejected": -2051.77197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.970612525939941, + "rewards/margins": 13.09434986114502, + "rewards/rejected": -20.06496238708496, + "step": 12470 + }, + { + "epoch": 0.74, + "learning_rate": 4.6734699577357265e-06, + "logits/chosen": -2.3626484870910645, + "logits/rejected": -1.708721399307251, + "logps/chosen": -713.0775756835938, + "logps/rejected": -1953.366455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.493579864501953, + "rewards/margins": 12.598992347717285, + "rewards/rejected": -19.09256935119629, + "step": 12480 + }, + { + "epoch": 0.74, + "learning_rate": 4.672612329024217e-06, + "logits/chosen": -2.3412277698516846, + "logits/rejected": -1.5498765707015991, + "logps/chosen": -735.7423095703125, + "logps/rejected": -2056.4814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.612164497375488, + "rewards/margins": 13.512939453125, + "rewards/rejected": -20.125104904174805, + "step": 12490 + }, + { + "epoch": 0.75, + "learning_rate": 4.671753654417159e-06, + "logits/chosen": -2.3045661449432373, + "logits/rejected": -1.6059478521347046, + "logps/chosen": -745.5012817382812, + "logps/rejected": -2009.383544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823692321777344, + "rewards/margins": 12.827191352844238, + "rewards/rejected": -19.650882720947266, + "step": 12500 + }, + { + "epoch": 0.75, + "learning_rate": 4.670893934327921e-06, + "logits/chosen": -2.2852370738983154, + "logits/rejected": -1.5791993141174316, + "logps/chosen": -776.737548828125, + "logps/rejected": -2020.5914306640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.031543731689453, + "rewards/margins": 12.720540046691895, + "rewards/rejected": -19.752084732055664, + "step": 12510 + }, + { + "epoch": 0.75, + "learning_rate": 4.67003316917037e-06, + "logits/chosen": -2.3670735359191895, + "logits/rejected": -1.598665714263916, + "logps/chosen": -744.1644287109375, + "logps/rejected": -2052.822021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.756957054138184, + "rewards/margins": 13.319032669067383, + "rewards/rejected": -20.07598876953125, + "step": 12520 + }, + { + "epoch": 0.75, + "learning_rate": 4.6691713593588795e-06, + "logits/chosen": -2.377978801727295, + "logits/rejected": -1.7367572784423828, + "logps/chosen": -748.5162353515625, + "logps/rejected": -2006.0458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.806979179382324, + "rewards/margins": 12.822627067565918, + "rewards/rejected": -19.629608154296875, + "step": 12530 + }, + { + "epoch": 0.75, + "learning_rate": 4.668308505308323e-06, + "logits/chosen": -2.376039981842041, + "logits/rejected": -1.63314950466156, + "logps/chosen": -734.8247680664062, + "logps/rejected": -2000.4290771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.617531776428223, + "rewards/margins": 12.950456619262695, + "rewards/rejected": -19.567989349365234, + "step": 12540 + }, + { + "epoch": 0.75, + "learning_rate": 4.667444607434082e-06, + "logits/chosen": -2.3460121154785156, + "logits/rejected": -1.5081298351287842, + "logps/chosen": -726.4072265625, + "logps/rejected": -1912.2740478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570639610290527, + "rewards/margins": 12.117815017700195, + "rewards/rejected": -18.68845558166504, + "step": 12550 + }, + { + "epoch": 0.75, + "learning_rate": 4.666579666152032e-06, + "logits/chosen": -2.3622500896453857, + "logits/rejected": -1.642232894897461, + "logps/chosen": -759.5059814453125, + "logps/rejected": -1996.1409912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.817136287689209, + "rewards/margins": 12.697587013244629, + "rewards/rejected": -19.514720916748047, + "step": 12560 + }, + { + "epoch": 0.75, + "learning_rate": 4.6657136818785596e-06, + "logits/chosen": -2.383434295654297, + "logits/rejected": -1.5625925064086914, + "logps/chosen": -760.732421875, + "logps/rejected": -1933.4417724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.903054237365723, + "rewards/margins": 11.989463806152344, + "rewards/rejected": -18.892518997192383, + "step": 12570 + }, + { + "epoch": 0.75, + "learning_rate": 4.664846655030548e-06, + "logits/chosen": -2.324219226837158, + "logits/rejected": -1.555044174194336, + "logps/chosen": -750.1749877929688, + "logps/rejected": -1958.2177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.824582099914551, + "rewards/margins": 12.323244094848633, + "rewards/rejected": -19.147825241088867, + "step": 12580 + }, + { + "epoch": 0.75, + "learning_rate": 4.663978586025385e-06, + "logits/chosen": -2.379725933074951, + "logits/rejected": -1.651921272277832, + "logps/chosen": -750.42236328125, + "logps/rejected": -1963.1474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.833141326904297, + "rewards/margins": 12.363271713256836, + "rewards/rejected": -19.1964111328125, + "step": 12590 + }, + { + "epoch": 0.75, + "learning_rate": 4.663109475280958e-06, + "logits/chosen": -2.307892322540283, + "logits/rejected": -1.6617319583892822, + "logps/chosen": -746.8948974609375, + "logps/rejected": -1990.982177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.838021755218506, + "rewards/margins": 12.626920700073242, + "rewards/rejected": -19.46494483947754, + "step": 12600 + }, + { + "epoch": 0.75, + "learning_rate": 4.662239323215657e-06, + "logits/chosen": -2.4899230003356934, + "logits/rejected": -1.8586353063583374, + "logps/chosen": -539.4165649414062, + "logps/rejected": -1723.8187255859375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.646638870239258, + "rewards/margins": 12.149958610534668, + "rewards/rejected": -16.79659652709961, + "step": 12610 + }, + { + "epoch": 0.75, + "learning_rate": 4.661368130248373e-06, + "logits/chosen": -2.5253236293792725, + "logits/rejected": -2.0342371463775635, + "logps/chosen": -407.90924072265625, + "logps/rejected": -1475.7291259765625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3716418743133545, + "rewards/margins": 10.93259048461914, + "rewards/rejected": -14.304231643676758, + "step": 12620 + }, + { + "epoch": 0.75, + "learning_rate": 4.660495896798499e-06, + "logits/chosen": -2.4798500537872314, + "logits/rejected": -2.0171282291412354, + "logps/chosen": -418.842041015625, + "logps/rejected": -1610.5433349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5189902782440186, + "rewards/margins": 12.136367797851562, + "rewards/rejected": -15.655359268188477, + "step": 12630 + }, + { + "epoch": 0.75, + "learning_rate": 4.6596226232859285e-06, + "logits/chosen": -2.509129047393799, + "logits/rejected": -1.8521140813827515, + "logps/chosen": -482.1939392089844, + "logps/rejected": -1778.657470703125, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.065914154052734, + "rewards/margins": 13.282415390014648, + "rewards/rejected": -17.348329544067383, + "step": 12640 + }, + { + "epoch": 0.75, + "learning_rate": 4.658748310131054e-06, + "logits/chosen": -2.509512424468994, + "logits/rejected": -1.9900840520858765, + "logps/chosen": -550.9572143554688, + "logps/rejected": -1815.8486328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.800627708435059, + "rewards/margins": 12.909423828125, + "rewards/rejected": -17.710050582885742, + "step": 12650 + }, + { + "epoch": 0.75, + "learning_rate": 4.65787295775477e-06, + "logits/chosen": -2.454228639602661, + "logits/rejected": -1.8303579092025757, + "logps/chosen": -548.107666015625, + "logps/rejected": -1849.2193603515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.722152233123779, + "rewards/margins": 13.325811386108398, + "rewards/rejected": -18.047962188720703, + "step": 12660 + }, + { + "epoch": 0.76, + "learning_rate": 4.656996566578472e-06, + "logits/chosen": -2.4777719974517822, + "logits/rejected": -1.9188200235366821, + "logps/chosen": -530.8133544921875, + "logps/rejected": -1808.2711181640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6409430503845215, + "rewards/margins": 13.014482498168945, + "rewards/rejected": -17.655426025390625, + "step": 12670 + }, + { + "epoch": 0.76, + "learning_rate": 4.6561191370240545e-06, + "logits/chosen": -2.4554965496063232, + "logits/rejected": -1.807938575744629, + "logps/chosen": -566.7615356445312, + "logps/rejected": -1822.226318359375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.949851989746094, + "rewards/margins": 12.802334785461426, + "rewards/rejected": -17.752187728881836, + "step": 12680 + }, + { + "epoch": 0.76, + "learning_rate": 4.655240669513913e-06, + "logits/chosen": -2.4385976791381836, + "logits/rejected": -1.7275890111923218, + "logps/chosen": -673.5775146484375, + "logps/rejected": -1942.8656005859375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.063147068023682, + "rewards/margins": 12.925695419311523, + "rewards/rejected": -18.988842010498047, + "step": 12690 + }, + { + "epoch": 0.76, + "learning_rate": 4.654361164470939e-06, + "logits/chosen": -2.4348976612091064, + "logits/rejected": -1.7523233890533447, + "logps/chosen": -705.5535888671875, + "logps/rejected": -1962.1878662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.355813503265381, + "rewards/margins": 12.82153034210205, + "rewards/rejected": -19.177343368530273, + "step": 12700 + }, + { + "epoch": 0.76, + "learning_rate": 4.653480622318528e-06, + "logits/chosen": -2.3922762870788574, + "logits/rejected": -1.7569687366485596, + "logps/chosen": -722.6134643554688, + "logps/rejected": -2047.0751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5335187911987305, + "rewards/margins": 13.506281852722168, + "rewards/rejected": -20.03980255126953, + "step": 12710 + }, + { + "epoch": 0.76, + "learning_rate": 4.652599043480574e-06, + "logits/chosen": -2.4247989654541016, + "logits/rejected": -1.7622289657592773, + "logps/chosen": -678.6212768554688, + "logps/rejected": -2012.311279296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028153896331787, + "rewards/margins": 13.653643608093262, + "rewards/rejected": -19.681800842285156, + "step": 12720 + }, + { + "epoch": 0.76, + "learning_rate": 4.651716428381468e-06, + "logits/chosen": -2.453187942504883, + "logits/rejected": -1.751888632774353, + "logps/chosen": -702.2307739257812, + "logps/rejected": -1997.5758056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.312932014465332, + "rewards/margins": 13.22215747833252, + "rewards/rejected": -19.53508949279785, + "step": 12730 + }, + { + "epoch": 0.76, + "learning_rate": 4.6508327774460994e-06, + "logits/chosen": -2.4859681129455566, + "logits/rejected": -1.8813331127166748, + "logps/chosen": -666.761962890625, + "logps/rejected": -1946.0980224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998958110809326, + "rewards/margins": 13.019369125366211, + "rewards/rejected": -19.018329620361328, + "step": 12740 + }, + { + "epoch": 0.76, + "learning_rate": 4.64994809109986e-06, + "logits/chosen": -2.4223616123199463, + "logits/rejected": -1.837713599205017, + "logps/chosen": -704.05029296875, + "logps/rejected": -2055.45263671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.407484531402588, + "rewards/margins": 13.699914932250977, + "rewards/rejected": -20.107398986816406, + "step": 12750 + }, + { + "epoch": 0.76, + "learning_rate": 4.649062369768637e-06, + "logits/chosen": -2.411831855773926, + "logits/rejected": -1.6856673955917358, + "logps/chosen": -711.548828125, + "logps/rejected": -2111.501953125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.432779788970947, + "rewards/margins": 14.237713813781738, + "rewards/rejected": -20.67049217224121, + "step": 12760 + }, + { + "epoch": 0.76, + "learning_rate": 4.648175613878816e-06, + "logits/chosen": -2.3479177951812744, + "logits/rejected": -1.617448091506958, + "logps/chosen": -737.6324462890625, + "logps/rejected": -2014.919189453125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638930320739746, + "rewards/margins": 13.076619148254395, + "rewards/rejected": -19.715547561645508, + "step": 12770 + }, + { + "epoch": 0.76, + "learning_rate": 4.647287823857283e-06, + "logits/chosen": -2.3565733432769775, + "logits/rejected": -1.5523662567138672, + "logps/chosen": -767.0740356445312, + "logps/rejected": -2003.057861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.97028112411499, + "rewards/margins": 12.615012168884277, + "rewards/rejected": -19.58529281616211, + "step": 12780 + }, + { + "epoch": 0.76, + "learning_rate": 4.646399000131419e-06, + "logits/chosen": -2.3879778385162354, + "logits/rejected": -1.5884318351745605, + "logps/chosen": -755.9432373046875, + "logps/rejected": -2117.128662109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.859809875488281, + "rewards/margins": 13.870355606079102, + "rewards/rejected": -20.730165481567383, + "step": 12790 + }, + { + "epoch": 0.76, + "learning_rate": 4.645509143129102e-06, + "logits/chosen": -2.3881771564483643, + "logits/rejected": -1.6098893880844116, + "logps/chosen": -679.435791015625, + "logps/rejected": -2065.548095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.12900972366333, + "rewards/margins": 14.088180541992188, + "rewards/rejected": -20.217187881469727, + "step": 12800 + }, + { + "epoch": 0.76, + "learning_rate": 4.644618253278712e-06, + "logits/chosen": -2.4493930339813232, + "logits/rejected": -1.648492455482483, + "logps/chosen": -703.3580322265625, + "logps/rejected": -1965.646484375, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.289845943450928, + "rewards/margins": 12.914731979370117, + "rewards/rejected": -19.20457649230957, + "step": 12810 + }, + { + "epoch": 0.76, + "learning_rate": 4.6437263310091226e-06, + "logits/chosen": -2.4801933765411377, + "logits/rejected": -1.8912436962127686, + "logps/chosen": -692.326171875, + "logps/rejected": -1970.704833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.266932964324951, + "rewards/margins": 13.010854721069336, + "rewards/rejected": -19.277788162231445, + "step": 12820 + }, + { + "epoch": 0.77, + "learning_rate": 4.642833376749704e-06, + "logits/chosen": -2.4665844440460205, + "logits/rejected": -1.6884453296661377, + "logps/chosen": -702.08349609375, + "logps/rejected": -2041.086669921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.285323619842529, + "rewards/margins": 13.67158031463623, + "rewards/rejected": -19.956905364990234, + "step": 12830 + }, + { + "epoch": 0.77, + "learning_rate": 4.6419393909303254e-06, + "logits/chosen": -2.3974814414978027, + "logits/rejected": -1.6111915111541748, + "logps/chosen": -722.271484375, + "logps/rejected": -2052.3603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.537937164306641, + "rewards/margins": 13.546780586242676, + "rewards/rejected": -20.084718704223633, + "step": 12840 + }, + { + "epoch": 0.77, + "learning_rate": 4.641044373981351e-06, + "logits/chosen": -2.433546543121338, + "logits/rejected": -1.7097238302230835, + "logps/chosen": -665.531005859375, + "logps/rejected": -2010.807373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967810153961182, + "rewards/margins": 13.694366455078125, + "rewards/rejected": -19.662174224853516, + "step": 12850 + }, + { + "epoch": 0.77, + "learning_rate": 4.640148326333643e-06, + "logits/chosen": -2.396629810333252, + "logits/rejected": -1.7052276134490967, + "logps/chosen": -720.03125, + "logps/rejected": -1995.2545166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4403510093688965, + "rewards/margins": 13.065176010131836, + "rewards/rejected": -19.50552749633789, + "step": 12860 + }, + { + "epoch": 0.77, + "learning_rate": 4.639251248418558e-06, + "logits/chosen": -2.380951404571533, + "logits/rejected": -1.723934531211853, + "logps/chosen": -714.1027221679688, + "logps/rejected": -2103.94580078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4600830078125, + "rewards/margins": 14.142303466796875, + "rewards/rejected": -20.602386474609375, + "step": 12870 + }, + { + "epoch": 0.77, + "learning_rate": 4.638353140667949e-06, + "logits/chosen": -2.4394631385803223, + "logits/rejected": -1.7818844318389893, + "logps/chosen": -663.3195190429688, + "logps/rejected": -1951.254638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9097700119018555, + "rewards/margins": 13.15641975402832, + "rewards/rejected": -19.06618881225586, + "step": 12880 + }, + { + "epoch": 0.77, + "learning_rate": 4.6374540035141644e-06, + "logits/chosen": -2.3805718421936035, + "logits/rejected": -1.7124698162078857, + "logps/chosen": -694.6668701171875, + "logps/rejected": -1998.4329833984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2161335945129395, + "rewards/margins": 13.327890396118164, + "rewards/rejected": -19.544023513793945, + "step": 12890 + }, + { + "epoch": 0.77, + "learning_rate": 4.636553837390051e-06, + "logits/chosen": -2.4313759803771973, + "logits/rejected": -1.6466548442840576, + "logps/chosen": -723.037353515625, + "logps/rejected": -2031.2525634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.463128089904785, + "rewards/margins": 13.410491943359375, + "rewards/rejected": -19.873619079589844, + "step": 12900 + }, + { + "epoch": 0.77, + "learning_rate": 4.6356526427289475e-06, + "logits/chosen": -2.4286553859710693, + "logits/rejected": -1.7335498332977295, + "logps/chosen": -686.3788452148438, + "logps/rejected": -2090.59716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.211666584014893, + "rewards/margins": 14.249481201171875, + "rewards/rejected": -20.46114730834961, + "step": 12910 + }, + { + "epoch": 0.77, + "learning_rate": 4.634750419964688e-06, + "logits/chosen": -2.47924542427063, + "logits/rejected": -1.8286685943603516, + "logps/chosen": -726.84375, + "logps/rejected": -1994.590087890625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.526707649230957, + "rewards/margins": 12.952032089233398, + "rewards/rejected": -19.478742599487305, + "step": 12920 + }, + { + "epoch": 0.77, + "learning_rate": 4.6338471695316046e-06, + "logits/chosen": -2.4589309692382812, + "logits/rejected": -1.7801307439804077, + "logps/chosen": -603.8506469726562, + "logps/rejected": -1798.8050537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.326873779296875, + "rewards/margins": 12.226424217224121, + "rewards/rejected": -17.553295135498047, + "step": 12930 + }, + { + "epoch": 0.77, + "learning_rate": 4.632942891864521e-06, + "logits/chosen": -2.458827495574951, + "logits/rejected": -1.6945905685424805, + "logps/chosen": -564.5950317382812, + "logps/rejected": -1820.890380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.893847465515137, + "rewards/margins": 12.871683120727539, + "rewards/rejected": -17.76552963256836, + "step": 12940 + }, + { + "epoch": 0.77, + "learning_rate": 4.632037587398756e-06, + "logits/chosen": -2.4454641342163086, + "logits/rejected": -1.8389196395874023, + "logps/chosen": -577.25439453125, + "logps/rejected": -1757.6900634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.082682132720947, + "rewards/margins": 12.047616958618164, + "rewards/rejected": -17.13029670715332, + "step": 12950 + }, + { + "epoch": 0.77, + "learning_rate": 4.631131256570124e-06, + "logits/chosen": -2.451078414916992, + "logits/rejected": -1.8245093822479248, + "logps/chosen": -573.1531982421875, + "logps/rejected": -1724.548583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.994175910949707, + "rewards/margins": 11.821687698364258, + "rewards/rejected": -16.815860748291016, + "step": 12960 + }, + { + "epoch": 0.77, + "learning_rate": 4.630223899814932e-06, + "logits/chosen": -2.460547685623169, + "logits/rejected": -1.8444846868515015, + "logps/chosen": -576.9832763671875, + "logps/rejected": -1713.3424072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.093632698059082, + "rewards/margins": 11.613265037536621, + "rewards/rejected": -16.706897735595703, + "step": 12970 + }, + { + "epoch": 0.77, + "learning_rate": 4.629315517569981e-06, + "logits/chosen": -2.4558472633361816, + "logits/rejected": -1.8126327991485596, + "logps/chosen": -568.3345947265625, + "logps/rejected": -1732.278564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.921177864074707, + "rewards/margins": 11.957035064697266, + "rewards/rejected": -16.878211975097656, + "step": 12980 + }, + { + "epoch": 0.77, + "learning_rate": 4.628406110272568e-06, + "logits/chosen": -2.464669704437256, + "logits/rejected": -1.8461921215057373, + "logps/chosen": -589.8338623046875, + "logps/rejected": -1750.888427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.174051284790039, + "rewards/margins": 11.89987564086914, + "rewards/rejected": -17.07392692565918, + "step": 12990 + }, + { + "epoch": 0.78, + "learning_rate": 4.627495678360481e-06, + "logits/chosen": -2.3948283195495605, + "logits/rejected": -1.7411584854125977, + "logps/chosen": -592.998291015625, + "logps/rejected": -1815.320556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.191784858703613, + "rewards/margins": 12.527776718139648, + "rewards/rejected": -17.719560623168945, + "step": 13000 + }, + { + "epoch": 0.78, + "learning_rate": 4.6265842222720005e-06, + "logits/chosen": -2.453845500946045, + "logits/rejected": -1.7896039485931396, + "logps/chosen": -561.2005615234375, + "logps/rejected": -1757.403076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8646392822265625, + "rewards/margins": 12.255282402038574, + "rewards/rejected": -17.11992073059082, + "step": 13010 + }, + { + "epoch": 0.78, + "learning_rate": 4.625671742445903e-06, + "logits/chosen": -2.4051451683044434, + "logits/rejected": -1.6979564428329468, + "logps/chosen": -582.1697998046875, + "logps/rejected": -1678.115478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.190701007843018, + "rewards/margins": 11.167378425598145, + "rewards/rejected": -16.35807991027832, + "step": 13020 + }, + { + "epoch": 0.78, + "learning_rate": 4.624758239321456e-06, + "logits/chosen": -2.4728822708129883, + "logits/rejected": -1.7739136219024658, + "logps/chosen": -571.895751953125, + "logps/rejected": -1766.8082275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.045773506164551, + "rewards/margins": 12.177142143249512, + "rewards/rejected": -17.222915649414062, + "step": 13030 + }, + { + "epoch": 0.78, + "learning_rate": 4.6238437133384195e-06, + "logits/chosen": -2.4388065338134766, + "logits/rejected": -1.7903282642364502, + "logps/chosen": -574.7263793945312, + "logps/rejected": -1727.135986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.073938846588135, + "rewards/margins": 11.762436866760254, + "rewards/rejected": -16.836376190185547, + "step": 13040 + }, + { + "epoch": 0.78, + "learning_rate": 4.622928164937046e-06, + "logits/chosen": -2.4515016078948975, + "logits/rejected": -1.8675768375396729, + "logps/chosen": -567.2552490234375, + "logps/rejected": -1751.1439208984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.005125522613525, + "rewards/margins": 12.05770206451416, + "rewards/rejected": -17.06282615661621, + "step": 13050 + }, + { + "epoch": 0.78, + "learning_rate": 4.6220115945580815e-06, + "logits/chosen": -2.405008316040039, + "logits/rejected": -1.6074386835098267, + "logps/chosen": -592.607177734375, + "logps/rejected": -1793.6474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.264058589935303, + "rewards/margins": 12.233983039855957, + "rewards/rejected": -17.4980411529541, + "step": 13060 + }, + { + "epoch": 0.78, + "learning_rate": 4.621094002642762e-06, + "logits/chosen": -2.4726433753967285, + "logits/rejected": -1.8763777017593384, + "logps/chosen": -590.9024047851562, + "logps/rejected": -1774.794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.229938983917236, + "rewards/margins": 12.083396911621094, + "rewards/rejected": -17.313335418701172, + "step": 13070 + }, + { + "epoch": 0.78, + "learning_rate": 4.620175389632817e-06, + "logits/chosen": -2.465994358062744, + "logits/rejected": -1.7633079290390015, + "logps/chosen": -600.75830078125, + "logps/rejected": -1762.3935546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2517218589782715, + "rewards/margins": 11.929886817932129, + "rewards/rejected": -17.181610107421875, + "step": 13080 + }, + { + "epoch": 0.78, + "learning_rate": 4.6192557559704665e-06, + "logits/chosen": -2.4412994384765625, + "logits/rejected": -1.8714603185653687, + "logps/chosen": -606.8112182617188, + "logps/rejected": -1840.524169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.378857612609863, + "rewards/margins": 12.580391883850098, + "rewards/rejected": -17.959247589111328, + "step": 13090 + }, + { + "epoch": 0.78, + "learning_rate": 4.618335102098423e-06, + "logits/chosen": -2.405137062072754, + "logits/rejected": -1.782088041305542, + "logps/chosen": -595.5103759765625, + "logps/rejected": -1803.210205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.252084732055664, + "rewards/margins": 12.346833229064941, + "rewards/rejected": -17.598918914794922, + "step": 13100 + }, + { + "epoch": 0.78, + "learning_rate": 4.617413428459887e-06, + "logits/chosen": -2.4665191173553467, + "logits/rejected": -1.825425386428833, + "logps/chosen": -584.7190551757812, + "logps/rejected": -1723.001220703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.101689338684082, + "rewards/margins": 11.686923027038574, + "rewards/rejected": -16.788612365722656, + "step": 13110 + }, + { + "epoch": 0.78, + "learning_rate": 4.616490735498553e-06, + "logits/chosen": -2.454895496368408, + "logits/rejected": -1.8368043899536133, + "logps/chosen": -594.46240234375, + "logps/rejected": -1839.5472412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.244992256164551, + "rewards/margins": 12.719340324401855, + "rewards/rejected": -17.964332580566406, + "step": 13120 + }, + { + "epoch": 0.78, + "learning_rate": 4.615567023658608e-06, + "logits/chosen": -2.4760375022888184, + "logits/rejected": -1.726867437362671, + "logps/chosen": -603.3980712890625, + "logps/rejected": -1829.9580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255461692810059, + "rewards/margins": 12.613761901855469, + "rewards/rejected": -17.86922264099121, + "step": 13130 + }, + { + "epoch": 0.78, + "learning_rate": 4.614642293384724e-06, + "logits/chosen": -2.4404895305633545, + "logits/rejected": -1.7053171396255493, + "logps/chosen": -629.5340576171875, + "logps/rejected": -1810.9293212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5993332862854, + "rewards/margins": 12.072461128234863, + "rewards/rejected": -17.671794891357422, + "step": 13140 + }, + { + "epoch": 0.78, + "learning_rate": 4.6137165451220665e-06, + "logits/chosen": -2.4110209941864014, + "logits/rejected": -1.7383931875228882, + "logps/chosen": -622.5280151367188, + "logps/rejected": -1749.4029541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.514034748077393, + "rewards/margins": 11.554815292358398, + "rewards/rejected": -17.068851470947266, + "step": 13150 + }, + { + "epoch": 0.78, + "learning_rate": 4.612789779316291e-06, + "logits/chosen": -2.4960808753967285, + "logits/rejected": -1.836991548538208, + "logps/chosen": -605.98486328125, + "logps/rejected": -1839.8046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278067111968994, + "rewards/margins": 12.679605484008789, + "rewards/rejected": -17.957672119140625, + "step": 13160 + }, + { + "epoch": 0.79, + "learning_rate": 4.611861996413542e-06, + "logits/chosen": -2.423718214035034, + "logits/rejected": -1.7465299367904663, + "logps/chosen": -621.466796875, + "logps/rejected": -1771.8046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4974565505981445, + "rewards/margins": 11.78736686706543, + "rewards/rejected": -17.28482437133789, + "step": 13170 + }, + { + "epoch": 0.79, + "learning_rate": 4.610933196860457e-06, + "logits/chosen": -2.3798863887786865, + "logits/rejected": -1.6493186950683594, + "logps/chosen": -599.2962646484375, + "logps/rejected": -1847.961181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.334364891052246, + "rewards/margins": 12.695138931274414, + "rewards/rejected": -18.029504776000977, + "step": 13180 + }, + { + "epoch": 0.79, + "learning_rate": 4.6100033811041565e-06, + "logits/chosen": -2.414393186569214, + "logits/rejected": -1.8339201211929321, + "logps/chosen": -595.1851196289062, + "logps/rejected": -1743.4681396484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.277136325836182, + "rewards/margins": 11.70134449005127, + "rewards/rejected": -16.97848129272461, + "step": 13190 + }, + { + "epoch": 0.79, + "learning_rate": 4.609072549592255e-06, + "logits/chosen": -2.389132261276245, + "logits/rejected": -1.7619049549102783, + "logps/chosen": -596.6653442382812, + "logps/rejected": -1890.408447265625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.292696952819824, + "rewards/margins": 13.168418884277344, + "rewards/rejected": -18.461116790771484, + "step": 13200 + }, + { + "epoch": 0.79, + "learning_rate": 4.608140702772855e-06, + "logits/chosen": -2.4786956310272217, + "logits/rejected": -1.6615040302276611, + "logps/chosen": -603.361572265625, + "logps/rejected": -1771.7935791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.324849605560303, + "rewards/margins": 11.949841499328613, + "rewards/rejected": -17.27469253540039, + "step": 13210 + }, + { + "epoch": 0.79, + "learning_rate": 4.607207841094549e-06, + "logits/chosen": -2.4047064781188965, + "logits/rejected": -1.6996606588363647, + "logps/chosen": -619.3894653320312, + "logps/rejected": -1866.727294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485365867614746, + "rewards/margins": 12.734511375427246, + "rewards/rejected": -18.219879150390625, + "step": 13220 + }, + { + "epoch": 0.79, + "learning_rate": 4.6062739650064135e-06, + "logits/chosen": -2.3952796459198, + "logits/rejected": -1.6980708837509155, + "logps/chosen": -623.1444091796875, + "logps/rejected": -1842.482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5475754737854, + "rewards/margins": 12.434094429016113, + "rewards/rejected": -17.98166847229004, + "step": 13230 + }, + { + "epoch": 0.79, + "learning_rate": 4.605339074958019e-06, + "logits/chosen": -2.4271304607391357, + "logits/rejected": -1.7121349573135376, + "logps/chosen": -615.9845581054688, + "logps/rejected": -1866.3583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.454762935638428, + "rewards/margins": 12.755050659179688, + "rewards/rejected": -18.20981216430664, + "step": 13240 + }, + { + "epoch": 0.79, + "learning_rate": 4.604403171399421e-06, + "logits/chosen": -2.356348991394043, + "logits/rejected": -1.6576502323150635, + "logps/chosen": -663.5438842773438, + "logps/rejected": -1759.5296630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.939993381500244, + "rewards/margins": 11.201715469360352, + "rewards/rejected": -17.141708374023438, + "step": 13250 + }, + { + "epoch": 0.79, + "learning_rate": 4.603466254781162e-06, + "logits/chosen": -2.462064743041992, + "logits/rejected": -1.6400835514068604, + "logps/chosen": -625.1858520507812, + "logps/rejected": -1852.284423828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.513754367828369, + "rewards/margins": 12.567644119262695, + "rewards/rejected": -18.08139991760254, + "step": 13260 + }, + { + "epoch": 0.79, + "learning_rate": 4.602528325554276e-06, + "logits/chosen": -2.400254964828491, + "logits/rejected": -1.604231834411621, + "logps/chosen": -614.3182983398438, + "logps/rejected": -1857.1556396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3877363204956055, + "rewards/margins": 12.731363296508789, + "rewards/rejected": -18.11910057067871, + "step": 13270 + }, + { + "epoch": 0.79, + "learning_rate": 4.60158938417028e-06, + "logits/chosen": -2.3628792762756348, + "logits/rejected": -1.6673399209976196, + "logps/chosen": -607.3860473632812, + "logps/rejected": -1878.9586181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.398555755615234, + "rewards/margins": 12.938928604125977, + "rewards/rejected": -18.33748435974121, + "step": 13280 + }, + { + "epoch": 0.79, + "learning_rate": 4.600649431081181e-06, + "logits/chosen": -2.423818349838257, + "logits/rejected": -1.642499566078186, + "logps/chosen": -641.8511962890625, + "logps/rejected": -1842.951904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.632086753845215, + "rewards/margins": 12.349369049072266, + "rewards/rejected": -17.981456756591797, + "step": 13290 + }, + { + "epoch": 0.79, + "learning_rate": 4.5997084667394735e-06, + "logits/chosen": -2.464672565460205, + "logits/rejected": -1.7541742324829102, + "logps/chosen": -640.5135498046875, + "logps/rejected": -1871.3441162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.652100563049316, + "rewards/margins": 12.610963821411133, + "rewards/rejected": -18.263065338134766, + "step": 13300 + }, + { + "epoch": 0.79, + "learning_rate": 4.598766491598137e-06, + "logits/chosen": -2.3841357231140137, + "logits/rejected": -1.6965067386627197, + "logps/chosen": -613.6336669921875, + "logps/rejected": -1840.2493896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.431149482727051, + "rewards/margins": 12.520223617553711, + "rewards/rejected": -17.951372146606445, + "step": 13310 + }, + { + "epoch": 0.79, + "learning_rate": 4.597823506110637e-06, + "logits/chosen": -2.4137747287750244, + "logits/rejected": -1.5727856159210205, + "logps/chosen": -616.3074951171875, + "logps/rejected": -1884.9310302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423491477966309, + "rewards/margins": 12.982930183410645, + "rewards/rejected": -18.406421661376953, + "step": 13320 + }, + { + "epoch": 0.79, + "learning_rate": 4.596879510730929e-06, + "logits/chosen": -2.445005416870117, + "logits/rejected": -1.7327598333358765, + "logps/chosen": -621.5435791015625, + "logps/rejected": -1862.7578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.557125091552734, + "rewards/margins": 12.627302169799805, + "rewards/rejected": -18.18442726135254, + "step": 13330 + }, + { + "epoch": 0.8, + "learning_rate": 4.595934505913451e-06, + "logits/chosen": -2.4014439582824707, + "logits/rejected": -1.731407880783081, + "logps/chosen": -638.3585205078125, + "logps/rejected": -1837.498291015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6644697189331055, + "rewards/margins": 12.27830696105957, + "rewards/rejected": -17.942779541015625, + "step": 13340 + }, + { + "epoch": 0.8, + "learning_rate": 4.594988492113128e-06, + "logits/chosen": -2.4731392860412598, + "logits/rejected": -1.7836389541625977, + "logps/chosen": -641.7222900390625, + "logps/rejected": -1912.169677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6823811531066895, + "rewards/margins": 12.978854179382324, + "rewards/rejected": -18.661237716674805, + "step": 13350 + }, + { + "epoch": 0.8, + "learning_rate": 4.594041469785373e-06, + "logits/chosen": -2.4327101707458496, + "logits/rejected": -1.8041969537734985, + "logps/chosen": -631.9505615234375, + "logps/rejected": -1777.7718505859375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.640171527862549, + "rewards/margins": 11.68016242980957, + "rewards/rejected": -17.320335388183594, + "step": 13360 + }, + { + "epoch": 0.8, + "learning_rate": 4.59309343938608e-06, + "logits/chosen": -2.4071383476257324, + "logits/rejected": -1.650277853012085, + "logps/chosen": -602.3390502929688, + "logps/rejected": -1906.5865478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.356301784515381, + "rewards/margins": 13.264506340026855, + "rewards/rejected": -18.62080955505371, + "step": 13370 + }, + { + "epoch": 0.8, + "learning_rate": 4.592144401371632e-06, + "logits/chosen": -2.466334581375122, + "logits/rejected": -1.6765035390853882, + "logps/chosen": -593.9075927734375, + "logps/rejected": -1820.272705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228102207183838, + "rewards/margins": 12.530380249023438, + "rewards/rejected": -17.758480072021484, + "step": 13380 + }, + { + "epoch": 0.8, + "learning_rate": 4.591194356198896e-06, + "logits/chosen": -2.3999202251434326, + "logits/rejected": -1.7901241779327393, + "logps/chosen": -626.6542358398438, + "logps/rejected": -1785.316162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.574634075164795, + "rewards/margins": 11.827539443969727, + "rewards/rejected": -17.40217399597168, + "step": 13390 + }, + { + "epoch": 0.8, + "learning_rate": 4.5902433043252235e-06, + "logits/chosen": -2.4231531620025635, + "logits/rejected": -1.834602952003479, + "logps/chosen": -613.6676025390625, + "logps/rejected": -1806.4488525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.438195705413818, + "rewards/margins": 12.174294471740723, + "rewards/rejected": -17.61248779296875, + "step": 13400 + }, + { + "epoch": 0.8, + "learning_rate": 4.5892912462084515e-06, + "logits/chosen": -2.4626832008361816, + "logits/rejected": -1.7681621313095093, + "logps/chosen": -607.7245483398438, + "logps/rejected": -1873.8873291015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.371595859527588, + "rewards/margins": 12.90974235534668, + "rewards/rejected": -18.28133773803711, + "step": 13410 + }, + { + "epoch": 0.8, + "learning_rate": 4.5883381823069e-06, + "logits/chosen": -2.4415435791015625, + "logits/rejected": -1.8169586658477783, + "logps/chosen": -598.5447998046875, + "logps/rejected": -1859.2689208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.275871276855469, + "rewards/margins": 12.872105598449707, + "rewards/rejected": -18.14797592163086, + "step": 13420 + }, + { + "epoch": 0.8, + "learning_rate": 4.5873841130793735e-06, + "logits/chosen": -2.389749526977539, + "logits/rejected": -1.6664625406265259, + "logps/chosen": -617.5665893554688, + "logps/rejected": -1837.826416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.500380516052246, + "rewards/margins": 12.418254852294922, + "rewards/rejected": -17.918636322021484, + "step": 13430 + }, + { + "epoch": 0.8, + "learning_rate": 4.586429038985163e-06, + "logits/chosen": -2.388308048248291, + "logits/rejected": -1.7213672399520874, + "logps/chosen": -622.2623291015625, + "logps/rejected": -1860.4664306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475030422210693, + "rewards/margins": 12.687268257141113, + "rewards/rejected": -18.16229820251465, + "step": 13440 + }, + { + "epoch": 0.8, + "learning_rate": 4.585472960484038e-06, + "logits/chosen": -2.3852028846740723, + "logits/rejected": -1.7284958362579346, + "logps/chosen": -628.537109375, + "logps/rejected": -1871.5087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.592103958129883, + "rewards/margins": 12.666239738464355, + "rewards/rejected": -18.258344650268555, + "step": 13450 + }, + { + "epoch": 0.8, + "learning_rate": 4.584515878036257e-06, + "logits/chosen": -2.4022648334503174, + "logits/rejected": -1.8131773471832275, + "logps/chosen": -633.6952514648438, + "logps/rejected": -1797.1380615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.618086338043213, + "rewards/margins": 11.910394668579102, + "rewards/rejected": -17.52848243713379, + "step": 13460 + }, + { + "epoch": 0.8, + "learning_rate": 4.583557792102559e-06, + "logits/chosen": -2.4522290229797363, + "logits/rejected": -1.815807580947876, + "logps/chosen": -615.5139770507812, + "logps/rejected": -1824.384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.408617973327637, + "rewards/margins": 12.391364097595215, + "rewards/rejected": -17.79998207092285, + "step": 13470 + }, + { + "epoch": 0.8, + "learning_rate": 4.5825987031441676e-06, + "logits/chosen": -2.483424425125122, + "logits/rejected": -1.8039897680282593, + "logps/chosen": -594.4586181640625, + "logps/rejected": -1835.971435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305274963378906, + "rewards/margins": 12.610057830810547, + "rewards/rejected": -17.915332794189453, + "step": 13480 + }, + { + "epoch": 0.8, + "learning_rate": 4.581638611622786e-06, + "logits/chosen": -2.414496421813965, + "logits/rejected": -1.670935034751892, + "logps/chosen": -638.189697265625, + "logps/rejected": -1880.693359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.610324382781982, + "rewards/margins": 12.759140968322754, + "rewards/rejected": -18.369464874267578, + "step": 13490 + }, + { + "epoch": 0.81, + "learning_rate": 4.580677518000604e-06, + "logits/chosen": -2.4547572135925293, + "logits/rejected": -1.5903236865997314, + "logps/chosen": -617.0198974609375, + "logps/rejected": -1852.659423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.405229091644287, + "rewards/margins": 12.679719924926758, + "rewards/rejected": -18.084949493408203, + "step": 13500 + }, + { + "epoch": 0.81, + "learning_rate": 4.5797154227402905e-06, + "logits/chosen": -2.383563756942749, + "logits/rejected": -1.6369588375091553, + "logps/chosen": -660.883544921875, + "logps/rejected": -1817.0703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.913346767425537, + "rewards/margins": 11.809854507446289, + "rewards/rejected": -17.72319984436035, + "step": 13510 + }, + { + "epoch": 0.81, + "learning_rate": 4.578752326304999e-06, + "logits/chosen": -2.407721757888794, + "logits/rejected": -1.676630973815918, + "logps/chosen": -652.0762939453125, + "logps/rejected": -1967.1978759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.746611595153809, + "rewards/margins": 13.4640474319458, + "rewards/rejected": -19.210657119750977, + "step": 13520 + }, + { + "epoch": 0.81, + "learning_rate": 4.577788229158364e-06, + "logits/chosen": -2.4500021934509277, + "logits/rejected": -1.7158219814300537, + "logps/chosen": -650.6060180664062, + "logps/rejected": -1901.4908447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.758397102355957, + "rewards/margins": 12.81358814239502, + "rewards/rejected": -18.571985244750977, + "step": 13530 + }, + { + "epoch": 0.81, + "learning_rate": 4.576823131764503e-06, + "logits/chosen": -2.4282238483428955, + "logits/rejected": -1.6447480916976929, + "logps/chosen": -641.8367309570312, + "logps/rejected": -2010.24609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6776628494262695, + "rewards/margins": 13.9769287109375, + "rewards/rejected": -19.654590606689453, + "step": 13540 + }, + { + "epoch": 0.81, + "learning_rate": 4.5758570345880114e-06, + "logits/chosen": -2.419501304626465, + "logits/rejected": -1.6665074825286865, + "logps/chosen": -629.7803955078125, + "logps/rejected": -1949.514892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.596158027648926, + "rewards/margins": 13.454187393188477, + "rewards/rejected": -19.05034828186035, + "step": 13550 + }, + { + "epoch": 0.81, + "learning_rate": 4.574889938093971e-06, + "logits/chosen": -2.3370742797851562, + "logits/rejected": -1.6412550210952759, + "logps/chosen": -625.3291015625, + "logps/rejected": -1952.061279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.55403995513916, + "rewards/margins": 13.522100448608398, + "rewards/rejected": -19.076141357421875, + "step": 13560 + }, + { + "epoch": 0.81, + "learning_rate": 4.57392184274794e-06, + "logits/chosen": -2.4499053955078125, + "logits/rejected": -1.6618671417236328, + "logps/chosen": -661.5618896484375, + "logps/rejected": -1918.260986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.936981201171875, + "rewards/margins": 12.797301292419434, + "rewards/rejected": -18.734281539916992, + "step": 13570 + }, + { + "epoch": 0.81, + "learning_rate": 4.572952749015961e-06, + "logits/chosen": -2.427934169769287, + "logits/rejected": -1.66521418094635, + "logps/chosen": -668.9550170898438, + "logps/rejected": -1915.7760009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99800968170166, + "rewards/margins": 12.720717430114746, + "rewards/rejected": -18.71872901916504, + "step": 13580 + }, + { + "epoch": 0.81, + "learning_rate": 4.571982657364555e-06, + "logits/chosen": -2.3979573249816895, + "logits/rejected": -1.6604499816894531, + "logps/chosen": -653.4246215820312, + "logps/rejected": -1888.0172119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.884744167327881, + "rewards/margins": 12.553996086120605, + "rewards/rejected": -18.438739776611328, + "step": 13590 + }, + { + "epoch": 0.81, + "learning_rate": 4.571011568260724e-06, + "logits/chosen": -2.4509902000427246, + "logits/rejected": -1.7922443151474, + "logps/chosen": -616.5641479492188, + "logps/rejected": -1913.6470947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.470488548278809, + "rewards/margins": 13.212881088256836, + "rewards/rejected": -18.68337059020996, + "step": 13600 + }, + { + "epoch": 0.81, + "learning_rate": 4.57003948217195e-06, + "logits/chosen": -2.500159740447998, + "logits/rejected": -1.7359256744384766, + "logps/chosen": -631.2503051757812, + "logps/rejected": -1915.6015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.608972072601318, + "rewards/margins": 13.103962898254395, + "rewards/rejected": -18.712934494018555, + "step": 13610 + }, + { + "epoch": 0.81, + "learning_rate": 4.569066399566196e-06, + "logits/chosen": -2.418944835662842, + "logits/rejected": -1.6166565418243408, + "logps/chosen": -625.7158813476562, + "logps/rejected": -1872.2064208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5152812004089355, + "rewards/margins": 12.760553359985352, + "rewards/rejected": -18.275836944580078, + "step": 13620 + }, + { + "epoch": 0.81, + "learning_rate": 4.568092320911904e-06, + "logits/chosen": -2.454049587249756, + "logits/rejected": -1.643715500831604, + "logps/chosen": -621.3746337890625, + "logps/rejected": -1872.5845947265625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5139970779418945, + "rewards/margins": 12.756540298461914, + "rewards/rejected": -18.270540237426758, + "step": 13630 + }, + { + "epoch": 0.81, + "learning_rate": 4.567117246677996e-06, + "logits/chosen": -2.404327869415283, + "logits/rejected": -1.857814073562622, + "logps/chosen": -539.983642578125, + "logps/rejected": -1752.614013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7680864334106445, + "rewards/margins": 12.320699691772461, + "rewards/rejected": -17.088787078857422, + "step": 13640 + }, + { + "epoch": 0.81, + "learning_rate": 4.566141177333871e-06, + "logits/chosen": -2.4038095474243164, + "logits/rejected": -1.8999652862548828, + "logps/chosen": -504.31951904296875, + "logps/rejected": -1718.064208984375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.351359844207764, + "rewards/margins": 12.37978458404541, + "rewards/rejected": -16.731143951416016, + "step": 13650 + }, + { + "epoch": 0.81, + "learning_rate": 4.565164113349408e-06, + "logits/chosen": -2.4718985557556152, + "logits/rejected": -1.8220832347869873, + "logps/chosen": -502.2413024902344, + "logps/rejected": -1780.425048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.338966369628906, + "rewards/margins": 13.014968872070312, + "rewards/rejected": -17.35393524169922, + "step": 13660 + }, + { + "epoch": 0.82, + "learning_rate": 4.564186055194969e-06, + "logits/chosen": -2.529615879058838, + "logits/rejected": -1.931857705116272, + "logps/chosen": -503.52545166015625, + "logps/rejected": -1760.316650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.331194877624512, + "rewards/margins": 12.832008361816406, + "rewards/rejected": -17.163204193115234, + "step": 13670 + }, + { + "epoch": 0.82, + "learning_rate": 4.563207003341389e-06, + "logits/chosen": -2.446240186691284, + "logits/rejected": -1.8308908939361572, + "logps/chosen": -510.70941162109375, + "logps/rejected": -1766.8978271484375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3748579025268555, + "rewards/margins": 12.848179817199707, + "rewards/rejected": -17.223037719726562, + "step": 13680 + }, + { + "epoch": 0.82, + "learning_rate": 4.562226958259983e-06, + "logits/chosen": -2.4724488258361816, + "logits/rejected": -1.7043960094451904, + "logps/chosen": -541.8663330078125, + "logps/rejected": -1813.8958740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.777680397033691, + "rewards/margins": 12.899312019348145, + "rewards/rejected": -17.676992416381836, + "step": 13690 + }, + { + "epoch": 0.82, + "learning_rate": 4.561245920422547e-06, + "logits/chosen": -2.460123062133789, + "logits/rejected": -1.7798267602920532, + "logps/chosen": -531.2394409179688, + "logps/rejected": -1883.0654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.615455627441406, + "rewards/margins": 13.782255172729492, + "rewards/rejected": -18.397708892822266, + "step": 13700 + }, + { + "epoch": 0.82, + "learning_rate": 4.56026389030135e-06, + "logits/chosen": -2.3898935317993164, + "logits/rejected": -1.6587419509887695, + "logps/chosen": -553.6705932617188, + "logps/rejected": -1842.6373291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.886819839477539, + "rewards/margins": 13.097335815429688, + "rewards/rejected": -17.98415756225586, + "step": 13710 + }, + { + "epoch": 0.82, + "learning_rate": 4.559280868369143e-06, + "logits/chosen": -2.4464125633239746, + "logits/rejected": -1.7879283428192139, + "logps/chosen": -545.4829711914062, + "logps/rejected": -1811.0784912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.743433952331543, + "rewards/margins": 12.925924301147461, + "rewards/rejected": -17.669357299804688, + "step": 13720 + }, + { + "epoch": 0.82, + "learning_rate": 4.558296855099152e-06, + "logits/chosen": -2.4647459983825684, + "logits/rejected": -1.6723673343658447, + "logps/chosen": -569.4472045898438, + "logps/rejected": -1802.1107177734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.924517631530762, + "rewards/margins": 12.65803050994873, + "rewards/rejected": -17.582550048828125, + "step": 13730 + }, + { + "epoch": 0.82, + "learning_rate": 4.557311850965081e-06, + "logits/chosen": -2.423835277557373, + "logits/rejected": -1.7188637256622314, + "logps/chosen": -544.5731201171875, + "logps/rejected": -1907.7249755859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.698594093322754, + "rewards/margins": 13.938451766967773, + "rewards/rejected": -18.637046813964844, + "step": 13740 + }, + { + "epoch": 0.82, + "learning_rate": 4.556325856441112e-06, + "logits/chosen": -2.424835205078125, + "logits/rejected": -1.6510553359985352, + "logps/chosen": -596.4552001953125, + "logps/rejected": -1903.4508056640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.246728420257568, + "rewards/margins": 13.337529182434082, + "rewards/rejected": -18.58425521850586, + "step": 13750 + }, + { + "epoch": 0.82, + "learning_rate": 4.555338872001901e-06, + "logits/chosen": -2.3983755111694336, + "logits/rejected": -1.7141603231430054, + "logps/chosen": -582.6017456054688, + "logps/rejected": -1926.0054931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.143181800842285, + "rewards/margins": 13.682185173034668, + "rewards/rejected": -18.825368881225586, + "step": 13760 + }, + { + "epoch": 0.82, + "learning_rate": 4.554350898122585e-06, + "logits/chosen": -2.3917384147644043, + "logits/rejected": -1.6495091915130615, + "logps/chosen": -571.1302490234375, + "logps/rejected": -1917.117919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0733513832092285, + "rewards/margins": 13.656408309936523, + "rewards/rejected": -18.729759216308594, + "step": 13770 + }, + { + "epoch": 0.82, + "learning_rate": 4.55336193527877e-06, + "logits/chosen": -2.4065606594085693, + "logits/rejected": -1.8317371606826782, + "logps/chosen": -576.1359252929688, + "logps/rejected": -1871.4671630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0814313888549805, + "rewards/margins": 13.18761920928955, + "rewards/rejected": -18.26905059814453, + "step": 13780 + }, + { + "epoch": 0.82, + "learning_rate": 4.552371983946548e-06, + "logits/chosen": -2.4431629180908203, + "logits/rejected": -1.7209899425506592, + "logps/chosen": -573.8416748046875, + "logps/rejected": -1943.122802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.058442115783691, + "rewards/margins": 13.929224014282227, + "rewards/rejected": -18.987667083740234, + "step": 13790 + }, + { + "epoch": 0.82, + "learning_rate": 4.551381044602478e-06, + "logits/chosen": -2.3891704082489014, + "logits/rejected": -1.60659921169281, + "logps/chosen": -577.2105712890625, + "logps/rejected": -1914.9407958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.025047779083252, + "rewards/margins": 13.676836013793945, + "rewards/rejected": -18.701885223388672, + "step": 13800 + }, + { + "epoch": 0.82, + "learning_rate": 4.550389117723599e-06, + "logits/chosen": -2.4442298412323, + "logits/rejected": -1.7970117330551147, + "logps/chosen": -593.9210205078125, + "logps/rejected": -1872.033935546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.251876354217529, + "rewards/margins": 13.035659790039062, + "rewards/rejected": -18.28753662109375, + "step": 13810 + }, + { + "epoch": 0.82, + "learning_rate": 4.549396203787426e-06, + "logits/chosen": -2.4190969467163086, + "logits/rejected": -1.8042036294937134, + "logps/chosen": -598.8755493164062, + "logps/rejected": -1944.312744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.289017677307129, + "rewards/margins": 13.703651428222656, + "rewards/rejected": -18.9926700592041, + "step": 13820 + }, + { + "epoch": 0.82, + "learning_rate": 4.548402303271946e-06, + "logits/chosen": -2.423780679702759, + "logits/rejected": -1.771847128868103, + "logps/chosen": -582.0973510742188, + "logps/rejected": -1880.2119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.046879768371582, + "rewards/margins": 13.308856010437012, + "rewards/rejected": -18.355737686157227, + "step": 13830 + }, + { + "epoch": 0.83, + "learning_rate": 4.5474074166556235e-06, + "logits/chosen": -2.461277723312378, + "logits/rejected": -1.7966480255126953, + "logps/chosen": -587.2761840820312, + "logps/rejected": -1938.2349853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.149739742279053, + "rewards/margins": 13.788665771484375, + "rewards/rejected": -18.938404083251953, + "step": 13840 + }, + { + "epoch": 0.83, + "learning_rate": 4.546411544417396e-06, + "logits/chosen": -2.4273252487182617, + "logits/rejected": -1.6373345851898193, + "logps/chosen": -594.5912475585938, + "logps/rejected": -1965.911865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.274791717529297, + "rewards/margins": 13.934051513671875, + "rewards/rejected": -19.208843231201172, + "step": 13850 + }, + { + "epoch": 0.83, + "learning_rate": 4.5454146870366775e-06, + "logits/chosen": -2.4307961463928223, + "logits/rejected": -1.6027278900146484, + "logps/chosen": -591.7764892578125, + "logps/rejected": -1921.392333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.229638576507568, + "rewards/margins": 13.551918029785156, + "rewards/rejected": -18.78155517578125, + "step": 13860 + }, + { + "epoch": 0.83, + "learning_rate": 4.544416844993355e-06, + "logits/chosen": -2.4531776905059814, + "logits/rejected": -1.7639774084091187, + "logps/chosen": -573.3653564453125, + "logps/rejected": -1995.612548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.060533046722412, + "rewards/margins": 14.454622268676758, + "rewards/rejected": -19.515155792236328, + "step": 13870 + }, + { + "epoch": 0.83, + "learning_rate": 4.543418018767789e-06, + "logits/chosen": -2.4480252265930176, + "logits/rejected": -1.7337665557861328, + "logps/chosen": -578.6314086914062, + "logps/rejected": -1904.42578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.021125316619873, + "rewards/margins": 13.592120170593262, + "rewards/rejected": -18.61324691772461, + "step": 13880 + }, + { + "epoch": 0.83, + "learning_rate": 4.542418208840816e-06, + "logits/chosen": -2.4045357704162598, + "logits/rejected": -1.5227367877960205, + "logps/chosen": -613.4718017578125, + "logps/rejected": -1968.5924072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3920793533325195, + "rewards/margins": 13.836689949035645, + "rewards/rejected": -19.228771209716797, + "step": 13890 + }, + { + "epoch": 0.83, + "learning_rate": 4.541417415693742e-06, + "logits/chosen": -2.425811767578125, + "logits/rejected": -1.680198311805725, + "logps/chosen": -588.3634643554688, + "logps/rejected": -1921.6409912109375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.24765682220459, + "rewards/margins": 13.525156021118164, + "rewards/rejected": -18.772811889648438, + "step": 13900 + }, + { + "epoch": 0.83, + "learning_rate": 4.54041563980835e-06, + "logits/chosen": -2.3724114894866943, + "logits/rejected": -1.6206023693084717, + "logps/chosen": -644.112060546875, + "logps/rejected": -1916.4888916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7505035400390625, + "rewards/margins": 12.978302001953125, + "rewards/rejected": -18.728805541992188, + "step": 13910 + }, + { + "epoch": 0.83, + "learning_rate": 4.539412881666896e-06, + "logits/chosen": -2.396564245223999, + "logits/rejected": -1.6586217880249023, + "logps/chosen": -647.4867553710938, + "logps/rejected": -1937.4078369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.70162296295166, + "rewards/margins": 13.226404190063477, + "rewards/rejected": -18.928028106689453, + "step": 13920 + }, + { + "epoch": 0.83, + "learning_rate": 4.538409141752106e-06, + "logits/chosen": -2.4051010608673096, + "logits/rejected": -1.5813727378845215, + "logps/chosen": -643.4798583984375, + "logps/rejected": -2030.5433349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.699530124664307, + "rewards/margins": 14.15778923034668, + "rewards/rejected": -19.857318878173828, + "step": 13930 + }, + { + "epoch": 0.83, + "learning_rate": 4.537404420547181e-06, + "logits/chosen": -2.370701313018799, + "logits/rejected": -1.6812622547149658, + "logps/chosen": -602.7958374023438, + "logps/rejected": -1994.7490234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.31768274307251, + "rewards/margins": 14.187101364135742, + "rewards/rejected": -19.50478744506836, + "step": 13940 + }, + { + "epoch": 0.83, + "learning_rate": 4.536398718535795e-06, + "logits/chosen": -2.4247422218322754, + "logits/rejected": -1.703731894493103, + "logps/chosen": -607.4510498046875, + "logps/rejected": -1958.0416259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.420541763305664, + "rewards/margins": 13.711987495422363, + "rewards/rejected": -19.13252830505371, + "step": 13950 + }, + { + "epoch": 0.83, + "learning_rate": 4.535392036202091e-06, + "logits/chosen": -2.4002623558044434, + "logits/rejected": -1.6765400171279907, + "logps/chosen": -632.9725341796875, + "logps/rejected": -2007.510986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.653136253356934, + "rewards/margins": 13.965818405151367, + "rewards/rejected": -19.618953704833984, + "step": 13960 + }, + { + "epoch": 0.83, + "learning_rate": 4.534384374030687e-06, + "logits/chosen": -2.3875136375427246, + "logits/rejected": -1.639901876449585, + "logps/chosen": -646.0687255859375, + "logps/rejected": -1954.979736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.74053955078125, + "rewards/margins": 13.352561950683594, + "rewards/rejected": -19.093103408813477, + "step": 13970 + }, + { + "epoch": 0.83, + "learning_rate": 4.5333757325066715e-06, + "logits/chosen": -2.425746202468872, + "logits/rejected": -1.7002439498901367, + "logps/chosen": -614.8092041015625, + "logps/rejected": -1956.463623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.447579860687256, + "rewards/margins": 13.67480182647705, + "rewards/rejected": -19.12238121032715, + "step": 13980 + }, + { + "epoch": 0.83, + "learning_rate": 4.532366112115604e-06, + "logits/chosen": -2.403960943222046, + "logits/rejected": -1.6453050374984741, + "logps/chosen": -649.8294067382812, + "logps/rejected": -1998.9954833984375, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.745020389556885, + "rewards/margins": 13.798690795898438, + "rewards/rejected": -19.543710708618164, + "step": 13990 + }, + { + "epoch": 0.83, + "learning_rate": 4.531355513343516e-06, + "logits/chosen": -2.419039249420166, + "logits/rejected": -1.6722631454467773, + "logps/chosen": -675.5302734375, + "logps/rejected": -1872.0400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0948567390441895, + "rewards/margins": 12.183938980102539, + "rewards/rejected": -18.27879524230957, + "step": 14000 + }, + { + "epoch": 0.83, + "eval_logits/chosen": -2.3449742794036865, + "eval_logits/rejected": -1.9434857368469238, + "eval_logps/chosen": -714.5291748046875, + "eval_logps/rejected": -1741.564697265625, + "eval_loss": 0.00017542393470648676, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -6.474546432495117, + "eval_rewards/margins": 10.485617637634277, + "eval_rewards/rejected": -16.960163116455078, + "eval_runtime": 3.9012, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.256, + "step": 14000 + }, + { + "epoch": 0.84, + "learning_rate": 4.5303439366769095e-06, + "logits/chosen": -2.4042086601257324, + "logits/rejected": -1.5947492122650146, + "logps/chosen": -691.4254760742188, + "logps/rejected": -1894.690185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.196560859680176, + "rewards/margins": 12.299505233764648, + "rewards/rejected": -18.49606704711914, + "step": 14010 + }, + { + "epoch": 0.84, + "learning_rate": 4.5293313826027585e-06, + "logits/chosen": -2.4124045372009277, + "logits/rejected": -1.6870477199554443, + "logps/chosen": -729.6375732421875, + "logps/rejected": -1885.588134765625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.579291343688965, + "rewards/margins": 11.834716796875, + "rewards/rejected": -18.41400909423828, + "step": 14020 + }, + { + "epoch": 0.84, + "learning_rate": 4.528317851608506e-06, + "logits/chosen": -2.406155586242676, + "logits/rejected": -1.576483964920044, + "logps/chosen": -656.1241455078125, + "logps/rejected": -1914.6617431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.822575569152832, + "rewards/margins": 12.88447380065918, + "rewards/rejected": -18.707050323486328, + "step": 14030 + }, + { + "epoch": 0.84, + "learning_rate": 4.527303344182065e-06, + "logits/chosen": -2.395690441131592, + "logits/rejected": -1.6856533288955688, + "logps/chosen": -696.9719848632812, + "logps/rejected": -1963.2923583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.270760536193848, + "rewards/margins": 12.91821002960205, + "rewards/rejected": -19.1889705657959, + "step": 14040 + }, + { + "epoch": 0.84, + "learning_rate": 4.526287860811819e-06, + "logits/chosen": -2.4405322074890137, + "logits/rejected": -1.7815732955932617, + "logps/chosen": -649.720703125, + "logps/rejected": -1961.778564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.832555294036865, + "rewards/margins": 13.345240592956543, + "rewards/rejected": -19.17779541015625, + "step": 14050 + }, + { + "epoch": 0.84, + "learning_rate": 4.5252714019866236e-06, + "logits/chosen": -2.4079034328460693, + "logits/rejected": -1.780470609664917, + "logps/chosen": -628.1253051757812, + "logps/rejected": -1933.053955078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.592816352844238, + "rewards/margins": 13.282320976257324, + "rewards/rejected": -18.875139236450195, + "step": 14060 + }, + { + "epoch": 0.84, + "learning_rate": 4.524253968195802e-06, + "logits/chosen": -2.4612488746643066, + "logits/rejected": -1.8192218542099, + "logps/chosen": -690.241943359375, + "logps/rejected": -1987.998779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166144371032715, + "rewards/margins": 13.270708084106445, + "rewards/rejected": -19.436853408813477, + "step": 14070 + }, + { + "epoch": 0.84, + "learning_rate": 4.523235559929144e-06, + "logits/chosen": -2.426069736480713, + "logits/rejected": -1.6528816223144531, + "logps/chosen": -660.6192016601562, + "logps/rejected": -1940.8525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.893461227416992, + "rewards/margins": 13.068522453308105, + "rewards/rejected": -18.961986541748047, + "step": 14080 + }, + { + "epoch": 0.84, + "learning_rate": 4.522216177676915e-06, + "logits/chosen": -2.448185682296753, + "logits/rejected": -1.7710158824920654, + "logps/chosen": -629.293701171875, + "logps/rejected": -1912.7255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.574524879455566, + "rewards/margins": 13.108548164367676, + "rewards/rejected": -18.683073043823242, + "step": 14090 + }, + { + "epoch": 0.84, + "learning_rate": 4.521195821929843e-06, + "logits/chosen": -2.4017856121063232, + "logits/rejected": -1.737876534461975, + "logps/chosen": -669.6171264648438, + "logps/rejected": -1929.277587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001690864562988, + "rewards/margins": 12.848731994628906, + "rewards/rejected": -18.850425720214844, + "step": 14100 + }, + { + "epoch": 0.84, + "learning_rate": 4.520174493179128e-06, + "logits/chosen": -2.402163028717041, + "logits/rejected": -1.6545509099960327, + "logps/chosen": -671.6890258789062, + "logps/rejected": -1938.7623291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0515875816345215, + "rewards/margins": 12.8888521194458, + "rewards/rejected": -18.940441131591797, + "step": 14110 + }, + { + "epoch": 0.84, + "learning_rate": 4.519152191916439e-06, + "logits/chosen": -2.4294991493225098, + "logits/rejected": -1.7472988367080688, + "logps/chosen": -666.3199462890625, + "logps/rejected": -1979.262451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.919328689575195, + "rewards/margins": 13.429830551147461, + "rewards/rejected": -19.349159240722656, + "step": 14120 + }, + { + "epoch": 0.84, + "learning_rate": 4.5181289186339085e-06, + "logits/chosen": -2.417160749435425, + "logits/rejected": -1.7449824810028076, + "logps/chosen": -665.8554077148438, + "logps/rejected": -1942.4447021484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910078525543213, + "rewards/margins": 13.063494682312012, + "rewards/rejected": -18.97357177734375, + "step": 14130 + }, + { + "epoch": 0.84, + "learning_rate": 4.517104673824143e-06, + "logits/chosen": -2.448392152786255, + "logits/rejected": -1.7964239120483398, + "logps/chosen": -680.8096313476562, + "logps/rejected": -1908.4683837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077609062194824, + "rewards/margins": 12.548169136047363, + "rewards/rejected": -18.625782012939453, + "step": 14140 + }, + { + "epoch": 0.84, + "learning_rate": 4.516079457980215e-06, + "logits/chosen": -2.4221012592315674, + "logits/rejected": -1.7467788457870483, + "logps/chosen": -676.0969848632812, + "logps/rejected": -1894.0784912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044197082519531, + "rewards/margins": 12.44613265991211, + "rewards/rejected": -18.49032974243164, + "step": 14150 + }, + { + "epoch": 0.84, + "learning_rate": 4.51505327159566e-06, + "logits/chosen": -2.4501914978027344, + "logits/rejected": -1.8106451034545898, + "logps/chosen": -686.16796875, + "logps/rejected": -1863.0517578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146075248718262, + "rewards/margins": 12.041644096374512, + "rewards/rejected": -18.18771743774414, + "step": 14160 + }, + { + "epoch": 0.84, + "learning_rate": 4.514026115164486e-06, + "logits/chosen": -2.385087728500366, + "logits/rejected": -1.6347739696502686, + "logps/chosen": -781.2395629882812, + "logps/rejected": -2052.24658203125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.041011810302734, + "rewards/margins": 13.045896530151367, + "rewards/rejected": -20.086910247802734, + "step": 14170 + }, + { + "epoch": 0.85, + "learning_rate": 4.512997989181165e-06, + "logits/chosen": -2.233661413192749, + "logits/rejected": -1.3251692056655884, + "logps/chosen": -968.1652221679688, + "logps/rejected": -2227.839599609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.986504554748535, + "rewards/margins": 12.842399597167969, + "rewards/rejected": -21.828903198242188, + "step": 14180 + }, + { + "epoch": 0.85, + "learning_rate": 4.511968894140639e-06, + "logits/chosen": -2.1981711387634277, + "logits/rejected": -1.3604671955108643, + "logps/chosen": -1019.5517578125, + "logps/rejected": -2300.451904296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.51756763458252, + "rewards/margins": 13.045560836791992, + "rewards/rejected": -22.563129425048828, + "step": 14190 + }, + { + "epoch": 0.85, + "learning_rate": 4.510938830538312e-06, + "logits/chosen": -2.254830837249756, + "logits/rejected": -1.3146476745605469, + "logps/chosen": -942.2122802734375, + "logps/rejected": -2211.08251953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.716548919677734, + "rewards/margins": 12.963705062866211, + "rewards/rejected": -21.68025779724121, + "step": 14200 + }, + { + "epoch": 0.85, + "learning_rate": 4.509907798870058e-06, + "logits/chosen": -2.2620456218719482, + "logits/rejected": -1.3431745767593384, + "logps/chosen": -908.4323120117188, + "logps/rejected": -2108.96044921875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.387186050415039, + "rewards/margins": 12.268998146057129, + "rewards/rejected": -20.65618324279785, + "step": 14210 + }, + { + "epoch": 0.85, + "learning_rate": 4.508875799632215e-06, + "logits/chosen": -2.446364641189575, + "logits/rejected": -1.7554008960723877, + "logps/chosen": -566.6852416992188, + "logps/rejected": -1913.9697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.984689235687256, + "rewards/margins": 13.701181411743164, + "rewards/rejected": -18.68587303161621, + "step": 14220 + }, + { + "epoch": 0.85, + "learning_rate": 4.507842833321588e-06, + "logits/chosen": -2.4555156230926514, + "logits/rejected": -1.8412501811981201, + "logps/chosen": -480.05645751953125, + "logps/rejected": -1751.719970703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.099171161651611, + "rewards/margins": 12.981039047241211, + "rewards/rejected": -17.080209732055664, + "step": 14230 + }, + { + "epoch": 0.85, + "learning_rate": 4.506808900435447e-06, + "logits/chosen": -2.4325079917907715, + "logits/rejected": -1.7725744247436523, + "logps/chosen": -478.61383056640625, + "logps/rejected": -1786.039306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.06204080581665, + "rewards/margins": 13.36432933807373, + "rewards/rejected": -17.42637062072754, + "step": 14240 + }, + { + "epoch": 0.85, + "learning_rate": 4.505774001471527e-06, + "logits/chosen": -2.4451260566711426, + "logits/rejected": -1.7643067836761475, + "logps/chosen": -475.93621826171875, + "logps/rejected": -1676.2769775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1094183921813965, + "rewards/margins": 12.211613655090332, + "rewards/rejected": -16.32103157043457, + "step": 14250 + }, + { + "epoch": 0.85, + "learning_rate": 4.5047381369280285e-06, + "logits/chosen": -2.4679722785949707, + "logits/rejected": -1.6808769702911377, + "logps/chosen": -484.27264404296875, + "logps/rejected": -1824.131591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.117508888244629, + "rewards/margins": 13.678817749023438, + "rewards/rejected": -17.796327590942383, + "step": 14260 + }, + { + "epoch": 0.85, + "learning_rate": 4.503701307303615e-06, + "logits/chosen": -2.492994546890259, + "logits/rejected": -1.636156439781189, + "logps/chosen": -476.2152404785156, + "logps/rejected": -1821.1019287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.052163124084473, + "rewards/margins": 13.718557357788086, + "rewards/rejected": -17.770721435546875, + "step": 14270 + }, + { + "epoch": 0.85, + "learning_rate": 4.502663513097419e-06, + "logits/chosen": -2.495617389678955, + "logits/rejected": -1.787714958190918, + "logps/chosen": -464.78594970703125, + "logps/rejected": -1734.6732177734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.949345111846924, + "rewards/margins": 12.93932056427002, + "rewards/rejected": -16.8886661529541, + "step": 14280 + }, + { + "epoch": 0.85, + "learning_rate": 4.501624754809033e-06, + "logits/chosen": -2.4268860816955566, + "logits/rejected": -1.7091413736343384, + "logps/chosen": -453.14910888671875, + "logps/rejected": -1726.966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.826066493988037, + "rewards/margins": 12.990545272827148, + "rewards/rejected": -16.816612243652344, + "step": 14290 + }, + { + "epoch": 0.85, + "learning_rate": 4.5005850329385155e-06, + "logits/chosen": -2.484675407409668, + "logits/rejected": -1.7259658575057983, + "logps/chosen": -452.90447998046875, + "logps/rejected": -1732.469482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.849271059036255, + "rewards/margins": 13.034538269042969, + "rewards/rejected": -16.883808135986328, + "step": 14300 + }, + { + "epoch": 0.85, + "learning_rate": 4.499544347986388e-06, + "logits/chosen": -2.45149564743042, + "logits/rejected": -1.8663175106048584, + "logps/chosen": -467.40936279296875, + "logps/rejected": -1748.974365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.981295108795166, + "rewards/margins": 13.071866035461426, + "rewards/rejected": -17.053157806396484, + "step": 14310 + }, + { + "epoch": 0.85, + "learning_rate": 4.498502700453637e-06, + "logits/chosen": -2.4329450130462646, + "logits/rejected": -1.8815807104110718, + "logps/chosen": -477.0279846191406, + "logps/rejected": -1758.364013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.079416751861572, + "rewards/margins": 13.067733764648438, + "rewards/rejected": -17.14715003967285, + "step": 14320 + }, + { + "epoch": 0.85, + "learning_rate": 4.497460090841713e-06, + "logits/chosen": -2.4865775108337402, + "logits/rejected": -1.7410986423492432, + "logps/chosen": -519.9943237304688, + "logps/rejected": -1752.3441162109375, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.448013782501221, + "rewards/margins": 12.641294479370117, + "rewards/rejected": -17.08930778503418, + "step": 14330 + }, + { + "epoch": 0.86, + "learning_rate": 4.4964165196525255e-06, + "logits/chosen": -2.415006160736084, + "logits/rejected": -1.6473134756088257, + "logps/chosen": -578.2239990234375, + "logps/rejected": -1821.0283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.052134037017822, + "rewards/margins": 12.702603340148926, + "rewards/rejected": -17.754735946655273, + "step": 14340 + }, + { + "epoch": 0.86, + "learning_rate": 4.495371987388451e-06, + "logits/chosen": -2.3749899864196777, + "logits/rejected": -1.4748013019561768, + "logps/chosen": -644.182861328125, + "logps/rejected": -1899.623779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.741323947906494, + "rewards/margins": 12.806245803833008, + "rewards/rejected": -18.547569274902344, + "step": 14350 + }, + { + "epoch": 0.86, + "learning_rate": 4.494326494552327e-06, + "logits/chosen": -2.3828811645507812, + "logits/rejected": -1.537236213684082, + "logps/chosen": -616.072998046875, + "logps/rejected": -1843.3948974609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.43908166885376, + "rewards/margins": 12.547456741333008, + "rewards/rejected": -17.98653793334961, + "step": 14360 + }, + { + "epoch": 0.86, + "learning_rate": 4.493280041647454e-06, + "logits/chosen": -2.38551926612854, + "logits/rejected": -1.5848323106765747, + "logps/chosen": -612.4732666015625, + "logps/rejected": -1865.4000244140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423132419586182, + "rewards/margins": 12.804664611816406, + "rewards/rejected": -18.227798461914062, + "step": 14370 + }, + { + "epoch": 0.86, + "learning_rate": 4.492232629177595e-06, + "logits/chosen": -2.381981611251831, + "logits/rejected": -1.5541927814483643, + "logps/chosen": -584.4002685546875, + "logps/rejected": -1786.818115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.096806526184082, + "rewards/margins": 12.324640274047852, + "rewards/rejected": -17.421445846557617, + "step": 14380 + }, + { + "epoch": 0.86, + "learning_rate": 4.491184257646973e-06, + "logits/chosen": -2.404717445373535, + "logits/rejected": -1.743691086769104, + "logps/chosen": -644.0523071289062, + "logps/rejected": -1817.901611328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.688565254211426, + "rewards/margins": 12.028115272521973, + "rewards/rejected": -17.716678619384766, + "step": 14390 + }, + { + "epoch": 0.86, + "learning_rate": 4.490134927560276e-06, + "logits/chosen": -2.3790926933288574, + "logits/rejected": -1.623549461364746, + "logps/chosen": -595.2689208984375, + "logps/rejected": -1839.7652587890625, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.251752853393555, + "rewards/margins": 12.697749137878418, + "rewards/rejected": -17.949499130249023, + "step": 14400 + }, + { + "epoch": 0.86, + "learning_rate": 4.489084639422649e-06, + "logits/chosen": -2.409715175628662, + "logits/rejected": -1.5699751377105713, + "logps/chosen": -599.2747192382812, + "logps/rejected": -1995.013916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3184494972229, + "rewards/margins": 14.183609008789062, + "rewards/rejected": -19.502056121826172, + "step": 14410 + }, + { + "epoch": 0.86, + "learning_rate": 4.488033393739702e-06, + "logits/chosen": -2.458144187927246, + "logits/rejected": -1.6279857158660889, + "logps/chosen": -608.5180053710938, + "logps/rejected": -1971.8831787109375, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3437724113464355, + "rewards/margins": 13.921087265014648, + "rewards/rejected": -19.26485824584961, + "step": 14420 + }, + { + "epoch": 0.86, + "learning_rate": 4.486981191017505e-06, + "logits/chosen": -2.5090839862823486, + "logits/rejected": -1.8267748355865479, + "logps/chosen": -528.5845336914062, + "logps/rejected": -1755.9814453125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.608792781829834, + "rewards/margins": 12.493253707885742, + "rewards/rejected": -17.102046966552734, + "step": 14430 + }, + { + "epoch": 0.86, + "learning_rate": 4.485928031762587e-06, + "logits/chosen": -2.512244701385498, + "logits/rejected": -1.8225157260894775, + "logps/chosen": -513.8652954101562, + "logps/rejected": -1714.0966796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4120965003967285, + "rewards/margins": 12.27092170715332, + "rewards/rejected": -16.683019638061523, + "step": 14440 + }, + { + "epoch": 0.86, + "learning_rate": 4.484873916481941e-06, + "logits/chosen": -2.5398762226104736, + "logits/rejected": -1.7748702764511108, + "logps/chosen": -542.8739624023438, + "logps/rejected": -1860.7867431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.732717990875244, + "rewards/margins": 13.430709838867188, + "rewards/rejected": -18.163427352905273, + "step": 14450 + }, + { + "epoch": 0.86, + "learning_rate": 4.4838188456830175e-06, + "logits/chosen": -2.5225653648376465, + "logits/rejected": -1.7794551849365234, + "logps/chosen": -548.4766845703125, + "logps/rejected": -1825.60546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.743442535400391, + "rewards/margins": 13.062759399414062, + "rewards/rejected": -17.806201934814453, + "step": 14460 + }, + { + "epoch": 0.86, + "learning_rate": 4.482762819873726e-06, + "logits/chosen": -2.382302761077881, + "logits/rejected": -1.551810622215271, + "logps/chosen": -612.36279296875, + "logps/rejected": -1958.3714599609375, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.391597270965576, + "rewards/margins": 13.737579345703125, + "rewards/rejected": -19.12917709350586, + "step": 14470 + }, + { + "epoch": 0.86, + "learning_rate": 4.481705839562438e-06, + "logits/chosen": -2.4043288230895996, + "logits/rejected": -1.637375831604004, + "logps/chosen": -666.7576904296875, + "logps/rejected": -2032.445556640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9964094161987305, + "rewards/margins": 13.893453598022461, + "rewards/rejected": -19.88986587524414, + "step": 14480 + }, + { + "epoch": 0.86, + "learning_rate": 4.480647905257985e-06, + "logits/chosen": -2.4170355796813965, + "logits/rejected": -1.7225942611694336, + "logps/chosen": -606.2965698242188, + "logps/rejected": -2029.396240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.360188961029053, + "rewards/margins": 14.495584487915039, + "rewards/rejected": -19.85577392578125, + "step": 14490 + }, + { + "epoch": 0.86, + "learning_rate": 4.479589017469655e-06, + "logits/chosen": -2.357923746109009, + "logits/rejected": -1.5525585412979126, + "logps/chosen": -651.2855834960938, + "logps/rejected": -1958.0390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.812714576721191, + "rewards/margins": 13.329394340515137, + "rewards/rejected": -19.142108917236328, + "step": 14500 + }, + { + "epoch": 0.87, + "learning_rate": 4.478529176707197e-06, + "logits/chosen": -2.4108452796936035, + "logits/rejected": -1.4651319980621338, + "logps/chosen": -606.4674072265625, + "logps/rejected": -2054.61181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3516740798950195, + "rewards/margins": 14.744935989379883, + "rewards/rejected": -20.096607208251953, + "step": 14510 + }, + { + "epoch": 0.87, + "learning_rate": 4.47746838348082e-06, + "logits/chosen": -2.4464521408081055, + "logits/rejected": -1.5527561902999878, + "logps/chosen": -620.5284423828125, + "logps/rejected": -2088.64208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.463375568389893, + "rewards/margins": 14.975407600402832, + "rewards/rejected": -20.43878173828125, + "step": 14520 + }, + { + "epoch": 0.87, + "learning_rate": 4.476406638301187e-06, + "logits/chosen": -2.422776937484741, + "logits/rejected": -1.6903431415557861, + "logps/chosen": -595.8466186523438, + "logps/rejected": -1951.708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2227630615234375, + "rewards/margins": 13.843777656555176, + "rewards/rejected": -19.066539764404297, + "step": 14530 + }, + { + "epoch": 0.87, + "learning_rate": 4.475343941679423e-06, + "logits/chosen": -2.44882869720459, + "logits/rejected": -1.7108179330825806, + "logps/chosen": -588.2870483398438, + "logps/rejected": -1952.0855712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.145316123962402, + "rewards/margins": 13.916293144226074, + "rewards/rejected": -19.061609268188477, + "step": 14540 + }, + { + "epoch": 0.87, + "learning_rate": 4.474280294127112e-06, + "logits/chosen": -2.397552013397217, + "logits/rejected": -1.5385363101959229, + "logps/chosen": -612.3439331054688, + "logps/rejected": -1979.861083984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.472516059875488, + "rewards/margins": 13.881612777709961, + "rewards/rejected": -19.354129791259766, + "step": 14550 + }, + { + "epoch": 0.87, + "learning_rate": 4.473215696156292e-06, + "logits/chosen": -2.368910074234009, + "logits/rejected": -1.6204923391342163, + "logps/chosen": -645.1373291015625, + "logps/rejected": -1941.435791015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81321382522583, + "rewards/margins": 13.161651611328125, + "rewards/rejected": -18.974864959716797, + "step": 14560 + }, + { + "epoch": 0.87, + "learning_rate": 4.472150148279462e-06, + "logits/chosen": -2.397423267364502, + "logits/rejected": -1.6090755462646484, + "logps/chosen": -619.5420532226562, + "logps/rejected": -1992.276123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.499382972717285, + "rewards/margins": 13.983110427856445, + "rewards/rejected": -19.482492446899414, + "step": 14570 + }, + { + "epoch": 0.87, + "learning_rate": 4.471083651009574e-06, + "logits/chosen": -2.4111671447753906, + "logits/rejected": -1.5813825130462646, + "logps/chosen": -572.2049560546875, + "logps/rejected": -1992.6683349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.016232967376709, + "rewards/margins": 14.472005844116211, + "rewards/rejected": -19.488239288330078, + "step": 14580 + }, + { + "epoch": 0.87, + "learning_rate": 4.470016204860043e-06, + "logits/chosen": -2.418593645095825, + "logits/rejected": -1.6678266525268555, + "logps/chosen": -615.6971435546875, + "logps/rejected": -2042.839599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.451285362243652, + "rewards/margins": 14.535675048828125, + "rewards/rejected": -19.986963272094727, + "step": 14590 + }, + { + "epoch": 0.87, + "learning_rate": 4.468947810344736e-06, + "logits/chosen": -2.434541940689087, + "logits/rejected": -1.5175970792770386, + "logps/chosen": -619.17578125, + "logps/rejected": -1926.2513427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4131269454956055, + "rewards/margins": 13.406392097473145, + "rewards/rejected": -18.819522857666016, + "step": 14600 + }, + { + "epoch": 0.87, + "learning_rate": 4.4678784679779766e-06, + "logits/chosen": -2.424078941345215, + "logits/rejected": -1.5025453567504883, + "logps/chosen": -574.1945190429688, + "logps/rejected": -2118.740966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023106575012207, + "rewards/margins": 15.708984375, + "rewards/rejected": -20.732091903686523, + "step": 14610 + }, + { + "epoch": 0.87, + "learning_rate": 4.46680817827455e-06, + "logits/chosen": -2.4107601642608643, + "logits/rejected": -1.730921745300293, + "logps/chosen": -608.6077270507812, + "logps/rejected": -2058.314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.411177158355713, + "rewards/margins": 14.729403495788574, + "rewards/rejected": -20.140581130981445, + "step": 14620 + }, + { + "epoch": 0.87, + "learning_rate": 4.46573694174969e-06, + "logits/chosen": -2.409738779067993, + "logits/rejected": -1.5616695880889893, + "logps/chosen": -578.03076171875, + "logps/rejected": -2040.026611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.096164703369141, + "rewards/margins": 14.8602876663208, + "rewards/rejected": -19.956451416015625, + "step": 14630 + }, + { + "epoch": 0.87, + "learning_rate": 4.464664758919092e-06, + "logits/chosen": -2.4205474853515625, + "logits/rejected": -1.5771589279174805, + "logps/chosen": -596.7616577148438, + "logps/rejected": -2051.8857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228369235992432, + "rewards/margins": 14.836189270019531, + "rewards/rejected": -20.064559936523438, + "step": 14640 + }, + { + "epoch": 0.87, + "learning_rate": 4.463591630298904e-06, + "logits/chosen": -2.416073799133301, + "logits/rejected": -1.8116919994354248, + "logps/chosen": -594.1237182617188, + "logps/rejected": -2046.8277587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299272060394287, + "rewards/margins": 14.710917472839355, + "rewards/rejected": -20.010190963745117, + "step": 14650 + }, + { + "epoch": 0.87, + "learning_rate": 4.462517556405729e-06, + "logits/chosen": -2.4322476387023926, + "logits/rejected": -1.7965580224990845, + "logps/chosen": -593.601806640625, + "logps/rejected": -1951.5357666015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218749046325684, + "rewards/margins": 13.845499038696289, + "rewards/rejected": -19.06424903869629, + "step": 14660 + }, + { + "epoch": 0.87, + "learning_rate": 4.461442537756629e-06, + "logits/chosen": -2.439997911453247, + "logits/rejected": -1.7086464166641235, + "logps/chosen": -576.4588623046875, + "logps/rejected": -1939.349853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.051292419433594, + "rewards/margins": 13.898233413696289, + "rewards/rejected": -18.94952392578125, + "step": 14670 + }, + { + "epoch": 0.88, + "learning_rate": 4.460366574869116e-06, + "logits/chosen": -2.419975519180298, + "logits/rejected": -1.7021520137786865, + "logps/chosen": -563.3225708007812, + "logps/rejected": -1782.569580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.897896766662598, + "rewards/margins": 12.497835159301758, + "rewards/rejected": -17.395732879638672, + "step": 14680 + }, + { + "epoch": 0.88, + "learning_rate": 4.459289668261159e-06, + "logits/chosen": -2.3909530639648438, + "logits/rejected": -1.5983389616012573, + "logps/chosen": -564.6715698242188, + "logps/rejected": -1950.2242431640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993304252624512, + "rewards/margins": 14.06634521484375, + "rewards/rejected": -19.059648513793945, + "step": 14690 + }, + { + "epoch": 0.88, + "learning_rate": 4.458211818451179e-06, + "logits/chosen": -2.4043257236480713, + "logits/rejected": -1.750698447227478, + "logps/chosen": -576.9891967773438, + "logps/rejected": -1874.3734130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061062812805176, + "rewards/margins": 13.23609447479248, + "rewards/rejected": -18.297155380249023, + "step": 14700 + }, + { + "epoch": 0.88, + "learning_rate": 4.457133025958056e-06, + "logits/chosen": -2.3896138668060303, + "logits/rejected": -1.5148626565933228, + "logps/chosen": -585.6700439453125, + "logps/rejected": -1880.220458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.157825469970703, + "rewards/margins": 13.192384719848633, + "rewards/rejected": -18.350210189819336, + "step": 14710 + }, + { + "epoch": 0.88, + "learning_rate": 4.45605329130112e-06, + "logits/chosen": -2.4008994102478027, + "logits/rejected": -1.4952532052993774, + "logps/chosen": -584.9503784179688, + "logps/rejected": -1942.025634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.117252349853516, + "rewards/margins": 13.8444185256958, + "rewards/rejected": -18.961673736572266, + "step": 14720 + }, + { + "epoch": 0.88, + "learning_rate": 4.454972615000153e-06, + "logits/chosen": -2.433030366897583, + "logits/rejected": -1.6728826761245728, + "logps/chosen": -585.4971923828125, + "logps/rejected": -1926.0716552734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.18607234954834, + "rewards/margins": 13.624653816223145, + "rewards/rejected": -18.810726165771484, + "step": 14730 + }, + { + "epoch": 0.88, + "learning_rate": 4.4538909975753955e-06, + "logits/chosen": -2.457597255706787, + "logits/rejected": -1.7929637432098389, + "logps/chosen": -577.7593994140625, + "logps/rejected": -1807.0787353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.046017646789551, + "rewards/margins": 12.583523750305176, + "rewards/rejected": -17.62954330444336, + "step": 14740 + }, + { + "epoch": 0.88, + "learning_rate": 4.452808439547535e-06, + "logits/chosen": -2.388958215713501, + "logits/rejected": -1.5968472957611084, + "logps/chosen": -589.2129516601562, + "logps/rejected": -1814.787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.242910861968994, + "rewards/margins": 12.463666915893555, + "rewards/rejected": -17.706579208374023, + "step": 14750 + }, + { + "epoch": 0.88, + "learning_rate": 4.451724941437718e-06, + "logits/chosen": -2.397270917892456, + "logits/rejected": -1.5912597179412842, + "logps/chosen": -601.8082275390625, + "logps/rejected": -1885.539794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.355560779571533, + "rewards/margins": 13.051055908203125, + "rewards/rejected": -18.406618118286133, + "step": 14760 + }, + { + "epoch": 0.88, + "learning_rate": 4.450640503767538e-06, + "logits/chosen": -2.4495186805725098, + "logits/rejected": -1.7076212167739868, + "logps/chosen": -577.3862915039062, + "logps/rejected": -1933.085205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0502448081970215, + "rewards/margins": 13.835309982299805, + "rewards/rejected": -18.885557174682617, + "step": 14770 + }, + { + "epoch": 0.88, + "learning_rate": 4.449555127059045e-06, + "logits/chosen": -2.3870320320129395, + "logits/rejected": -1.6182914972305298, + "logps/chosen": -592.9158325195312, + "logps/rejected": -1803.578369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.214718341827393, + "rewards/margins": 12.390978813171387, + "rewards/rejected": -17.605697631835938, + "step": 14780 + }, + { + "epoch": 0.88, + "learning_rate": 4.448468811834739e-06, + "logits/chosen": -2.485384702682495, + "logits/rejected": -1.8254810571670532, + "logps/chosen": -588.0424194335938, + "logps/rejected": -1832.473876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.164084434509277, + "rewards/margins": 12.732553482055664, + "rewards/rejected": -17.896636962890625, + "step": 14790 + }, + { + "epoch": 0.88, + "learning_rate": 4.447381558617572e-06, + "logits/chosen": -2.425513982772827, + "logits/rejected": -1.5928318500518799, + "logps/chosen": -592.533203125, + "logps/rejected": -1896.1015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.202249050140381, + "rewards/margins": 13.310976028442383, + "rewards/rejected": -18.513225555419922, + "step": 14800 + }, + { + "epoch": 0.88, + "learning_rate": 4.446293367930946e-06, + "logits/chosen": -2.3949201107025146, + "logits/rejected": -1.5638834238052368, + "logps/chosen": -571.1279907226562, + "logps/rejected": -1949.860107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.020842552185059, + "rewards/margins": 14.038518905639648, + "rewards/rejected": -19.05936050415039, + "step": 14810 + }, + { + "epoch": 0.88, + "learning_rate": 4.445204240298718e-06, + "logits/chosen": -2.374603748321533, + "logits/rejected": -1.5966049432754517, + "logps/chosen": -588.2318115234375, + "logps/rejected": -1815.8017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.152325630187988, + "rewards/margins": 12.566571235656738, + "rewards/rejected": -17.718896865844727, + "step": 14820 + }, + { + "epoch": 0.88, + "learning_rate": 4.444114176245194e-06, + "logits/chosen": -2.468203067779541, + "logits/rejected": -1.6907711029052734, + "logps/chosen": -573.827880859375, + "logps/rejected": -1899.4017333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.999885559082031, + "rewards/margins": 13.556994438171387, + "rewards/rejected": -18.556880950927734, + "step": 14830 + }, + { + "epoch": 0.88, + "learning_rate": 4.443023176295131e-06, + "logits/chosen": -2.3707871437072754, + "logits/rejected": -1.6834867000579834, + "logps/chosen": -556.5684814453125, + "logps/rejected": -1821.2086181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.87054443359375, + "rewards/margins": 12.900212287902832, + "rewards/rejected": -17.7707576751709, + "step": 14840 + }, + { + "epoch": 0.89, + "learning_rate": 4.441931240973735e-06, + "logits/chosen": -2.3857717514038086, + "logits/rejected": -1.5524308681488037, + "logps/chosen": -573.178466796875, + "logps/rejected": -1917.6956787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.028563499450684, + "rewards/margins": 13.704259872436523, + "rewards/rejected": -18.732824325561523, + "step": 14850 + }, + { + "epoch": 0.89, + "learning_rate": 4.440838370806664e-06, + "logits/chosen": -2.416794538497925, + "logits/rejected": -1.5867564678192139, + "logps/chosen": -572.3052978515625, + "logps/rejected": -1829.803466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0100226402282715, + "rewards/margins": 12.839871406555176, + "rewards/rejected": -17.849895477294922, + "step": 14860 + }, + { + "epoch": 0.89, + "learning_rate": 4.439744566320027e-06, + "logits/chosen": -2.4546709060668945, + "logits/rejected": -1.5997815132141113, + "logps/chosen": -572.668701171875, + "logps/rejected": -1842.723388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.965679168701172, + "rewards/margins": 13.02153491973877, + "rewards/rejected": -17.987215042114258, + "step": 14870 + }, + { + "epoch": 0.89, + "learning_rate": 4.43864982804038e-06, + "logits/chosen": -2.4388747215270996, + "logits/rejected": -1.7501475811004639, + "logps/chosen": -556.9293823242188, + "logps/rejected": -1782.052001953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.881751537322998, + "rewards/margins": 12.498982429504395, + "rewards/rejected": -17.380733489990234, + "step": 14880 + }, + { + "epoch": 0.89, + "learning_rate": 4.43755415649473e-06, + "logits/chosen": -2.4245524406433105, + "logits/rejected": -1.7009779214859009, + "logps/chosen": -559.200439453125, + "logps/rejected": -1884.3236083984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.882735252380371, + "rewards/margins": 13.514287948608398, + "rewards/rejected": -18.397024154663086, + "step": 14890 + }, + { + "epoch": 0.89, + "learning_rate": 4.436457552210534e-06, + "logits/chosen": -2.4420034885406494, + "logits/rejected": -1.7484499216079712, + "logps/chosen": -564.8449096679688, + "logps/rejected": -1885.931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.930663585662842, + "rewards/margins": 13.4913330078125, + "rewards/rejected": -18.4219970703125, + "step": 14900 + }, + { + "epoch": 0.89, + "learning_rate": 4.435360015715697e-06, + "logits/chosen": -2.4473366737365723, + "logits/rejected": -1.7171761989593506, + "logps/chosen": -585.0032958984375, + "logps/rejected": -1928.8203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.125128746032715, + "rewards/margins": 13.719167709350586, + "rewards/rejected": -18.84429931640625, + "step": 14910 + }, + { + "epoch": 0.89, + "learning_rate": 4.4342615475385745e-06, + "logits/chosen": -2.406743049621582, + "logits/rejected": -1.6958882808685303, + "logps/chosen": -581.8987426757812, + "logps/rejected": -1902.060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061037540435791, + "rewards/margins": 13.507104873657227, + "rewards/rejected": -18.56814193725586, + "step": 14920 + }, + { + "epoch": 0.89, + "learning_rate": 4.433162148207966e-06, + "logits/chosen": -2.383084774017334, + "logits/rejected": -1.741330862045288, + "logps/chosen": -579.4158325195312, + "logps/rejected": -1926.176025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.080870628356934, + "rewards/margins": 13.72041130065918, + "rewards/rejected": -18.801280975341797, + "step": 14930 + }, + { + "epoch": 0.89, + "learning_rate": 4.4320618182531244e-06, + "logits/chosen": -2.3834404945373535, + "logits/rejected": -1.6071357727050781, + "logps/chosen": -568.7125244140625, + "logps/rejected": -1845.6302490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.931117057800293, + "rewards/margins": 13.092973709106445, + "rewards/rejected": -18.024089813232422, + "step": 14940 + }, + { + "epoch": 0.89, + "learning_rate": 4.4309605582037486e-06, + "logits/chosen": -2.386845111846924, + "logits/rejected": -1.5578300952911377, + "logps/chosen": -571.8558959960938, + "logps/rejected": -1921.7574462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.017665386199951, + "rewards/margins": 13.750592231750488, + "rewards/rejected": -18.768259048461914, + "step": 14950 + }, + { + "epoch": 0.89, + "learning_rate": 4.429858368589984e-06, + "logits/chosen": -2.394359827041626, + "logits/rejected": -1.630481481552124, + "logps/chosen": -569.8059692382812, + "logps/rejected": -1861.263916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.996894359588623, + "rewards/margins": 13.181312561035156, + "rewards/rejected": -18.178205490112305, + "step": 14960 + }, + { + "epoch": 0.89, + "learning_rate": 4.428755249942425e-06, + "logits/chosen": -2.407374858856201, + "logits/rejected": -1.686338186264038, + "logps/chosen": -569.987060546875, + "logps/rejected": -1884.5355224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.01310396194458, + "rewards/margins": 13.3814697265625, + "rewards/rejected": -18.394573211669922, + "step": 14970 + }, + { + "epoch": 0.89, + "learning_rate": 4.4276512027921145e-06, + "logits/chosen": -2.416934013366699, + "logits/rejected": -1.7172248363494873, + "logps/chosen": -568.73193359375, + "logps/rejected": -1924.038330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.967644214630127, + "rewards/margins": 13.822553634643555, + "rewards/rejected": -18.79019546508789, + "step": 14980 + }, + { + "epoch": 0.89, + "learning_rate": 4.426546227670539e-06, + "logits/chosen": -2.4494471549987793, + "logits/rejected": -1.6053314208984375, + "logps/chosen": -576.0819702148438, + "logps/rejected": -1974.682373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.064249515533447, + "rewards/margins": 14.241861343383789, + "rewards/rejected": -19.306110382080078, + "step": 14990 + }, + { + "epoch": 0.89, + "learning_rate": 4.4254403251096345e-06, + "logits/chosen": -2.3901772499084473, + "logits/rejected": -1.5595831871032715, + "logps/chosen": -571.8161010742188, + "logps/rejected": -1826.387451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.046109199523926, + "rewards/margins": 12.769942283630371, + "rewards/rejected": -17.816049575805664, + "step": 15000 + }, + { + "epoch": 0.9, + "learning_rate": 4.424333495641782e-06, + "logits/chosen": -2.3637404441833496, + "logits/rejected": -1.523585557937622, + "logps/chosen": -581.4497680664062, + "logps/rejected": -1819.4603271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.124538421630859, + "rewards/margins": 12.639189720153809, + "rewards/rejected": -17.763729095458984, + "step": 15010 + }, + { + "epoch": 0.9, + "learning_rate": 4.423225739799809e-06, + "logits/chosen": -2.4318478107452393, + "logits/rejected": -1.6108814477920532, + "logps/chosen": -565.9749755859375, + "logps/rejected": -1948.6234130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.98650598526001, + "rewards/margins": 14.063802719116211, + "rewards/rejected": -19.050308227539062, + "step": 15020 + }, + { + "epoch": 0.9, + "learning_rate": 4.422117058116989e-06, + "logits/chosen": -2.393789768218994, + "logits/rejected": -1.5973838567733765, + "logps/chosen": -567.449951171875, + "logps/rejected": -1877.5830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.980391502380371, + "rewards/margins": 13.356866836547852, + "rewards/rejected": -18.337255477905273, + "step": 15030 + }, + { + "epoch": 0.9, + "learning_rate": 4.421007451127042e-06, + "logits/chosen": -2.399127960205078, + "logits/rejected": -1.6317806243896484, + "logps/chosen": -578.3239135742188, + "logps/rejected": -1878.4287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.118675231933594, + "rewards/margins": 13.215019226074219, + "rewards/rejected": -18.333694458007812, + "step": 15040 + }, + { + "epoch": 0.9, + "learning_rate": 4.419896919364133e-06, + "logits/chosen": -2.4183273315429688, + "logits/rejected": -1.6529114246368408, + "logps/chosen": -595.5196533203125, + "logps/rejected": -1915.531005859375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.243255138397217, + "rewards/margins": 13.473312377929688, + "rewards/rejected": -18.71656608581543, + "step": 15050 + }, + { + "epoch": 0.9, + "learning_rate": 4.418785463362871e-06, + "logits/chosen": -2.283473491668701, + "logits/rejected": -1.4428719282150269, + "logps/chosen": -759.1969604492188, + "logps/rejected": -2003.6162109375, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.910715579986572, + "rewards/margins": 12.680432319641113, + "rewards/rejected": -19.591150283813477, + "step": 15060 + }, + { + "epoch": 0.9, + "learning_rate": 4.417673083658311e-06, + "logits/chosen": -2.376173734664917, + "logits/rejected": -1.6073973178863525, + "logps/chosen": -645.6504516601562, + "logps/rejected": -1994.4593505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.714275360107422, + "rewards/margins": 13.782476425170898, + "rewards/rejected": -19.496753692626953, + "step": 15070 + }, + { + "epoch": 0.9, + "learning_rate": 4.416559780785954e-06, + "logits/chosen": -2.444462537765503, + "logits/rejected": -1.7543306350708008, + "logps/chosen": -579.2366333007812, + "logps/rejected": -1881.6396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.098817825317383, + "rewards/margins": 13.275354385375977, + "rewards/rejected": -18.37417221069336, + "step": 15080 + }, + { + "epoch": 0.9, + "learning_rate": 4.415445555281742e-06, + "logits/chosen": -2.369931936264038, + "logits/rejected": -1.5350743532180786, + "logps/chosen": -577.8827514648438, + "logps/rejected": -1950.9820556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.087027072906494, + "rewards/margins": 13.978330612182617, + "rewards/rejected": -19.065357208251953, + "step": 15090 + }, + { + "epoch": 0.9, + "learning_rate": 4.414330407682065e-06, + "logits/chosen": -2.422043800354004, + "logits/rejected": -1.7799770832061768, + "logps/chosen": -577.0963134765625, + "logps/rejected": -1870.1802978515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0877180099487305, + "rewards/margins": 13.177667617797852, + "rewards/rejected": -18.265384674072266, + "step": 15100 + }, + { + "epoch": 0.9, + "learning_rate": 4.413214338523754e-06, + "logits/chosen": -2.401745319366455, + "logits/rejected": -1.7190446853637695, + "logps/chosen": -573.6734619140625, + "logps/rejected": -1905.5755615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.013950824737549, + "rewards/margins": 13.608195304870605, + "rewards/rejected": -18.622142791748047, + "step": 15110 + }, + { + "epoch": 0.9, + "learning_rate": 4.412097348344084e-06, + "logits/chosen": -2.40008544921875, + "logits/rejected": -1.6362745761871338, + "logps/chosen": -578.3695068359375, + "logps/rejected": -1844.6539306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.089067459106445, + "rewards/margins": 12.91557502746582, + "rewards/rejected": -18.004642486572266, + "step": 15120 + }, + { + "epoch": 0.9, + "learning_rate": 4.410979437680775e-06, + "logits/chosen": -2.4660158157348633, + "logits/rejected": -1.7667442560195923, + "logps/chosen": -578.8908081054688, + "logps/rejected": -2010.112060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.087174415588379, + "rewards/margins": 14.567052841186523, + "rewards/rejected": -19.65422821044922, + "step": 15130 + }, + { + "epoch": 0.9, + "learning_rate": 4.40986060707199e-06, + "logits/chosen": -2.389970541000366, + "logits/rejected": -1.6524635553359985, + "logps/chosen": -566.8650512695312, + "logps/rejected": -1953.9658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.968990325927734, + "rewards/margins": 14.130666732788086, + "rewards/rejected": -19.09965705871582, + "step": 15140 + }, + { + "epoch": 0.9, + "learning_rate": 4.408740857056332e-06, + "logits/chosen": -2.4252936840057373, + "logits/rejected": -1.6991398334503174, + "logps/chosen": -577.3970947265625, + "logps/rejected": -1880.260009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.024503231048584, + "rewards/margins": 13.34075927734375, + "rewards/rejected": -18.36526107788086, + "step": 15150 + }, + { + "epoch": 0.9, + "learning_rate": 4.4076201881728505e-06, + "logits/chosen": -2.4220643043518066, + "logits/rejected": -1.795122742652893, + "logps/chosen": -578.5068359375, + "logps/rejected": -1928.9320068359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.10837459564209, + "rewards/margins": 13.73253345489502, + "rewards/rejected": -18.840906143188477, + "step": 15160 + }, + { + "epoch": 0.9, + "learning_rate": 4.406498600961034e-06, + "logits/chosen": -2.4605281352996826, + "logits/rejected": -1.6898767948150635, + "logps/chosen": -577.9927978515625, + "logps/rejected": -1871.494873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.092586517333984, + "rewards/margins": 13.167402267456055, + "rewards/rejected": -18.25998878479004, + "step": 15170 + }, + { + "epoch": 0.91, + "learning_rate": 4.405376095960816e-06, + "logits/chosen": -2.420231342315674, + "logits/rejected": -1.6984943151474, + "logps/chosen": -566.4788818359375, + "logps/rejected": -2052.909912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954844951629639, + "rewards/margins": 15.120231628417969, + "rewards/rejected": -20.0750789642334, + "step": 15180 + }, + { + "epoch": 0.91, + "learning_rate": 4.40425267371257e-06, + "logits/chosen": -2.390144109725952, + "logits/rejected": -1.6785873174667358, + "logps/chosen": -568.8071899414062, + "logps/rejected": -2067.738037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.001628875732422, + "rewards/margins": 15.228788375854492, + "rewards/rejected": -20.230417251586914, + "step": 15190 + }, + { + "epoch": 0.91, + "learning_rate": 4.403128334757111e-06, + "logits/chosen": -2.4406933784484863, + "logits/rejected": -1.7314860820770264, + "logps/chosen": -571.9669189453125, + "logps/rejected": -1974.516357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.023664474487305, + "rewards/margins": 14.25806999206543, + "rewards/rejected": -19.281736373901367, + "step": 15200 + }, + { + "epoch": 0.91, + "learning_rate": 4.402003079635695e-06, + "logits/chosen": -2.4267666339874268, + "logits/rejected": -1.724506139755249, + "logps/chosen": -570.2230224609375, + "logps/rejected": -1942.486572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035548210144043, + "rewards/margins": 13.941062927246094, + "rewards/rejected": -18.976612091064453, + "step": 15210 + }, + { + "epoch": 0.91, + "learning_rate": 4.400876908890022e-06, + "logits/chosen": -2.3096535205841064, + "logits/rejected": -1.611244797706604, + "logps/chosen": -589.1080322265625, + "logps/rejected": -1886.5482177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.226596832275391, + "rewards/margins": 13.201105117797852, + "rewards/rejected": -18.42770004272461, + "step": 15220 + }, + { + "epoch": 0.91, + "learning_rate": 4.3997498230622285e-06, + "logits/chosen": -2.40523099899292, + "logits/rejected": -1.660496473312378, + "logps/chosen": -581.2516479492188, + "logps/rejected": -1967.33984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.108613014221191, + "rewards/margins": 14.124643325805664, + "rewards/rejected": -19.23325538635254, + "step": 15230 + }, + { + "epoch": 0.91, + "learning_rate": 4.398621822694894e-06, + "logits/chosen": -2.3839046955108643, + "logits/rejected": -1.639997124671936, + "logps/chosen": -574.9609985351562, + "logps/rejected": -1868.3369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.044733047485352, + "rewards/margins": 13.191381454467773, + "rewards/rejected": -18.236114501953125, + "step": 15240 + }, + { + "epoch": 0.91, + "learning_rate": 4.397492908331039e-06, + "logits/chosen": -2.4577229022979736, + "logits/rejected": -1.818211555480957, + "logps/chosen": -582.9324951171875, + "logps/rejected": -1907.5101318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.071176052093506, + "rewards/margins": 13.561477661132812, + "rewards/rejected": -18.632652282714844, + "step": 15250 + }, + { + "epoch": 0.91, + "learning_rate": 4.396363080514123e-06, + "logits/chosen": -2.393892765045166, + "logits/rejected": -1.7534301280975342, + "logps/chosen": -576.9119873046875, + "logps/rejected": -1937.378662109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.090823173522949, + "rewards/margins": 13.843060493469238, + "rewards/rejected": -18.93388557434082, + "step": 15260 + }, + { + "epoch": 0.91, + "learning_rate": 4.3952323397880426e-06, + "logits/chosen": -2.4283127784729004, + "logits/rejected": -1.801166296005249, + "logps/chosen": -576.48828125, + "logps/rejected": -1912.015869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.031533718109131, + "rewards/margins": 13.6506986618042, + "rewards/rejected": -18.682231903076172, + "step": 15270 + }, + { + "epoch": 0.91, + "learning_rate": 4.394100686697138e-06, + "logits/chosen": -2.3967056274414062, + "logits/rejected": -1.7128350734710693, + "logps/chosen": -583.3796997070312, + "logps/rejected": -1854.7877197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.123101234436035, + "rewards/margins": 12.991737365722656, + "rewards/rejected": -18.114837646484375, + "step": 15280 + }, + { + "epoch": 0.91, + "learning_rate": 4.392968121786187e-06, + "logits/chosen": -2.4654958248138428, + "logits/rejected": -1.6905063390731812, + "logps/chosen": -567.2797241210938, + "logps/rejected": -1882.345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.937753200531006, + "rewards/margins": 13.455438613891602, + "rewards/rejected": -18.393192291259766, + "step": 15290 + }, + { + "epoch": 0.91, + "learning_rate": 4.391834645600408e-06, + "logits/chosen": -2.4265482425689697, + "logits/rejected": -1.7669248580932617, + "logps/chosen": -562.9655151367188, + "logps/rejected": -1912.178466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.916402816772461, + "rewards/margins": 13.769752502441406, + "rewards/rejected": -18.686153411865234, + "step": 15300 + }, + { + "epoch": 0.91, + "learning_rate": 4.390700258685453e-06, + "logits/chosen": -2.4499011039733887, + "logits/rejected": -1.6826210021972656, + "logps/chosen": -577.2006225585938, + "logps/rejected": -2023.0166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.031406402587891, + "rewards/margins": 14.764289855957031, + "rewards/rejected": -19.795696258544922, + "step": 15310 + }, + { + "epoch": 0.91, + "learning_rate": 4.389564961587418e-06, + "logits/chosen": -2.418513774871826, + "logits/rejected": -1.667382001876831, + "logps/chosen": -577.8197021484375, + "logps/rejected": -1899.2037353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038106441497803, + "rewards/margins": 13.513470649719238, + "rewards/rejected": -18.551578521728516, + "step": 15320 + }, + { + "epoch": 0.91, + "learning_rate": 4.388428754852835e-06, + "logits/chosen": -2.394261598587036, + "logits/rejected": -1.7181990146636963, + "logps/chosen": -587.4137573242188, + "logps/rejected": -1816.7388916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.22892427444458, + "rewards/margins": 12.505855560302734, + "rewards/rejected": -17.734779357910156, + "step": 15330 + }, + { + "epoch": 0.91, + "learning_rate": 4.387291639028673e-06, + "logits/chosen": -2.357609272003174, + "logits/rejected": -1.4568885564804077, + "logps/chosen": -582.0084228515625, + "logps/rejected": -1923.104736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161465644836426, + "rewards/margins": 13.631757736206055, + "rewards/rejected": -18.793222427368164, + "step": 15340 + }, + { + "epoch": 0.92, + "learning_rate": 4.386153614662341e-06, + "logits/chosen": -2.4017012119293213, + "logits/rejected": -1.6205774545669556, + "logps/chosen": -585.6903686523438, + "logps/rejected": -1915.702880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.153334140777588, + "rewards/margins": 13.555949211120605, + "rewards/rejected": -18.70928382873535, + "step": 15350 + }, + { + "epoch": 0.92, + "learning_rate": 4.385014682301682e-06, + "logits/chosen": -2.4278769493103027, + "logits/rejected": -1.7025024890899658, + "logps/chosen": -576.4446411132812, + "logps/rejected": -1865.768798828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.028353691101074, + "rewards/margins": 13.195465087890625, + "rewards/rejected": -18.223819732666016, + "step": 15360 + }, + { + "epoch": 0.92, + "learning_rate": 4.383874842494979e-06, + "logits/chosen": -2.387568712234497, + "logits/rejected": -1.5319349765777588, + "logps/chosen": -597.2047729492188, + "logps/rejected": -1886.280029296875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.28122615814209, + "rewards/margins": 13.150487899780273, + "rewards/rejected": -18.43171501159668, + "step": 15370 + }, + { + "epoch": 0.92, + "learning_rate": 4.382734095790951e-06, + "logits/chosen": -2.4996519088745117, + "logits/rejected": -1.9460115432739258, + "logps/chosen": -529.8321533203125, + "logps/rejected": -1691.0657958984375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.566135883331299, + "rewards/margins": 11.898221969604492, + "rewards/rejected": -16.464357376098633, + "step": 15380 + }, + { + "epoch": 0.92, + "learning_rate": 4.381592442738753e-06, + "logits/chosen": -2.3770155906677246, + "logits/rejected": -1.608594298362732, + "logps/chosen": -613.003662109375, + "logps/rejected": -1825.176513671875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.415995121002197, + "rewards/margins": 12.384060859680176, + "rewards/rejected": -17.8000545501709, + "step": 15390 + }, + { + "epoch": 0.92, + "learning_rate": 4.380449883887978e-06, + "logits/chosen": -2.319805383682251, + "logits/rejected": -1.555402398109436, + "logps/chosen": -767.8864135742188, + "logps/rejected": -2167.2236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.937410831451416, + "rewards/margins": 14.2861909866333, + "rewards/rejected": -21.223602294921875, + "step": 15400 + }, + { + "epoch": 0.92, + "learning_rate": 4.379306419788652e-06, + "logits/chosen": -2.3393187522888184, + "logits/rejected": -1.5163973569869995, + "logps/chosen": -797.5515747070312, + "logps/rejected": -2214.74169921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.290682792663574, + "rewards/margins": 14.418060302734375, + "rewards/rejected": -21.708744049072266, + "step": 15410 + }, + { + "epoch": 0.92, + "learning_rate": 4.3781620509912395e-06, + "logits/chosen": -2.3172011375427246, + "logits/rejected": -1.3756499290466309, + "logps/chosen": -811.7916259765625, + "logps/rejected": -2311.21923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.413478851318359, + "rewards/margins": 15.241758346557617, + "rewards/rejected": -22.65523910522461, + "step": 15420 + }, + { + "epoch": 0.92, + "learning_rate": 4.37701677804664e-06, + "logits/chosen": -2.2477450370788574, + "logits/rejected": -1.4039381742477417, + "logps/chosen": -787.5130615234375, + "logps/rejected": -2155.52294921875, + "loss": 0.0122, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -7.277149200439453, + "rewards/margins": 13.842056274414062, + "rewards/rejected": -21.119205474853516, + "step": 15430 + }, + { + "epoch": 0.92, + "learning_rate": 4.375870601506187e-06, + "logits/chosen": -2.4591400623321533, + "logits/rejected": -1.7376600503921509, + "logps/chosen": -554.7110595703125, + "logps/rejected": -1742.310546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.840843677520752, + "rewards/margins": 12.115432739257812, + "rewards/rejected": -16.95627784729004, + "step": 15440 + }, + { + "epoch": 0.92, + "learning_rate": 4.374723521921651e-06, + "logits/chosen": -2.4452435970306396, + "logits/rejected": -1.7518726587295532, + "logps/chosen": -519.43896484375, + "logps/rejected": -1601.1060791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5096025466918945, + "rewards/margins": 11.051382064819336, + "rewards/rejected": -15.560983657836914, + "step": 15450 + }, + { + "epoch": 0.92, + "learning_rate": 4.373575539845235e-06, + "logits/chosen": -2.4398012161254883, + "logits/rejected": -1.8041706085205078, + "logps/chosen": -514.283203125, + "logps/rejected": -1589.670654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.449429512023926, + "rewards/margins": 11.0049467086792, + "rewards/rejected": -15.454376220703125, + "step": 15460 + }, + { + "epoch": 0.92, + "learning_rate": 4.372426655829578e-06, + "logits/chosen": -2.351973295211792, + "logits/rejected": -1.5682103633880615, + "logps/chosen": -663.5455322265625, + "logps/rejected": -1930.887939453125, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002109527587891, + "rewards/margins": 12.855398178100586, + "rewards/rejected": -18.85750961303711, + "step": 15470 + }, + { + "epoch": 0.92, + "learning_rate": 4.3712768704277535e-06, + "logits/chosen": -2.134143590927124, + "logits/rejected": -1.0567082166671753, + "logps/chosen": -1058.656982421875, + "logps/rejected": -2510.66552734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.879638671875, + "rewards/margins": 14.784344673156738, + "rewards/rejected": -24.663982391357422, + "step": 15480 + }, + { + "epoch": 0.92, + "learning_rate": 4.370126184193267e-06, + "logits/chosen": -2.1343994140625, + "logits/rejected": -1.0039443969726562, + "logps/chosen": -1087.23388671875, + "logps/rejected": -2553.41064453125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.161796569824219, + "rewards/margins": 14.923985481262207, + "rewards/rejected": -25.08578109741211, + "step": 15490 + }, + { + "epoch": 0.92, + "learning_rate": 4.368974597680058e-06, + "logits/chosen": -2.197073459625244, + "logits/rejected": -1.14566171169281, + "logps/chosen": -1050.067626953125, + "logps/rejected": -2633.08642578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.789377212524414, + "rewards/margins": 16.098968505859375, + "rewards/rejected": -25.888347625732422, + "step": 15500 + }, + { + "epoch": 0.92, + "learning_rate": 4.367822111442504e-06, + "logits/chosen": -2.248028039932251, + "logits/rejected": -1.122520923614502, + "logps/chosen": -984.6227416992188, + "logps/rejected": -2589.90869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.092504501342773, + "rewards/margins": 16.359363555908203, + "rewards/rejected": -25.451868057250977, + "step": 15510 + }, + { + "epoch": 0.93, + "learning_rate": 4.366668726035407e-06, + "logits/chosen": -2.2422266006469727, + "logits/rejected": -1.2533817291259766, + "logps/chosen": -956.8480224609375, + "logps/rejected": -2434.91748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.897453308105469, + "rewards/margins": 14.995806694030762, + "rewards/rejected": -23.893260955810547, + "step": 15520 + }, + { + "epoch": 0.93, + "learning_rate": 4.36551444201401e-06, + "logits/chosen": -2.2270867824554443, + "logits/rejected": -1.2122507095336914, + "logps/chosen": -995.3406372070312, + "logps/rejected": -2494.61767578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.235326766967773, + "rewards/margins": 15.275776863098145, + "rewards/rejected": -24.51110076904297, + "step": 15530 + }, + { + "epoch": 0.93, + "learning_rate": 4.364359259933985e-06, + "logits/chosen": -2.2288408279418945, + "logits/rejected": -1.1832692623138428, + "logps/chosen": -957.0828247070312, + "logps/rejected": -2612.96728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.832398414611816, + "rewards/margins": 16.86428451538086, + "rewards/rejected": -25.69668197631836, + "step": 15540 + }, + { + "epoch": 0.93, + "learning_rate": 4.363203180351435e-06, + "logits/chosen": -2.1898860931396484, + "logits/rejected": -1.2295573949813843, + "logps/chosen": -966.4099731445312, + "logps/rejected": -2378.937255859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.9351806640625, + "rewards/margins": 14.420875549316406, + "rewards/rejected": -23.356056213378906, + "step": 15550 + }, + { + "epoch": 0.93, + "learning_rate": 4.362046203822898e-06, + "logits/chosen": -2.2171175479888916, + "logits/rejected": -1.0842764377593994, + "logps/chosen": -948.7083129882812, + "logps/rejected": -2521.381591796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.753351211547852, + "rewards/margins": 16.013607025146484, + "rewards/rejected": -24.766956329345703, + "step": 15560 + }, + { + "epoch": 0.93, + "learning_rate": 4.3608883309053425e-06, + "logits/chosen": -2.224473476409912, + "logits/rejected": -1.2692534923553467, + "logps/chosen": -953.7767333984375, + "logps/rejected": -2475.59619140625, + "loss": 0.0284, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -8.789796829223633, + "rewards/margins": 15.5255765914917, + "rewards/rejected": -24.315372467041016, + "step": 15570 + }, + { + "epoch": 0.93, + "learning_rate": 4.3597295621561686e-06, + "logits/chosen": -2.3728671073913574, + "logits/rejected": -1.5150697231292725, + "logps/chosen": -834.6075439453125, + "logps/rejected": -2208.558349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.65643835067749, + "rewards/margins": 13.977404594421387, + "rewards/rejected": -21.63384437561035, + "step": 15580 + }, + { + "epoch": 0.93, + "learning_rate": 4.358569898133207e-06, + "logits/chosen": -2.439028263092041, + "logits/rejected": -1.7112458944320679, + "logps/chosen": -864.0538940429688, + "logps/rejected": -2041.9833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.926988124847412, + "rewards/margins": 12.045366287231445, + "rewards/rejected": -19.972354888916016, + "step": 15590 + }, + { + "epoch": 0.93, + "learning_rate": 4.35740933939472e-06, + "logits/chosen": -2.3438572883605957, + "logits/rejected": -1.6456711292266846, + "logps/chosen": -825.3519287109375, + "logps/rejected": -2050.72216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.562902927398682, + "rewards/margins": 12.496502876281738, + "rewards/rejected": -20.059406280517578, + "step": 15600 + }, + { + "epoch": 0.93, + "learning_rate": 4.356247886499401e-06, + "logits/chosen": -2.4424877166748047, + "logits/rejected": -1.610595941543579, + "logps/chosen": -832.8441162109375, + "logps/rejected": -2034.654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.630705833435059, + "rewards/margins": 12.283190727233887, + "rewards/rejected": -19.913898468017578, + "step": 15610 + }, + { + "epoch": 0.93, + "learning_rate": 4.355085540006372e-06, + "logits/chosen": -2.413931369781494, + "logits/rejected": -1.7569687366485596, + "logps/chosen": -839.3724365234375, + "logps/rejected": -2033.8961181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.702264308929443, + "rewards/margins": 12.185559272766113, + "rewards/rejected": -19.88782501220703, + "step": 15620 + }, + { + "epoch": 0.93, + "learning_rate": 4.353922300475189e-06, + "logits/chosen": -2.4042551517486572, + "logits/rejected": -1.4476066827774048, + "logps/chosen": -844.2605590820312, + "logps/rejected": -2065.6396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.689312934875488, + "rewards/margins": 12.51994514465332, + "rewards/rejected": -20.209257125854492, + "step": 15630 + }, + { + "epoch": 0.93, + "learning_rate": 4.352758168465833e-06, + "logits/chosen": -2.3857975006103516, + "logits/rejected": -1.647912621498108, + "logps/chosen": -846.2037353515625, + "logps/rejected": -2021.5543212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.780087471008301, + "rewards/margins": 11.988038063049316, + "rewards/rejected": -19.76812744140625, + "step": 15640 + }, + { + "epoch": 0.93, + "learning_rate": 4.351593144538719e-06, + "logits/chosen": -2.3948233127593994, + "logits/rejected": -1.5612647533416748, + "logps/chosen": -829.5084228515625, + "logps/rejected": -2025.9075927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.53564453125, + "rewards/margins": 12.276577949523926, + "rewards/rejected": -19.81222152709961, + "step": 15650 + }, + { + "epoch": 0.93, + "learning_rate": 4.350427229254689e-06, + "logits/chosen": -2.380786418914795, + "logits/rejected": -1.6066299676895142, + "logps/chosen": -837.2005615234375, + "logps/rejected": -2047.4205322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.677587032318115, + "rewards/margins": 12.359748840332031, + "rewards/rejected": -20.037334442138672, + "step": 15660 + }, + { + "epoch": 0.93, + "learning_rate": 4.349260423175014e-06, + "logits/chosen": -2.500464916229248, + "logits/rejected": -1.6343713998794556, + "logps/chosen": -829.1158447265625, + "logps/rejected": -2074.87841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.553095817565918, + "rewards/margins": 12.742274284362793, + "rewards/rejected": -20.295368194580078, + "step": 15670 + }, + { + "epoch": 0.94, + "learning_rate": 4.348092726861395e-06, + "logits/chosen": -2.3570516109466553, + "logits/rejected": -1.4972474575042725, + "logps/chosen": -846.6749267578125, + "logps/rejected": -2065.300537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.763266086578369, + "rewards/margins": 12.455236434936523, + "rewards/rejected": -20.218505859375, + "step": 15680 + }, + { + "epoch": 0.94, + "learning_rate": 4.346924140875961e-06, + "logits/chosen": -2.379441261291504, + "logits/rejected": -1.5726053714752197, + "logps/chosen": -843.96923828125, + "logps/rejected": -2065.823974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.737311363220215, + "rewards/margins": 12.479469299316406, + "rewards/rejected": -20.216781616210938, + "step": 15690 + }, + { + "epoch": 0.94, + "learning_rate": 4.345754665781269e-06, + "logits/chosen": -2.4327163696289062, + "logits/rejected": -1.5912200212478638, + "logps/chosen": -845.8034057617188, + "logps/rejected": -2077.12158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.731990814208984, + "rewards/margins": 12.604842185974121, + "rewards/rejected": -20.33683204650879, + "step": 15700 + }, + { + "epoch": 0.94, + "learning_rate": 4.344584302140304e-06, + "logits/chosen": -2.3770956993103027, + "logits/rejected": -1.676645278930664, + "logps/chosen": -822.2916259765625, + "logps/rejected": -2005.3385009765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.488752841949463, + "rewards/margins": 12.136571884155273, + "rewards/rejected": -19.625324249267578, + "step": 15710 + }, + { + "epoch": 0.94, + "learning_rate": 4.34341305051648e-06, + "logits/chosen": -2.3742547035217285, + "logits/rejected": -1.6494684219360352, + "logps/chosen": -817.4862060546875, + "logps/rejected": -2004.2261962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4849958419799805, + "rewards/margins": 12.105342864990234, + "rewards/rejected": -19.59033966064453, + "step": 15720 + }, + { + "epoch": 0.94, + "learning_rate": 4.3422409114736375e-06, + "logits/chosen": -2.381105899810791, + "logits/rejected": -1.5701593160629272, + "logps/chosen": -824.9569091796875, + "logps/rejected": -2008.2603759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.496946811676025, + "rewards/margins": 12.146178245544434, + "rewards/rejected": -19.643125534057617, + "step": 15730 + }, + { + "epoch": 0.94, + "learning_rate": 4.3410678855760435e-06, + "logits/chosen": -2.4049999713897705, + "logits/rejected": -1.5632909536361694, + "logps/chosen": -836.9963989257812, + "logps/rejected": -1970.598388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649104118347168, + "rewards/margins": 11.620000839233398, + "rewards/rejected": -19.26910400390625, + "step": 15740 + }, + { + "epoch": 0.94, + "learning_rate": 4.339893973388392e-06, + "logits/chosen": -2.3995959758758545, + "logits/rejected": -1.6898200511932373, + "logps/chosen": -828.7342529296875, + "logps/rejected": -2014.7679443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.65283727645874, + "rewards/margins": 12.057196617126465, + "rewards/rejected": -19.710031509399414, + "step": 15750 + }, + { + "epoch": 0.94, + "learning_rate": 4.338719175475807e-06, + "logits/chosen": -2.4347729682922363, + "logits/rejected": -1.6078557968139648, + "logps/chosen": -820.0128173828125, + "logps/rejected": -1975.1998291015625, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.545502662658691, + "rewards/margins": 11.765230178833008, + "rewards/rejected": -19.310733795166016, + "step": 15760 + }, + { + "epoch": 0.94, + "learning_rate": 4.337543492403832e-06, + "logits/chosen": -2.350860595703125, + "logits/rejected": -1.4644479751586914, + "logps/chosen": -923.7286987304688, + "logps/rejected": -2014.8851318359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.516724586486816, + "rewards/margins": 11.199442863464355, + "rewards/rejected": -19.716167449951172, + "step": 15770 + }, + { + "epoch": 0.94, + "learning_rate": 4.3363669247384446e-06, + "logits/chosen": -2.3266515731811523, + "logits/rejected": -1.508305549621582, + "logps/chosen": -919.9578857421875, + "logps/rejected": -2083.40234375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.525469779968262, + "rewards/margins": 11.872702598571777, + "rewards/rejected": -20.39817237854004, + "step": 15780 + }, + { + "epoch": 0.94, + "learning_rate": 4.335189473046042e-06, + "logits/chosen": -2.3240678310394287, + "logits/rejected": -1.4423474073410034, + "logps/chosen": -873.2457275390625, + "logps/rejected": -2061.864501953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.018834114074707, + "rewards/margins": 12.167524337768555, + "rewards/rejected": -20.186359405517578, + "step": 15790 + }, + { + "epoch": 0.94, + "learning_rate": 4.334011137893452e-06, + "logits/chosen": -2.322775363922119, + "logits/rejected": -1.4818694591522217, + "logps/chosen": -913.1143798828125, + "logps/rejected": -2075.340576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.43989086151123, + "rewards/margins": 11.869847297668457, + "rewards/rejected": -20.30974006652832, + "step": 15800 + }, + { + "epoch": 0.94, + "learning_rate": 4.332831919847922e-06, + "logits/chosen": -2.31664776802063, + "logits/rejected": -1.4155701398849487, + "logps/chosen": -871.1072998046875, + "logps/rejected": -2075.16748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.049510955810547, + "rewards/margins": 12.257329940795898, + "rewards/rejected": -20.306838989257812, + "step": 15810 + }, + { + "epoch": 0.94, + "learning_rate": 4.3316518194771305e-06, + "logits/chosen": -2.339721202850342, + "logits/rejected": -1.4732401371002197, + "logps/chosen": -863.6583862304688, + "logps/rejected": -2115.45166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9460577964782715, + "rewards/margins": 12.762457847595215, + "rewards/rejected": -20.70851707458496, + "step": 15820 + }, + { + "epoch": 0.94, + "learning_rate": 4.330470837349175e-06, + "logits/chosen": -2.3277602195739746, + "logits/rejected": -1.5257325172424316, + "logps/chosen": -865.1227416992188, + "logps/rejected": -2077.917724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.961973667144775, + "rewards/margins": 12.381901741027832, + "rewards/rejected": -20.3438777923584, + "step": 15830 + }, + { + "epoch": 0.94, + "learning_rate": 4.329288974032583e-06, + "logits/chosen": -2.333448886871338, + "logits/rejected": -1.3990534543991089, + "logps/chosen": -840.13525390625, + "logps/rejected": -2169.461669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6853766441345215, + "rewards/margins": 13.544923782348633, + "rewards/rejected": -21.23029899597168, + "step": 15840 + }, + { + "epoch": 0.95, + "learning_rate": 4.328106230096302e-06, + "logits/chosen": -2.296415090560913, + "logits/rejected": -1.4835126399993896, + "logps/chosen": -891.1484375, + "logps/rejected": -2045.297607421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.200224876403809, + "rewards/margins": 11.800736427307129, + "rewards/rejected": -20.000959396362305, + "step": 15850 + }, + { + "epoch": 0.95, + "learning_rate": 4.326922606109704e-06, + "logits/chosen": -2.3502116203308105, + "logits/rejected": -1.4219577312469482, + "logps/chosen": -864.6666870117188, + "logps/rejected": -2069.42333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.98733377456665, + "rewards/margins": 12.272363662719727, + "rewards/rejected": -20.259695053100586, + "step": 15860 + }, + { + "epoch": 0.95, + "learning_rate": 4.325738102642589e-06, + "logits/chosen": -2.3277266025543213, + "logits/rejected": -1.4861513376235962, + "logps/chosen": -821.0515747070312, + "logps/rejected": -2086.500732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.534162998199463, + "rewards/margins": 12.8953857421875, + "rewards/rejected": -20.429550170898438, + "step": 15870 + }, + { + "epoch": 0.95, + "learning_rate": 4.324552720265173e-06, + "logits/chosen": -2.3357417583465576, + "logits/rejected": -1.405364751815796, + "logps/chosen": -837.5628051757812, + "logps/rejected": -2088.78955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.661309719085693, + "rewards/margins": 12.786191940307617, + "rewards/rejected": -20.44750213623047, + "step": 15880 + }, + { + "epoch": 0.95, + "learning_rate": 4.323366459548101e-06, + "logits/chosen": -2.3355445861816406, + "logits/rejected": -1.510807991027832, + "logps/chosen": -866.1846923828125, + "logps/rejected": -2073.407470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9675140380859375, + "rewards/margins": 12.325703620910645, + "rewards/rejected": -20.2932186126709, + "step": 15890 + }, + { + "epoch": 0.95, + "learning_rate": 4.322179321062439e-06, + "logits/chosen": -2.3213844299316406, + "logits/rejected": -1.55025315284729, + "logps/chosen": -841.04833984375, + "logps/rejected": -2006.6546630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.763333797454834, + "rewards/margins": 11.87794303894043, + "rewards/rejected": -19.641277313232422, + "step": 15900 + }, + { + "epoch": 0.95, + "learning_rate": 4.320991305379675e-06, + "logits/chosen": -2.3458170890808105, + "logits/rejected": -1.5124925374984741, + "logps/chosen": -845.04736328125, + "logps/rejected": -2127.957763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.725978851318359, + "rewards/margins": 13.106002807617188, + "rewards/rejected": -20.831979751586914, + "step": 15910 + }, + { + "epoch": 0.95, + "learning_rate": 4.319802413071719e-06, + "logits/chosen": -2.3545565605163574, + "logits/rejected": -1.433571219444275, + "logps/chosen": -830.9833984375, + "logps/rejected": -2095.996337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.585823059082031, + "rewards/margins": 12.933980941772461, + "rewards/rejected": -20.519805908203125, + "step": 15920 + }, + { + "epoch": 0.95, + "learning_rate": 4.318612644710906e-06, + "logits/chosen": -2.361921787261963, + "logits/rejected": -1.4621822834014893, + "logps/chosen": -835.6956176757812, + "logps/rejected": -2159.034912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.661002159118652, + "rewards/margins": 13.483163833618164, + "rewards/rejected": -21.144168853759766, + "step": 15930 + }, + { + "epoch": 0.95, + "learning_rate": 4.317422000869987e-06, + "logits/chosen": -2.3143210411071777, + "logits/rejected": -1.5279518365859985, + "logps/chosen": -819.5753173828125, + "logps/rejected": -2048.18798828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.495492458343506, + "rewards/margins": 12.554492950439453, + "rewards/rejected": -20.049983978271484, + "step": 15940 + }, + { + "epoch": 0.95, + "learning_rate": 4.316230482122142e-06, + "logits/chosen": -2.3768632411956787, + "logits/rejected": -1.5029726028442383, + "logps/chosen": -821.1976318359375, + "logps/rejected": -2080.57861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.46973180770874, + "rewards/margins": 12.890989303588867, + "rewards/rejected": -20.360721588134766, + "step": 15950 + }, + { + "epoch": 0.95, + "learning_rate": 4.315038089040965e-06, + "logits/chosen": -2.3455090522766113, + "logits/rejected": -1.558251976966858, + "logps/chosen": -863.9547729492188, + "logps/rejected": -2090.031005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9080328941345215, + "rewards/margins": 12.548322677612305, + "rewards/rejected": -20.456356048583984, + "step": 15960 + }, + { + "epoch": 0.95, + "learning_rate": 4.313844822200474e-06, + "logits/chosen": -2.3741369247436523, + "logits/rejected": -1.460157871246338, + "logps/chosen": -841.669921875, + "logps/rejected": -2005.870849609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.679185390472412, + "rewards/margins": 11.935876846313477, + "rewards/rejected": -19.615062713623047, + "step": 15970 + }, + { + "epoch": 0.95, + "learning_rate": 4.312650682175111e-06, + "logits/chosen": -2.319507122039795, + "logits/rejected": -1.3774954080581665, + "logps/chosen": -831.3267822265625, + "logps/rejected": -2056.907470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.62310791015625, + "rewards/margins": 12.510753631591797, + "rewards/rejected": -20.13386344909668, + "step": 15980 + }, + { + "epoch": 0.95, + "learning_rate": 4.311455669539732e-06, + "logits/chosen": -2.392838954925537, + "logits/rejected": -1.5047202110290527, + "logps/chosen": -835.0192260742188, + "logps/rejected": -2124.322509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6566267013549805, + "rewards/margins": 13.148712158203125, + "rewards/rejected": -20.805339813232422, + "step": 15990 + }, + { + "epoch": 0.95, + "learning_rate": 4.310259784869616e-06, + "logits/chosen": -2.3832530975341797, + "logits/rejected": -1.42087721824646, + "logps/chosen": -868.5525512695312, + "logps/rejected": -2071.5419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.930828094482422, + "rewards/margins": 12.345251083374023, + "rewards/rejected": -20.276081085205078, + "step": 16000 + }, + { + "epoch": 0.95, + "learning_rate": 4.309063028740464e-06, + "logits/chosen": -2.368055820465088, + "logits/rejected": -1.5345474481582642, + "logps/chosen": -831.9560546875, + "logps/rejected": -2081.957275390625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.645559787750244, + "rewards/margins": 12.723212242126465, + "rewards/rejected": -20.368772506713867, + "step": 16010 + }, + { + "epoch": 0.96, + "learning_rate": 4.307865401728392e-06, + "logits/chosen": -2.2983319759368896, + "logits/rejected": -1.4693052768707275, + "logps/chosen": -828.1456909179688, + "logps/rejected": -2067.6826171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.551168918609619, + "rewards/margins": 12.691009521484375, + "rewards/rejected": -20.24217987060547, + "step": 16020 + }, + { + "epoch": 0.96, + "learning_rate": 4.3066669044099385e-06, + "logits/chosen": -2.3176963329315186, + "logits/rejected": -1.397221326828003, + "logps/chosen": -830.4445190429688, + "logps/rejected": -2137.00537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.574900150299072, + "rewards/margins": 13.352360725402832, + "rewards/rejected": -20.927257537841797, + "step": 16030 + }, + { + "epoch": 0.96, + "learning_rate": 4.30546753736206e-06, + "logits/chosen": -2.361797332763672, + "logits/rejected": -1.479391098022461, + "logps/chosen": -817.6224365234375, + "logps/rejected": -2130.055419921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4361419677734375, + "rewards/margins": 13.406892776489258, + "rewards/rejected": -20.843032836914062, + "step": 16040 + }, + { + "epoch": 0.96, + "learning_rate": 4.3042673011621334e-06, + "logits/chosen": -2.3482651710510254, + "logits/rejected": -1.5561363697052002, + "logps/chosen": -807.205078125, + "logps/rejected": -2159.788330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3759765625, + "rewards/margins": 13.787473678588867, + "rewards/rejected": -21.1634521484375, + "step": 16050 + }, + { + "epoch": 0.96, + "learning_rate": 4.3030661963879505e-06, + "logits/chosen": -2.439957857131958, + "logits/rejected": -1.5784562826156616, + "logps/chosen": -797.3411865234375, + "logps/rejected": -2113.66552734375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.252445220947266, + "rewards/margins": 13.435420036315918, + "rewards/rejected": -20.687864303588867, + "step": 16060 + }, + { + "epoch": 0.96, + "learning_rate": 4.301864223617723e-06, + "logits/chosen": -2.3256375789642334, + "logits/rejected": -1.4791265726089478, + "logps/chosen": -766.025390625, + "logps/rejected": -2137.231689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9062700271606445, + "rewards/margins": 14.021893501281738, + "rewards/rejected": -20.92816162109375, + "step": 16070 + }, + { + "epoch": 0.96, + "learning_rate": 4.300661383430081e-06, + "logits/chosen": -2.2951724529266357, + "logits/rejected": -1.4933580160140991, + "logps/chosen": -794.0272827148438, + "logps/rejected": -2105.56591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.27829647064209, + "rewards/margins": 13.333669662475586, + "rewards/rejected": -20.611967086791992, + "step": 16080 + }, + { + "epoch": 0.96, + "learning_rate": 4.299457676404073e-06, + "logits/chosen": -2.376980781555176, + "logits/rejected": -1.5078169107437134, + "logps/chosen": -760.9634399414062, + "logps/rejected": -2098.51220703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.927585601806641, + "rewards/margins": 13.6012601852417, + "rewards/rejected": -20.528844833374023, + "step": 16090 + }, + { + "epoch": 0.96, + "learning_rate": 4.298253103119162e-06, + "logits/chosen": -2.3509559631347656, + "logits/rejected": -1.405503273010254, + "logps/chosen": -732.6740112304688, + "logps/rejected": -2125.82763671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602071285247803, + "rewards/margins": 14.210260391235352, + "rewards/rejected": -20.81233024597168, + "step": 16100 + }, + { + "epoch": 0.96, + "learning_rate": 4.2970476641552304e-06, + "logits/chosen": -2.4151387214660645, + "logits/rejected": -1.5110714435577393, + "logps/chosen": -734.4816284179688, + "logps/rejected": -2220.773193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.625360012054443, + "rewards/margins": 15.125337600708008, + "rewards/rejected": -21.75069808959961, + "step": 16110 + }, + { + "epoch": 0.96, + "learning_rate": 4.295841360092576e-06, + "logits/chosen": -2.347431182861328, + "logits/rejected": -1.5303785800933838, + "logps/chosen": -739.5587768554688, + "logps/rejected": -2111.72705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.673280239105225, + "rewards/margins": 14.002833366394043, + "rewards/rejected": -20.67611312866211, + "step": 16120 + }, + { + "epoch": 0.96, + "learning_rate": 4.294634191511914e-06, + "logits/chosen": -2.36683988571167, + "logits/rejected": -1.5710080862045288, + "logps/chosen": -739.397216796875, + "logps/rejected": -2167.88671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.736340522766113, + "rewards/margins": 14.49769401550293, + "rewards/rejected": -21.23403549194336, + "step": 16130 + }, + { + "epoch": 0.96, + "learning_rate": 4.293426158994375e-06, + "logits/chosen": -2.35015869140625, + "logits/rejected": -1.3824220895767212, + "logps/chosen": -743.0401611328125, + "logps/rejected": -2145.43603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.668623924255371, + "rewards/margins": 14.342889785766602, + "rewards/rejected": -21.011516571044922, + "step": 16140 + }, + { + "epoch": 0.96, + "learning_rate": 4.292217263121505e-06, + "logits/chosen": -2.3809244632720947, + "logits/rejected": -1.6585063934326172, + "logps/chosen": -731.89453125, + "logps/rejected": -2000.821044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.607466697692871, + "rewards/margins": 12.95972728729248, + "rewards/rejected": -19.56719398498535, + "step": 16150 + }, + { + "epoch": 0.96, + "learning_rate": 4.291007504475269e-06, + "logits/chosen": -2.3345327377319336, + "logits/rejected": -1.4900497198104858, + "logps/chosen": -715.6353759765625, + "logps/rejected": -2112.51708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4697675704956055, + "rewards/margins": 14.21234130859375, + "rewards/rejected": -20.682109832763672, + "step": 16160 + }, + { + "epoch": 0.96, + "learning_rate": 4.289796883638042e-06, + "logits/chosen": -2.3869190216064453, + "logits/rejected": -1.6188102960586548, + "logps/chosen": -746.0643920898438, + "logps/rejected": -2087.99853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.712224006652832, + "rewards/margins": 13.724235534667969, + "rewards/rejected": -20.436458587646484, + "step": 16170 + }, + { + "epoch": 0.96, + "learning_rate": 4.2885854011926185e-06, + "logits/chosen": -2.4053750038146973, + "logits/rejected": -1.6063435077667236, + "logps/chosen": -748.9327392578125, + "logps/rejected": -2056.242919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.811247825622559, + "rewards/margins": 13.308670043945312, + "rewards/rejected": -20.119918823242188, + "step": 16180 + }, + { + "epoch": 0.97, + "learning_rate": 4.2873730577222055e-06, + "logits/chosen": -2.3974156379699707, + "logits/rejected": -1.5400400161743164, + "logps/chosen": -724.7410278320312, + "logps/rejected": -2138.21044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.573792934417725, + "rewards/margins": 14.367283821105957, + "rewards/rejected": -20.941076278686523, + "step": 16190 + }, + { + "epoch": 0.97, + "learning_rate": 4.2861598538104255e-06, + "logits/chosen": -2.3205678462982178, + "logits/rejected": -1.5097311735153198, + "logps/chosen": -752.3272094726562, + "logps/rejected": -2003.7783203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.847596168518066, + "rewards/margins": 12.751996994018555, + "rewards/rejected": -19.599592208862305, + "step": 16200 + }, + { + "epoch": 0.97, + "learning_rate": 4.284945790041315e-06, + "logits/chosen": -2.340517520904541, + "logits/rejected": -1.5342745780944824, + "logps/chosen": -805.8675537109375, + "logps/rejected": -2197.160888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.401665687561035, + "rewards/margins": 14.129364013671875, + "rewards/rejected": -21.531030654907227, + "step": 16210 + }, + { + "epoch": 0.97, + "learning_rate": 4.283730866999323e-06, + "logits/chosen": -2.310232639312744, + "logits/rejected": -1.4405291080474854, + "logps/chosen": -822.685546875, + "logps/rejected": -2164.89599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.546438694000244, + "rewards/margins": 13.654912948608398, + "rewards/rejected": -21.201353073120117, + "step": 16220 + }, + { + "epoch": 0.97, + "learning_rate": 4.282515085269315e-06, + "logits/chosen": -2.3249239921569824, + "logits/rejected": -1.3131908178329468, + "logps/chosen": -845.025390625, + "logps/rejected": -2138.26123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.725647926330566, + "rewards/margins": 13.213406562805176, + "rewards/rejected": -20.939054489135742, + "step": 16230 + }, + { + "epoch": 0.97, + "learning_rate": 4.281298445436568e-06, + "logits/chosen": -2.3605029582977295, + "logits/rejected": -1.4965593814849854, + "logps/chosen": -845.3806762695312, + "logps/rejected": -2198.721435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.721987247467041, + "rewards/margins": 13.81926155090332, + "rewards/rejected": -21.541248321533203, + "step": 16240 + }, + { + "epoch": 0.97, + "learning_rate": 4.280080948086771e-06, + "logits/chosen": -2.2749664783477783, + "logits/rejected": -1.3968629837036133, + "logps/chosen": -821.9111328125, + "logps/rejected": -2095.54150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.527815341949463, + "rewards/margins": 13.003217697143555, + "rewards/rejected": -20.531036376953125, + "step": 16250 + }, + { + "epoch": 0.97, + "learning_rate": 4.278862593806029e-06, + "logits/chosen": -2.298884868621826, + "logits/rejected": -1.3994418382644653, + "logps/chosen": -814.2052612304688, + "logps/rejected": -2125.867431640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.355082035064697, + "rewards/margins": 13.458379745483398, + "rewards/rejected": -20.813461303710938, + "step": 16260 + }, + { + "epoch": 0.97, + "learning_rate": 4.277643383180857e-06, + "logits/chosen": -2.3317441940307617, + "logits/rejected": -1.4414074420928955, + "logps/chosen": -816.9530029296875, + "logps/rejected": -2152.86669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.515173435211182, + "rewards/margins": 13.565302848815918, + "rewards/rejected": -21.080476760864258, + "step": 16270 + }, + { + "epoch": 0.97, + "learning_rate": 4.2764233167981826e-06, + "logits/chosen": -2.359884262084961, + "logits/rejected": -1.437058448791504, + "logps/chosen": -837.1183471679688, + "logps/rejected": -2208.95166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.628429412841797, + "rewards/margins": 14.0255126953125, + "rewards/rejected": -21.653942108154297, + "step": 16280 + }, + { + "epoch": 0.97, + "learning_rate": 4.275202395245346e-06, + "logits/chosen": -2.3357467651367188, + "logits/rejected": -1.4389612674713135, + "logps/chosen": -831.3883666992188, + "logps/rejected": -2206.96923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.539036750793457, + "rewards/margins": 14.0858793258667, + "rewards/rejected": -21.624919891357422, + "step": 16290 + }, + { + "epoch": 0.97, + "learning_rate": 4.2739806191101e-06, + "logits/chosen": -2.362658977508545, + "logits/rejected": -1.4672372341156006, + "logps/chosen": -790.6473388671875, + "logps/rejected": -2176.40234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.166151523590088, + "rewards/margins": 14.148472785949707, + "rewards/rejected": -21.314624786376953, + "step": 16300 + }, + { + "epoch": 0.97, + "learning_rate": 4.272757988980606e-06, + "logits/chosen": -2.3231117725372314, + "logits/rejected": -1.4696794748306274, + "logps/chosen": -837.5230712890625, + "logps/rejected": -2077.114990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.664947509765625, + "rewards/margins": 12.66468620300293, + "rewards/rejected": -20.329635620117188, + "step": 16310 + }, + { + "epoch": 0.97, + "learning_rate": 4.271534505445438e-06, + "logits/chosen": -2.3972668647766113, + "logits/rejected": -1.3944923877716064, + "logps/chosen": -802.7386474609375, + "logps/rejected": -2141.12353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.319543361663818, + "rewards/margins": 13.649360656738281, + "rewards/rejected": -20.968902587890625, + "step": 16320 + }, + { + "epoch": 0.97, + "learning_rate": 4.270310169093583e-06, + "logits/chosen": -2.455885648727417, + "logits/rejected": -1.5598698854446411, + "logps/chosen": -843.95654296875, + "logps/rejected": -2147.58447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.737092018127441, + "rewards/margins": 13.295669555664062, + "rewards/rejected": -21.032760620117188, + "step": 16330 + }, + { + "epoch": 0.97, + "learning_rate": 4.269084980514434e-06, + "logits/chosen": -2.394151210784912, + "logits/rejected": -1.3288617134094238, + "logps/chosen": -812.7820434570312, + "logps/rejected": -2141.096923828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.43541955947876, + "rewards/margins": 13.541913986206055, + "rewards/rejected": -20.97733497619629, + "step": 16340 + }, + { + "epoch": 0.97, + "learning_rate": 4.267858940297799e-06, + "logits/chosen": -2.3152220249176025, + "logits/rejected": -1.337388038635254, + "logps/chosen": -867.3160400390625, + "logps/rejected": -2165.583740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.945352077484131, + "rewards/margins": 13.267390251159668, + "rewards/rejected": -21.212743759155273, + "step": 16350 + }, + { + "epoch": 0.98, + "learning_rate": 4.266632049033892e-06, + "logits/chosen": -2.3254427909851074, + "logits/rejected": -1.510040521621704, + "logps/chosen": -819.8356323242188, + "logps/rejected": -2170.72998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.46182107925415, + "rewards/margins": 13.793795585632324, + "rewards/rejected": -21.255619049072266, + "step": 16360 + }, + { + "epoch": 0.98, + "learning_rate": 4.265404307313339e-06, + "logits/chosen": -2.340045928955078, + "logits/rejected": -1.314864158630371, + "logps/chosen": -814.0137939453125, + "logps/rejected": -2137.454345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.445228576660156, + "rewards/margins": 13.481010437011719, + "rewards/rejected": -20.926239013671875, + "step": 16370 + }, + { + "epoch": 0.98, + "learning_rate": 4.264175715727176e-06, + "logits/chosen": -2.3346805572509766, + "logits/rejected": -1.4774667024612427, + "logps/chosen": -837.0673828125, + "logps/rejected": -2188.3720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696932792663574, + "rewards/margins": 13.754658699035645, + "rewards/rejected": -21.45159339904785, + "step": 16380 + }, + { + "epoch": 0.98, + "learning_rate": 4.262946274866845e-06, + "logits/chosen": -2.3464488983154297, + "logits/rejected": -1.529739260673523, + "logps/chosen": -835.5964965820312, + "logps/rejected": -2145.262939453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.674935817718506, + "rewards/margins": 13.330816268920898, + "rewards/rejected": -21.005752563476562, + "step": 16390 + }, + { + "epoch": 0.98, + "learning_rate": 4.261715985324199e-06, + "logits/chosen": -2.3518433570861816, + "logits/rejected": -1.41307532787323, + "logps/chosen": -837.9777221679688, + "logps/rejected": -2273.134521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.673118591308594, + "rewards/margins": 14.611598014831543, + "rewards/rejected": -22.284717559814453, + "step": 16400 + }, + { + "epoch": 0.98, + "learning_rate": 4.2604848476915015e-06, + "logits/chosen": -2.3180184364318848, + "logits/rejected": -1.4398542642593384, + "logps/chosen": -835.75927734375, + "logps/rejected": -2177.93505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.614073276519775, + "rewards/margins": 13.73328685760498, + "rewards/rejected": -21.347360610961914, + "step": 16410 + }, + { + "epoch": 0.98, + "learning_rate": 4.2592528625614206e-06, + "logits/chosen": -2.377495765686035, + "logits/rejected": -1.549177646636963, + "logps/chosen": -810.4583740234375, + "logps/rejected": -2172.49609375, + "loss": 0.0063, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.413954734802246, + "rewards/margins": 13.874130249023438, + "rewards/rejected": -21.288082122802734, + "step": 16420 + }, + { + "epoch": 0.98, + "learning_rate": 4.258020030527034e-06, + "logits/chosen": -2.3813135623931885, + "logits/rejected": -1.691440224647522, + "logps/chosen": -666.0817260742188, + "logps/rejected": -2037.141357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99515438079834, + "rewards/margins": 13.933553695678711, + "rewards/rejected": -19.928709030151367, + "step": 16430 + }, + { + "epoch": 0.98, + "learning_rate": 4.256786352181827e-06, + "logits/chosen": -2.4323313236236572, + "logits/rejected": -1.7263997793197632, + "logps/chosen": -669.498291015625, + "logps/rejected": -1947.813232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950741767883301, + "rewards/margins": 13.092527389526367, + "rewards/rejected": -19.043270111083984, + "step": 16440 + }, + { + "epoch": 0.98, + "learning_rate": 4.255551828119692e-06, + "logits/chosen": -2.3969080448150635, + "logits/rejected": -1.67122483253479, + "logps/chosen": -679.2623291015625, + "logps/rejected": -1965.900634765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115488052368164, + "rewards/margins": 13.105562210083008, + "rewards/rejected": -19.22104835510254, + "step": 16450 + }, + { + "epoch": 0.98, + "learning_rate": 4.25431645893493e-06, + "logits/chosen": -2.4050140380859375, + "logits/rejected": -1.6828899383544922, + "logps/chosen": -683.144775390625, + "logps/rejected": -2044.0748291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.155014991760254, + "rewards/margins": 13.842188835144043, + "rewards/rejected": -19.99720573425293, + "step": 16460 + }, + { + "epoch": 0.98, + "learning_rate": 4.253080245222246e-06, + "logits/chosen": -2.3807129859924316, + "logits/rejected": -1.7155786752700806, + "logps/chosen": -670.4835815429688, + "logps/rejected": -1953.029541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998744964599609, + "rewards/margins": 13.086491584777832, + "rewards/rejected": -19.08523941040039, + "step": 16470 + }, + { + "epoch": 0.98, + "learning_rate": 4.2518431875767555e-06, + "logits/chosen": -2.4283032417297363, + "logits/rejected": -1.7443708181381226, + "logps/chosen": -694.9443359375, + "logps/rejected": -2094.083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.190868377685547, + "rewards/margins": 14.294779777526855, + "rewards/rejected": -20.48565101623535, + "step": 16480 + }, + { + "epoch": 0.98, + "learning_rate": 4.250605286593977e-06, + "logits/chosen": -2.426852226257324, + "logits/rejected": -1.7334325313568115, + "logps/chosen": -679.6297607421875, + "logps/rejected": -1888.80859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118653774261475, + "rewards/margins": 12.320863723754883, + "rewards/rejected": -18.439517974853516, + "step": 16490 + }, + { + "epoch": 0.98, + "learning_rate": 4.249366542869835e-06, + "logits/chosen": -2.414811134338379, + "logits/rejected": -1.7015889883041382, + "logps/chosen": -683.2149658203125, + "logps/rejected": -2068.43359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1248459815979, + "rewards/margins": 14.11529541015625, + "rewards/rejected": -20.240142822265625, + "step": 16500 + }, + { + "epoch": 0.98, + "learning_rate": 4.248126957000662e-06, + "logits/chosen": -2.440782308578491, + "logits/rejected": -1.6374822854995728, + "logps/chosen": -705.0015869140625, + "logps/rejected": -2142.81298828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.324993133544922, + "rewards/margins": 14.654500007629395, + "rewards/rejected": -20.979490280151367, + "step": 16510 + }, + { + "epoch": 0.99, + "learning_rate": 4.246886529583194e-06, + "logits/chosen": -2.364828109741211, + "logits/rejected": -1.6724157333374023, + "logps/chosen": -718.852783203125, + "logps/rejected": -2075.101318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.501028537750244, + "rewards/margins": 13.804672241210938, + "rewards/rejected": -20.305700302124023, + "step": 16520 + }, + { + "epoch": 0.99, + "learning_rate": 4.245645261214572e-06, + "logits/chosen": -2.4024124145507812, + "logits/rejected": -1.6864521503448486, + "logps/chosen": -761.1599731445312, + "logps/rejected": -2068.25390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.898156642913818, + "rewards/margins": 13.35009765625, + "rewards/rejected": -20.248254776000977, + "step": 16530 + }, + { + "epoch": 0.99, + "learning_rate": 4.2444031524923455e-06, + "logits/chosen": -2.4209823608398438, + "logits/rejected": -1.7255840301513672, + "logps/chosen": -726.3500366210938, + "logps/rejected": -2071.3720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.524474143981934, + "rewards/margins": 13.754005432128906, + "rewards/rejected": -20.27847671508789, + "step": 16540 + }, + { + "epoch": 0.99, + "learning_rate": 4.243160204014463e-06, + "logits/chosen": -2.4685065746307373, + "logits/rejected": -1.7806217670440674, + "logps/chosen": -706.3834838867188, + "logps/rejected": -2136.192138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.351504325866699, + "rewards/margins": 14.569493293762207, + "rewards/rejected": -20.920997619628906, + "step": 16550 + }, + { + "epoch": 0.99, + "learning_rate": 4.24191641637928e-06, + "logits/chosen": -2.444956064224243, + "logits/rejected": -1.6397781372070312, + "logps/chosen": -709.8569946289062, + "logps/rejected": -2036.914794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.395436763763428, + "rewards/margins": 13.531878471374512, + "rewards/rejected": -19.92731475830078, + "step": 16560 + }, + { + "epoch": 0.99, + "learning_rate": 4.2406717901855555e-06, + "logits/chosen": -2.417323112487793, + "logits/rejected": -1.7279771566390991, + "logps/chosen": -696.5011596679688, + "logps/rejected": -2124.940185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.208968162536621, + "rewards/margins": 14.593925476074219, + "rewards/rejected": -20.802892684936523, + "step": 16570 + }, + { + "epoch": 0.99, + "learning_rate": 4.239426326032455e-06, + "logits/chosen": -2.409320116043091, + "logits/rejected": -1.677390694618225, + "logps/chosen": -743.0374755859375, + "logps/rejected": -1978.885009765625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.749749183654785, + "rewards/margins": 12.604692459106445, + "rewards/rejected": -19.354440689086914, + "step": 16580 + }, + { + "epoch": 0.99, + "learning_rate": 4.238180024519543e-06, + "logits/chosen": -2.3605523109436035, + "logits/rejected": -1.5698175430297852, + "logps/chosen": -731.4795532226562, + "logps/rejected": -2024.283447265625, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.664125919342041, + "rewards/margins": 13.130928039550781, + "rewards/rejected": -19.795055389404297, + "step": 16590 + }, + { + "epoch": 0.99, + "learning_rate": 4.236932886246789e-06, + "logits/chosen": -2.2854506969451904, + "logits/rejected": -1.4997950792312622, + "logps/chosen": -841.9267578125, + "logps/rejected": -2135.342529296875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.713427543640137, + "rewards/margins": 13.205259323120117, + "rewards/rejected": -20.918685913085938, + "step": 16600 + }, + { + "epoch": 0.99, + "learning_rate": 4.235684911814566e-06, + "logits/chosen": -2.309512138366699, + "logits/rejected": -1.3899043798446655, + "logps/chosen": -841.8679809570312, + "logps/rejected": -2136.318603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.732835292816162, + "rewards/margins": 13.182464599609375, + "rewards/rejected": -20.915300369262695, + "step": 16610 + }, + { + "epoch": 0.99, + "learning_rate": 4.234436101823648e-06, + "logits/chosen": -2.350876569747925, + "logits/rejected": -1.322472333908081, + "logps/chosen": -841.9982299804688, + "logps/rejected": -2190.8037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.709364891052246, + "rewards/margins": 13.75128173828125, + "rewards/rejected": -21.460643768310547, + "step": 16620 + }, + { + "epoch": 0.99, + "learning_rate": 4.233186456875213e-06, + "logits/chosen": -2.3888485431671143, + "logits/rejected": -1.5791094303131104, + "logps/chosen": -813.0345458984375, + "logps/rejected": -2166.388916015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.433658599853516, + "rewards/margins": 13.786291122436523, + "rewards/rejected": -21.219951629638672, + "step": 16630 + }, + { + "epoch": 0.99, + "learning_rate": 4.2319359775708405e-06, + "logits/chosen": -2.3303799629211426, + "logits/rejected": -1.3882715702056885, + "logps/chosen": -787.8463134765625, + "logps/rejected": -2148.55615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.152085304260254, + "rewards/margins": 13.885113716125488, + "rewards/rejected": -21.03719711303711, + "step": 16640 + }, + { + "epoch": 0.99, + "learning_rate": 4.230684664512509e-06, + "logits/chosen": -2.333904981613159, + "logits/rejected": -1.5315635204315186, + "logps/chosen": -777.8318481445312, + "logps/rejected": -2145.2783203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.039231777191162, + "rewards/margins": 13.955078125, + "rewards/rejected": -20.994308471679688, + "step": 16650 + }, + { + "epoch": 0.99, + "learning_rate": 4.229432518302603e-06, + "logits/chosen": -2.3924336433410645, + "logits/rejected": -1.4300719499588013, + "logps/chosen": -772.42431640625, + "logps/rejected": -2042.5072021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.02643346786499, + "rewards/margins": 12.9558687210083, + "rewards/rejected": -19.9822998046875, + "step": 16660 + }, + { + "epoch": 0.99, + "learning_rate": 4.228179539543905e-06, + "logits/chosen": -2.3274574279785156, + "logits/rejected": -1.5029761791229248, + "logps/chosen": -775.5011596679688, + "logps/rejected": -2167.5908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.030150413513184, + "rewards/margins": 14.202435493469238, + "rewards/rejected": -21.232585906982422, + "step": 16670 + }, + { + "epoch": 0.99, + "learning_rate": 4.226925728839598e-06, + "logits/chosen": -2.380281925201416, + "logits/rejected": -1.596400260925293, + "logps/chosen": -776.6669921875, + "logps/rejected": -2121.69580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.068474769592285, + "rewards/margins": 13.707006454467773, + "rewards/rejected": -20.77547836303711, + "step": 16680 + }, + { + "epoch": 1.0, + "learning_rate": 4.225671086793268e-06, + "logits/chosen": -2.352072238922119, + "logits/rejected": -1.5062671899795532, + "logps/chosen": -781.0648803710938, + "logps/rejected": -2077.43896484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0513200759887695, + "rewards/margins": 13.288251876831055, + "rewards/rejected": -20.33957290649414, + "step": 16690 + }, + { + "epoch": 1.0, + "learning_rate": 4.224415614008898e-06, + "logits/chosen": -2.3569722175598145, + "logits/rejected": -1.438531756401062, + "logps/chosen": -754.4425048828125, + "logps/rejected": -2110.61328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.849137783050537, + "rewards/margins": 13.817670822143555, + "rewards/rejected": -20.66680908203125, + "step": 16700 + }, + { + "epoch": 1.0, + "learning_rate": 4.223159311090874e-06, + "logits/chosen": -2.385262966156006, + "logits/rejected": -1.6878706216812134, + "logps/chosen": -769.6485595703125, + "logps/rejected": -2110.03173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0013909339904785, + "rewards/margins": 13.6666841506958, + "rewards/rejected": -20.668075561523438, + "step": 16710 + }, + { + "epoch": 1.0, + "learning_rate": 4.221902178643979e-06, + "logits/chosen": -2.308628559112549, + "logits/rejected": -1.523890495300293, + "logps/chosen": -789.8714599609375, + "logps/rejected": -2121.454345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220742702484131, + "rewards/margins": 13.557891845703125, + "rewards/rejected": -20.778635025024414, + "step": 16720 + }, + { + "epoch": 1.0, + "learning_rate": 4.220644217273397e-06, + "logits/chosen": -2.357182025909424, + "logits/rejected": -1.22305166721344, + "logps/chosen": -770.1734008789062, + "logps/rejected": -2207.06787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.922806739807129, + "rewards/margins": 14.708050727844238, + "rewards/rejected": -21.630859375, + "step": 16730 + }, + { + "epoch": 1.0, + "learning_rate": 4.2193854275847115e-06, + "logits/chosen": -2.3817358016967773, + "logits/rejected": -1.6041501760482788, + "logps/chosen": -734.0195922851562, + "logps/rejected": -2147.32958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602145195007324, + "rewards/margins": 14.42591381072998, + "rewards/rejected": -21.028059005737305, + "step": 16740 + }, + { + "epoch": 1.0, + "learning_rate": 4.218125810183903e-06, + "logits/chosen": -2.3632442951202393, + "logits/rejected": -1.542915940284729, + "logps/chosen": -747.8185424804688, + "logps/rejected": -2135.134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.744318962097168, + "rewards/margins": 14.170720100402832, + "rewards/rejected": -20.915037155151367, + "step": 16750 + }, + { + "epoch": 1.0, + "learning_rate": 4.216865365677352e-06, + "logits/chosen": -2.3617970943450928, + "logits/rejected": -1.5336923599243164, + "logps/chosen": -741.0734252929688, + "logps/rejected": -2153.572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.696285247802734, + "rewards/margins": 14.394821166992188, + "rewards/rejected": -21.091106414794922, + "step": 16760 + }, + { + "epoch": 1.0, + "learning_rate": 4.215604094671835e-06, + "logits/chosen": -2.359928607940674, + "logits/rejected": -1.603082299232483, + "logps/chosen": -763.9637451171875, + "logps/rejected": -2078.13720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.906715393066406, + "rewards/margins": 13.432960510253906, + "rewards/rejected": -20.339675903320312, + "step": 16770 + }, + { + "epoch": 1.0, + "learning_rate": 4.214341997774528e-06, + "logits/chosen": -2.3614449501037598, + "logits/rejected": -1.3995181322097778, + "logps/chosen": -720.8394165039062, + "logps/rejected": -2140.2333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.436924934387207, + "rewards/margins": 14.508386611938477, + "rewards/rejected": -20.945308685302734, + "step": 16780 + }, + { + "epoch": 1.0, + "learning_rate": 4.213079075593006e-06, + "logits/chosen": -2.327021598815918, + "logits/rejected": -1.3855149745941162, + "logps/chosen": -765.5531005859375, + "logps/rejected": -2099.421630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.974505424499512, + "rewards/margins": 13.577832221984863, + "rewards/rejected": -20.552337646484375, + "step": 16790 + }, + { + "epoch": 1.0, + "learning_rate": 4.211815328735239e-06, + "logits/chosen": -2.434871196746826, + "logits/rejected": -1.6826460361480713, + "logps/chosen": -746.3359375, + "logps/rejected": -2131.11572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.772913455963135, + "rewards/margins": 14.099716186523438, + "rewards/rejected": -20.872629165649414, + "step": 16800 + }, + { + "epoch": 1.0, + "learning_rate": 4.210550757809594e-06, + "logits/chosen": -2.399641513824463, + "logits/rejected": -1.4710413217544556, + "logps/chosen": -759.6830444335938, + "logps/rejected": -2124.6943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.903332710266113, + "rewards/margins": 13.903970718383789, + "rewards/rejected": -20.807300567626953, + "step": 16810 + }, + { + "epoch": 1.0, + "learning_rate": 4.209285363424836e-06, + "logits/chosen": -2.368192672729492, + "logits/rejected": -1.4062049388885498, + "logps/chosen": -782.8004760742188, + "logps/rejected": -2196.861572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0867767333984375, + "rewards/margins": 14.429430961608887, + "rewards/rejected": -21.516206741333008, + "step": 16820 + }, + { + "epoch": 1.0, + "learning_rate": 4.208019146190127e-06, + "logits/chosen": -2.4384219646453857, + "logits/rejected": -1.6664139032363892, + "logps/chosen": -596.9757080078125, + "logps/rejected": -1929.383544921875, + "loss": 0.0285, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.284670829772949, + "rewards/margins": 13.568888664245605, + "rewards/rejected": -18.853557586669922, + "step": 16830 + }, + { + "epoch": 1.0, + "learning_rate": 4.206752106715022e-06, + "logits/chosen": -2.482822895050049, + "logits/rejected": -1.8894212245941162, + "logps/chosen": -471.2826232910156, + "logps/rejected": -1671.614501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.024334907531738, + "rewards/margins": 12.242280006408691, + "rewards/rejected": -16.266613006591797, + "step": 16840 + }, + { + "epoch": 1.0, + "learning_rate": 4.205484245609474e-06, + "logits/chosen": -2.5230908393859863, + "logits/rejected": -1.9142125844955444, + "logps/chosen": -475.72491455078125, + "logps/rejected": -1588.070556640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.095338821411133, + "rewards/margins": 11.3369140625, + "rewards/rejected": -15.43225383758545, + "step": 16850 + }, + { + "epoch": 1.01, + "learning_rate": 4.204215563483833e-06, + "logits/chosen": -2.4751179218292236, + "logits/rejected": -1.8302955627441406, + "logps/chosen": -481.9007873535156, + "logps/rejected": -1738.278076171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.054772853851318, + "rewards/margins": 12.88042163848877, + "rewards/rejected": -16.935192108154297, + "step": 16860 + }, + { + "epoch": 1.01, + "learning_rate": 4.20294606094884e-06, + "logits/chosen": -2.5249907970428467, + "logits/rejected": -1.858007788658142, + "logps/chosen": -512.87353515625, + "logps/rejected": -1729.623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.409865379333496, + "rewards/margins": 12.446870803833008, + "rewards/rejected": -16.85673713684082, + "step": 16870 + }, + { + "epoch": 1.01, + "learning_rate": 4.201675738615637e-06, + "logits/chosen": -2.4598803520202637, + "logits/rejected": -1.7686303853988647, + "logps/chosen": -486.41070556640625, + "logps/rejected": -1854.7523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.208449363708496, + "rewards/margins": 13.898338317871094, + "rewards/rejected": -18.106788635253906, + "step": 16880 + }, + { + "epoch": 1.01, + "learning_rate": 4.200404597095754e-06, + "logits/chosen": -2.4774718284606934, + "logits/rejected": -1.7133426666259766, + "logps/chosen": -487.97900390625, + "logps/rejected": -1825.9058837890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.185755729675293, + "rewards/margins": 13.616279602050781, + "rewards/rejected": -17.802034378051758, + "step": 16890 + }, + { + "epoch": 1.01, + "learning_rate": 4.199132637001119e-06, + "logits/chosen": -2.4569849967956543, + "logits/rejected": -1.841943383216858, + "logps/chosen": -504.598876953125, + "logps/rejected": -1898.904296875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.296687602996826, + "rewards/margins": 14.25239372253418, + "rewards/rejected": -18.549083709716797, + "step": 16900 + }, + { + "epoch": 1.01, + "learning_rate": 4.1978598589440554e-06, + "logits/chosen": -2.5009429454803467, + "logits/rejected": -1.7258809804916382, + "logps/chosen": -495.74420166015625, + "logps/rejected": -1782.6390380859375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2457146644592285, + "rewards/margins": 13.146928787231445, + "rewards/rejected": -17.392642974853516, + "step": 16910 + }, + { + "epoch": 1.01, + "learning_rate": 4.196586263537277e-06, + "logits/chosen": -2.5367157459259033, + "logits/rejected": -1.8052822351455688, + "logps/chosen": -485.5254821777344, + "logps/rejected": -1774.0074462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.146761894226074, + "rewards/margins": 13.16822624206543, + "rewards/rejected": -17.31498908996582, + "step": 16920 + }, + { + "epoch": 1.01, + "learning_rate": 4.1953118513938925e-06, + "logits/chosen": -2.4980361461639404, + "logits/rejected": -1.8136494159698486, + "logps/chosen": -489.2342834472656, + "logps/rejected": -1803.053466796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.16973352432251, + "rewards/margins": 13.408551216125488, + "rewards/rejected": -17.578285217285156, + "step": 16930 + }, + { + "epoch": 1.01, + "learning_rate": 4.194036623127404e-06, + "logits/chosen": -2.499450445175171, + "logits/rejected": -1.8404333591461182, + "logps/chosen": -489.4947204589844, + "logps/rejected": -1844.065673828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.207486629486084, + "rewards/margins": 13.795560836791992, + "rewards/rejected": -18.003047943115234, + "step": 16940 + }, + { + "epoch": 1.01, + "learning_rate": 4.192760579351708e-06, + "logits/chosen": -2.527506113052368, + "logits/rejected": -1.8366241455078125, + "logps/chosen": -480.9150390625, + "logps/rejected": -1772.190673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.05794620513916, + "rewards/margins": 13.223226547241211, + "rewards/rejected": -17.281173706054688, + "step": 16950 + }, + { + "epoch": 1.01, + "learning_rate": 4.19148372068109e-06, + "logits/chosen": -2.4995884895324707, + "logits/rejected": -1.8606716394424438, + "logps/chosen": -483.8077087402344, + "logps/rejected": -1755.3734130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.155109882354736, + "rewards/margins": 12.96032428741455, + "rewards/rejected": -17.115432739257812, + "step": 16960 + }, + { + "epoch": 1.01, + "learning_rate": 4.19020604773023e-06, + "logits/chosen": -2.4837441444396973, + "logits/rejected": -1.7629401683807373, + "logps/chosen": -485.816162109375, + "logps/rejected": -1854.841064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.140345573425293, + "rewards/margins": 13.965121269226074, + "rewards/rejected": -18.105466842651367, + "step": 16970 + }, + { + "epoch": 1.01, + "learning_rate": 4.188927561114201e-06, + "logits/chosen": -2.5338261127471924, + "logits/rejected": -1.8049129247665405, + "logps/chosen": -482.89532470703125, + "logps/rejected": -1820.87109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.116847038269043, + "rewards/margins": 13.640485763549805, + "rewards/rejected": -17.757333755493164, + "step": 16980 + }, + { + "epoch": 1.01, + "learning_rate": 4.187648261448465e-06, + "logits/chosen": -2.4797210693359375, + "logits/rejected": -1.7240514755249023, + "logps/chosen": -485.45550537109375, + "logps/rejected": -1848.865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.085972785949707, + "rewards/margins": 13.95671272277832, + "rewards/rejected": -18.042686462402344, + "step": 16990 + }, + { + "epoch": 1.01, + "learning_rate": 4.186368149348878e-06, + "logits/chosen": -2.5164451599121094, + "logits/rejected": -1.8637330532073975, + "logps/chosen": -484.30767822265625, + "logps/rejected": -1860.2633056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.153355598449707, + "rewards/margins": 14.011320114135742, + "rewards/rejected": -18.164676666259766, + "step": 17000 + }, + { + "epoch": 1.01, + "learning_rate": 4.185087225431686e-06, + "logits/chosen": -2.5140576362609863, + "logits/rejected": -1.8341782093048096, + "logps/chosen": -491.65533447265625, + "logps/rejected": -1873.1441650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.241944789886475, + "rewards/margins": 14.046676635742188, + "rewards/rejected": -18.28862190246582, + "step": 17010 + }, + { + "epoch": 1.01, + "learning_rate": 4.183805490313524e-06, + "logits/chosen": -2.5188584327697754, + "logits/rejected": -1.8820288181304932, + "logps/chosen": -487.8851013183594, + "logps/rejected": -1876.2552490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.180233478546143, + "rewards/margins": 14.136329650878906, + "rewards/rejected": -18.31656265258789, + "step": 17020 + }, + { + "epoch": 1.02, + "learning_rate": 4.18252294461142e-06, + "logits/chosen": -2.456089496612549, + "logits/rejected": -1.7385318279266357, + "logps/chosen": -495.3731994628906, + "logps/rejected": -1868.3746337890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.302315711975098, + "rewards/margins": 13.935907363891602, + "rewards/rejected": -18.23822021484375, + "step": 17030 + }, + { + "epoch": 1.02, + "learning_rate": 4.181239588942793e-06, + "logits/chosen": -2.4832379817962646, + "logits/rejected": -1.722078561782837, + "logps/chosen": -503.6434631347656, + "logps/rejected": -2040.488037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2944841384887695, + "rewards/margins": 15.673593521118164, + "rewards/rejected": -19.968076705932617, + "step": 17040 + }, + { + "epoch": 1.02, + "learning_rate": 4.179955423925449e-06, + "logits/chosen": -2.463827133178711, + "logits/rejected": -1.79416024684906, + "logps/chosen": -502.85595703125, + "logps/rejected": -1812.0631103515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3465704917907715, + "rewards/margins": 13.3213472366333, + "rewards/rejected": -17.667919158935547, + "step": 17050 + }, + { + "epoch": 1.02, + "learning_rate": 4.178670450177585e-06, + "logits/chosen": -2.452455520629883, + "logits/rejected": -1.6906383037567139, + "logps/chosen": -501.4212951660156, + "logps/rejected": -1940.5306396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.304912090301514, + "rewards/margins": 14.64726734161377, + "rewards/rejected": -18.952178955078125, + "step": 17060 + }, + { + "epoch": 1.02, + "learning_rate": 4.177384668317788e-06, + "logits/chosen": -2.4643123149871826, + "logits/rejected": -1.743297815322876, + "logps/chosen": -515.8394165039062, + "logps/rejected": -1845.731689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4667253494262695, + "rewards/margins": 13.538134574890137, + "rewards/rejected": -18.004858016967773, + "step": 17070 + }, + { + "epoch": 1.02, + "learning_rate": 4.176098078965034e-06, + "logits/chosen": -2.45540189743042, + "logits/rejected": -1.652944803237915, + "logps/chosen": -510.85882568359375, + "logps/rejected": -1913.475830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.380012035369873, + "rewards/margins": 14.311918258666992, + "rewards/rejected": -18.691930770874023, + "step": 17080 + }, + { + "epoch": 1.02, + "learning_rate": 4.1748106827386845e-06, + "logits/chosen": -2.5237057209014893, + "logits/rejected": -1.7816098928451538, + "logps/chosen": -511.62872314453125, + "logps/rejected": -1800.1236572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.405665874481201, + "rewards/margins": 13.148923873901367, + "rewards/rejected": -17.55458641052246, + "step": 17090 + }, + { + "epoch": 1.02, + "learning_rate": 4.173522480258494e-06, + "logits/chosen": -2.450363874435425, + "logits/rejected": -1.7733078002929688, + "logps/chosen": -505.10443115234375, + "logps/rejected": -1925.458740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348653316497803, + "rewards/margins": 14.463101387023926, + "rewards/rejected": -18.811756134033203, + "step": 17100 + }, + { + "epoch": 1.02, + "learning_rate": 4.1722334721446045e-06, + "logits/chosen": -2.446411609649658, + "logits/rejected": -1.7941306829452515, + "logps/chosen": -513.7903442382812, + "logps/rejected": -1979.626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.410715103149414, + "rewards/margins": 14.932963371276855, + "rewards/rejected": -19.343679428100586, + "step": 17110 + }, + { + "epoch": 1.02, + "learning_rate": 4.1709436590175415e-06, + "logits/chosen": -2.51669979095459, + "logits/rejected": -1.6492456197738647, + "logps/chosen": -510.27935791015625, + "logps/rejected": -1934.2607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.318645000457764, + "rewards/margins": 14.578768730163574, + "rewards/rejected": -18.897415161132812, + "step": 17120 + }, + { + "epoch": 1.02, + "learning_rate": 4.1696530414982225e-06, + "logits/chosen": -2.4620120525360107, + "logits/rejected": -1.666776418685913, + "logps/chosen": -515.3135986328125, + "logps/rejected": -1883.139892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.454082489013672, + "rewards/margins": 13.92187786102295, + "rewards/rejected": -18.375957489013672, + "step": 17130 + }, + { + "epoch": 1.02, + "learning_rate": 4.16836162020795e-06, + "logits/chosen": -2.5074501037597656, + "logits/rejected": -1.7234115600585938, + "logps/chosen": -505.24835205078125, + "logps/rejected": -1953.989013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.301390171051025, + "rewards/margins": 14.792343139648438, + "rewards/rejected": -19.093730926513672, + "step": 17140 + }, + { + "epoch": 1.02, + "learning_rate": 4.167069395768416e-06, + "logits/chosen": -2.4544003009796143, + "logits/rejected": -1.7920278310775757, + "logps/chosen": -510.08331298828125, + "logps/rejected": -1976.4967041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.463223457336426, + "rewards/margins": 14.863743782043457, + "rewards/rejected": -19.326969146728516, + "step": 17150 + }, + { + "epoch": 1.02, + "learning_rate": 4.165776368801695e-06, + "logits/chosen": -2.4103848934173584, + "logits/rejected": -1.7682796716690063, + "logps/chosen": -498.71630859375, + "logps/rejected": -1864.9775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.283182144165039, + "rewards/margins": 13.923454284667969, + "rewards/rejected": -18.20663833618164, + "step": 17160 + }, + { + "epoch": 1.02, + "learning_rate": 4.164482539930251e-06, + "logits/chosen": -2.4333605766296387, + "logits/rejected": -1.7503821849822998, + "logps/chosen": -512.9955444335938, + "logps/rejected": -1874.722412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.441753387451172, + "rewards/margins": 13.852701187133789, + "rewards/rejected": -18.29445457458496, + "step": 17170 + }, + { + "epoch": 1.02, + "learning_rate": 4.163187909776935e-06, + "logits/chosen": -2.4827327728271484, + "logits/rejected": -1.8473716974258423, + "logps/chosen": -506.0567932128906, + "logps/rejected": -1883.9544677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.32346248626709, + "rewards/margins": 14.087536811828613, + "rewards/rejected": -18.410999298095703, + "step": 17180 + }, + { + "epoch": 1.03, + "learning_rate": 4.16189247896498e-06, + "logits/chosen": -2.497642993927002, + "logits/rejected": -1.7155202627182007, + "logps/chosen": -508.09356689453125, + "logps/rejected": -1965.171142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.308581352233887, + "rewards/margins": 14.894922256469727, + "rewards/rejected": -19.203502655029297, + "step": 17190 + }, + { + "epoch": 1.03, + "learning_rate": 4.1605962481180065e-06, + "logits/chosen": -2.3965444564819336, + "logits/rejected": -1.6197929382324219, + "logps/chosen": -500.9286193847656, + "logps/rejected": -1992.1802978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.308265686035156, + "rewards/margins": 15.166114807128906, + "rewards/rejected": -19.474380493164062, + "step": 17200 + }, + { + "epoch": 1.03, + "learning_rate": 4.159299217860021e-06, + "logits/chosen": -2.485217332839966, + "logits/rejected": -1.7686046361923218, + "logps/chosen": -500.26995849609375, + "logps/rejected": -1901.918212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.291665554046631, + "rewards/margins": 14.295034408569336, + "rewards/rejected": -18.58670425415039, + "step": 17210 + }, + { + "epoch": 1.03, + "learning_rate": 4.1580013888154126e-06, + "logits/chosen": -2.470932960510254, + "logits/rejected": -1.5683482885360718, + "logps/chosen": -508.55670166015625, + "logps/rejected": -1930.753173828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.424046039581299, + "rewards/margins": 14.439923286437988, + "rewards/rejected": -18.863971710205078, + "step": 17220 + }, + { + "epoch": 1.03, + "learning_rate": 4.156702761608956e-06, + "logits/chosen": -2.509525775909424, + "logits/rejected": -1.764733910560608, + "logps/chosen": -507.1607971191406, + "logps/rejected": -1840.3336181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.332683563232422, + "rewards/margins": 13.622897148132324, + "rewards/rejected": -17.955581665039062, + "step": 17230 + }, + { + "epoch": 1.03, + "learning_rate": 4.155403336865812e-06, + "logits/chosen": -2.5139288902282715, + "logits/rejected": -1.760088562965393, + "logps/chosen": -501.4825744628906, + "logps/rejected": -1794.0924072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.316361904144287, + "rewards/margins": 13.186816215515137, + "rewards/rejected": -17.503177642822266, + "step": 17240 + }, + { + "epoch": 1.03, + "learning_rate": 4.154103115211523e-06, + "logits/chosen": -2.4876132011413574, + "logits/rejected": -1.8091106414794922, + "logps/chosen": -505.24169921875, + "logps/rejected": -1886.484130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348996162414551, + "rewards/margins": 14.074369430541992, + "rewards/rejected": -18.42336654663086, + "step": 17250 + }, + { + "epoch": 1.03, + "learning_rate": 4.152802097272014e-06, + "logits/chosen": -2.5078346729278564, + "logits/rejected": -1.774107575416565, + "logps/chosen": -499.6590881347656, + "logps/rejected": -1811.9866943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.30530309677124, + "rewards/margins": 13.376077651977539, + "rewards/rejected": -17.681381225585938, + "step": 17260 + }, + { + "epoch": 1.03, + "learning_rate": 4.151500283673598e-06, + "logits/chosen": -2.4770565032958984, + "logits/rejected": -1.7767095565795898, + "logps/chosen": -508.24078369140625, + "logps/rejected": -1892.3564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3595733642578125, + "rewards/margins": 14.122018814086914, + "rewards/rejected": -18.481592178344727, + "step": 17270 + }, + { + "epoch": 1.03, + "learning_rate": 4.150197675042966e-06, + "logits/chosen": -2.4837493896484375, + "logits/rejected": -1.7595241069793701, + "logps/chosen": -513.33837890625, + "logps/rejected": -1832.9927978515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.461259365081787, + "rewards/margins": 13.420873641967773, + "rewards/rejected": -17.882131576538086, + "step": 17280 + }, + { + "epoch": 1.03, + "learning_rate": 4.1488942720071945e-06, + "logits/chosen": -2.465440273284912, + "logits/rejected": -1.8369996547698975, + "logps/chosen": -511.722412109375, + "logps/rejected": -1821.9427490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.434153079986572, + "rewards/margins": 13.34478759765625, + "rewards/rejected": -17.778940200805664, + "step": 17290 + }, + { + "epoch": 1.03, + "learning_rate": 4.1475900751937406e-06, + "logits/chosen": -2.49975323677063, + "logits/rejected": -1.7422301769256592, + "logps/chosen": -520.0336303710938, + "logps/rejected": -1826.6204833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.450678825378418, + "rewards/margins": 13.364021301269531, + "rewards/rejected": -17.814701080322266, + "step": 17300 + }, + { + "epoch": 1.03, + "learning_rate": 4.146285085230447e-06, + "logits/chosen": -2.524829149246216, + "logits/rejected": -1.8348686695098877, + "logps/chosen": -503.810302734375, + "logps/rejected": -1869.6702880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.291109085083008, + "rewards/margins": 13.971366882324219, + "rewards/rejected": -18.262475967407227, + "step": 17310 + }, + { + "epoch": 1.03, + "learning_rate": 4.144979302745533e-06, + "logits/chosen": -2.4442572593688965, + "logits/rejected": -1.7475738525390625, + "logps/chosen": -505.35455322265625, + "logps/rejected": -1886.935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.35244607925415, + "rewards/margins": 14.079618453979492, + "rewards/rejected": -18.432065963745117, + "step": 17320 + }, + { + "epoch": 1.03, + "learning_rate": 4.143672728367604e-06, + "logits/chosen": -2.476069450378418, + "logits/rejected": -1.7635095119476318, + "logps/chosen": -500.3505859375, + "logps/rejected": -1898.353271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.342679977416992, + "rewards/margins": 14.209611892700195, + "rewards/rejected": -18.552291870117188, + "step": 17330 + }, + { + "epoch": 1.03, + "learning_rate": 4.1423653627256445e-06, + "logits/chosen": -2.4959280490875244, + "logits/rejected": -1.838212251663208, + "logps/chosen": -499.57159423828125, + "logps/rejected": -1864.0595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.2806010246276855, + "rewards/margins": 13.93347454071045, + "rewards/rejected": -18.21407699584961, + "step": 17340 + }, + { + "epoch": 1.03, + "learning_rate": 4.141057206449021e-06, + "logits/chosen": -2.4389138221740723, + "logits/rejected": -1.7454252243041992, + "logps/chosen": -496.82476806640625, + "logps/rejected": -1869.240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.280585289001465, + "rewards/margins": 13.977493286132812, + "rewards/rejected": -18.25807762145996, + "step": 17350 + }, + { + "epoch": 1.04, + "learning_rate": 4.139748260167478e-06, + "logits/chosen": -2.4634149074554443, + "logits/rejected": -1.8515170812606812, + "logps/chosen": -450.03985595703125, + "logps/rejected": -1632.5032958984375, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8266549110412598, + "rewards/margins": 12.067304611206055, + "rewards/rejected": -15.893960952758789, + "step": 17360 + }, + { + "epoch": 1.04, + "learning_rate": 4.138438524511145e-06, + "logits/chosen": -2.476332187652588, + "logits/rejected": -1.8264672756195068, + "logps/chosen": -422.3567810058594, + "logps/rejected": -1630.1575927734375, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5316669940948486, + "rewards/margins": 12.336710929870605, + "rewards/rejected": -15.868377685546875, + "step": 17370 + }, + { + "epoch": 1.04, + "learning_rate": 4.1371280001105265e-06, + "logits/chosen": -2.411874294281006, + "logits/rejected": -1.6243034601211548, + "logps/chosen": -480.309814453125, + "logps/rejected": -1772.8834228515625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.084274768829346, + "rewards/margins": 13.219156265258789, + "rewards/rejected": -17.303430557250977, + "step": 17380 + }, + { + "epoch": 1.04, + "learning_rate": 4.135816687596509e-06, + "logits/chosen": -2.397989273071289, + "logits/rejected": -1.575989842414856, + "logps/chosen": -586.3870239257812, + "logps/rejected": -1879.090087890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2401909828186035, + "rewards/margins": 13.1250581741333, + "rewards/rejected": -18.36524772644043, + "step": 17390 + }, + { + "epoch": 1.04, + "learning_rate": 4.134504587600359e-06, + "logits/chosen": -2.36417818069458, + "logits/rejected": -1.4021083116531372, + "logps/chosen": -565.8858642578125, + "logps/rejected": -1971.5863037109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.940451622009277, + "rewards/margins": 14.328104019165039, + "rewards/rejected": -19.268550872802734, + "step": 17400 + }, + { + "epoch": 1.04, + "learning_rate": 4.133191700753722e-06, + "logits/chosen": -2.380589246749878, + "logits/rejected": -1.4816755056381226, + "logps/chosen": -531.7255859375, + "logps/rejected": -1897.653076171875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.612483024597168, + "rewards/margins": 13.934819221496582, + "rewards/rejected": -18.547304153442383, + "step": 17410 + }, + { + "epoch": 1.04, + "learning_rate": 4.13187802768862e-06, + "logits/chosen": -2.4006385803222656, + "logits/rejected": -1.4571442604064941, + "logps/chosen": -526.5582275390625, + "logps/rejected": -1899.691162109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.572897911071777, + "rewards/margins": 13.965414047241211, + "rewards/rejected": -18.538311004638672, + "step": 17420 + }, + { + "epoch": 1.04, + "learning_rate": 4.130563569037458e-06, + "logits/chosen": -2.412964105606079, + "logits/rejected": -1.5627691745758057, + "logps/chosen": -502.349609375, + "logps/rejected": -1839.247314453125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.315176963806152, + "rewards/margins": 13.62317943572998, + "rewards/rejected": -17.938358306884766, + "step": 17430 + }, + { + "epoch": 1.04, + "learning_rate": 4.129248325433014e-06, + "logits/chosen": -2.4182536602020264, + "logits/rejected": -1.5798475742340088, + "logps/chosen": -494.93011474609375, + "logps/rejected": -1900.9951171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.233238220214844, + "rewards/margins": 14.321174621582031, + "rewards/rejected": -18.554412841796875, + "step": 17440 + }, + { + "epoch": 1.04, + "learning_rate": 4.127932297508446e-06, + "logits/chosen": -2.4115569591522217, + "logits/rejected": -1.6583518981933594, + "logps/chosen": -499.94622802734375, + "logps/rejected": -1822.626708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.305069446563721, + "rewards/margins": 13.472333908081055, + "rewards/rejected": -17.77740478515625, + "step": 17450 + }, + { + "epoch": 1.04, + "learning_rate": 4.126615485897292e-06, + "logits/chosen": -2.425764799118042, + "logits/rejected": -1.7175709009170532, + "logps/chosen": -486.023193359375, + "logps/rejected": -1923.296630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.156233310699463, + "rewards/margins": 14.628520011901855, + "rewards/rejected": -18.784753799438477, + "step": 17460 + }, + { + "epoch": 1.04, + "learning_rate": 4.125297891233464e-06, + "logits/chosen": -2.433492660522461, + "logits/rejected": -1.6867361068725586, + "logps/chosen": -489.0956115722656, + "logps/rejected": -1814.115478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.203707218170166, + "rewards/margins": 13.500149726867676, + "rewards/rejected": -17.703855514526367, + "step": 17470 + }, + { + "epoch": 1.04, + "learning_rate": 4.12397951415125e-06, + "logits/chosen": -2.4400267601013184, + "logits/rejected": -1.6648128032684326, + "logps/chosen": -483.88885498046875, + "logps/rejected": -1870.376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.125829696655273, + "rewards/margins": 14.138331413269043, + "rewards/rejected": -18.264162063598633, + "step": 17480 + }, + { + "epoch": 1.04, + "learning_rate": 4.12266035528532e-06, + "logits/chosen": -2.4645369052886963, + "logits/rejected": -1.7412662506103516, + "logps/chosen": -501.07012939453125, + "logps/rejected": -1939.9453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.309517860412598, + "rewards/margins": 14.637051582336426, + "rewards/rejected": -18.946569442749023, + "step": 17490 + }, + { + "epoch": 1.04, + "learning_rate": 4.1213404152707155e-06, + "logits/chosen": -2.4678409099578857, + "logits/rejected": -1.6921732425689697, + "logps/chosen": -488.81146240234375, + "logps/rejected": -1902.4437255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.172663688659668, + "rewards/margins": 14.407781600952148, + "rewards/rejected": -18.5804443359375, + "step": 17500 + }, + { + "epoch": 1.04, + "learning_rate": 4.120019694742856e-06, + "logits/chosen": -2.423917055130005, + "logits/rejected": -1.564324140548706, + "logps/chosen": -490.5675354003906, + "logps/rejected": -1999.9710693359375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.186971664428711, + "rewards/margins": 15.369504928588867, + "rewards/rejected": -19.55647850036621, + "step": 17510 + }, + { + "epoch": 1.04, + "learning_rate": 4.118698194337536e-06, + "logits/chosen": -2.477398633956909, + "logits/rejected": -1.6355739831924438, + "logps/chosen": -481.0943908691406, + "logps/rejected": -1966.949462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1359429359436035, + "rewards/margins": 15.103080749511719, + "rewards/rejected": -19.239023208618164, + "step": 17520 + }, + { + "epoch": 1.05, + "learning_rate": 4.117375914690925e-06, + "logits/chosen": -2.444131374359131, + "logits/rejected": -1.5368421077728271, + "logps/chosen": -484.3766174316406, + "logps/rejected": -1833.0426025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.17075777053833, + "rewards/margins": 13.706791877746582, + "rewards/rejected": -17.877548217773438, + "step": 17530 + }, + { + "epoch": 1.05, + "learning_rate": 4.11605285643957e-06, + "logits/chosen": -2.4379053115844727, + "logits/rejected": -1.691849946975708, + "logps/chosen": -474.34442138671875, + "logps/rejected": -1884.964599609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036074161529541, + "rewards/margins": 14.372915267944336, + "rewards/rejected": -18.40899085998535, + "step": 17540 + }, + { + "epoch": 1.05, + "learning_rate": 4.114729020220392e-06, + "logits/chosen": -2.442819595336914, + "logits/rejected": -1.7597144842147827, + "logps/chosen": -488.51373291015625, + "logps/rejected": -1904.753662109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1375226974487305, + "rewards/margins": 14.46729564666748, + "rewards/rejected": -18.604816436767578, + "step": 17550 + }, + { + "epoch": 1.05, + "learning_rate": 4.113404406670682e-06, + "logits/chosen": -2.4453768730163574, + "logits/rejected": -1.7085096836090088, + "logps/chosen": -473.91070556640625, + "logps/rejected": -1920.9613037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.060914993286133, + "rewards/margins": 14.698003768920898, + "rewards/rejected": -18.758914947509766, + "step": 17560 + }, + { + "epoch": 1.05, + "learning_rate": 4.112079016428112e-06, + "logits/chosen": -2.459413528442383, + "logits/rejected": -1.6526727676391602, + "logps/chosen": -482.23748779296875, + "logps/rejected": -1985.688720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.127596855163574, + "rewards/margins": 15.275688171386719, + "rewards/rejected": -19.40328598022461, + "step": 17570 + }, + { + "epoch": 1.05, + "learning_rate": 4.110752850130724e-06, + "logits/chosen": -2.4308457374572754, + "logits/rejected": -1.68880295753479, + "logps/chosen": -493.0311584472656, + "logps/rejected": -1891.9652099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.227743148803711, + "rewards/margins": 14.249063491821289, + "rewards/rejected": -18.476806640625, + "step": 17580 + }, + { + "epoch": 1.05, + "learning_rate": 4.109425908416934e-06, + "logits/chosen": -2.511389970779419, + "logits/rejected": -1.6444885730743408, + "logps/chosen": -491.1732482910156, + "logps/rejected": -1852.9583740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.204749584197998, + "rewards/margins": 13.872766494750977, + "rewards/rejected": -18.077518463134766, + "step": 17590 + }, + { + "epoch": 1.05, + "learning_rate": 4.108098191925533e-06, + "logits/chosen": -2.4662625789642334, + "logits/rejected": -1.6292108297348022, + "logps/chosen": -472.6700134277344, + "logps/rejected": -1906.73828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.011214256286621, + "rewards/margins": 14.618551254272461, + "rewards/rejected": -18.6297664642334, + "step": 17600 + }, + { + "epoch": 1.05, + "learning_rate": 4.106769701295683e-06, + "logits/chosen": -2.4432120323181152, + "logits/rejected": -1.6918693780899048, + "logps/chosen": -487.81304931640625, + "logps/rejected": -1821.882568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.137691497802734, + "rewards/margins": 13.625932693481445, + "rewards/rejected": -17.76362419128418, + "step": 17610 + }, + { + "epoch": 1.05, + "learning_rate": 4.105440437166919e-06, + "logits/chosen": -2.4130942821502686, + "logits/rejected": -1.5614664554595947, + "logps/chosen": -482.78900146484375, + "logps/rejected": -2008.5250244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.174017429351807, + "rewards/margins": 15.468640327453613, + "rewards/rejected": -19.642656326293945, + "step": 17620 + }, + { + "epoch": 1.05, + "learning_rate": 4.104110400179148e-06, + "logits/chosen": -2.4696202278137207, + "logits/rejected": -1.5103695392608643, + "logps/chosen": -487.3702087402344, + "logps/rejected": -1991.1011962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1339030265808105, + "rewards/margins": 15.327112197875977, + "rewards/rejected": -19.461013793945312, + "step": 17630 + }, + { + "epoch": 1.05, + "learning_rate": 4.102779590972652e-06, + "logits/chosen": -2.4984161853790283, + "logits/rejected": -1.6200205087661743, + "logps/chosen": -467.8919372558594, + "logps/rejected": -1912.6810302734375, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8770751953125, + "rewards/margins": 14.809791564941406, + "rewards/rejected": -18.686864852905273, + "step": 17640 + }, + { + "epoch": 1.05, + "learning_rate": 4.101448010188082e-06, + "logits/chosen": -2.477109432220459, + "logits/rejected": -1.8048560619354248, + "logps/chosen": -446.09295654296875, + "logps/rejected": -1784.4749755859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8233590126037598, + "rewards/margins": 13.589149475097656, + "rewards/rejected": -17.41250991821289, + "step": 17650 + }, + { + "epoch": 1.05, + "learning_rate": 4.100115658466458e-06, + "logits/chosen": -2.5541775226593018, + "logits/rejected": -1.8206411600112915, + "logps/chosen": -447.2823181152344, + "logps/rejected": -1782.5419921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7884762287139893, + "rewards/margins": 13.592251777648926, + "rewards/rejected": -17.380725860595703, + "step": 17660 + }, + { + "epoch": 1.05, + "learning_rate": 4.098782536449179e-06, + "logits/chosen": -2.5284571647644043, + "logits/rejected": -1.82082998752594, + "logps/chosen": -454.72601318359375, + "logps/rejected": -1906.978271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8594970703125, + "rewards/margins": 14.762678146362305, + "rewards/rejected": -18.622177124023438, + "step": 17670 + }, + { + "epoch": 1.05, + "learning_rate": 4.0974486447780055e-06, + "logits/chosen": -2.4925918579101562, + "logits/rejected": -1.7452808618545532, + "logps/chosen": -456.5306701660156, + "logps/rejected": -1846.6939697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.787992477416992, + "rewards/margins": 14.23112964630127, + "rewards/rejected": -18.019121170043945, + "step": 17680 + }, + { + "epoch": 1.05, + "learning_rate": 4.096113984095076e-06, + "logits/chosen": -2.5230438709259033, + "logits/rejected": -1.7652441263198853, + "logps/chosen": -455.17596435546875, + "logps/rejected": -1955.149169921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8115570545196533, + "rewards/margins": 15.305810928344727, + "rewards/rejected": -19.117366790771484, + "step": 17690 + }, + { + "epoch": 1.06, + "learning_rate": 4.094778555042893e-06, + "logits/chosen": -2.5222246646881104, + "logits/rejected": -1.7826074361801147, + "logps/chosen": -461.17694091796875, + "logps/rejected": -1826.841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8364105224609375, + "rewards/margins": 13.989848136901855, + "rewards/rejected": -17.82625961303711, + "step": 17700 + }, + { + "epoch": 1.06, + "learning_rate": 4.0934423582643355e-06, + "logits/chosen": -2.527495861053467, + "logits/rejected": -1.6925532817840576, + "logps/chosen": -453.72174072265625, + "logps/rejected": -1905.4365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.847935438156128, + "rewards/margins": 14.76795768737793, + "rewards/rejected": -18.61589241027832, + "step": 17710 + }, + { + "epoch": 1.06, + "learning_rate": 4.0921053944026465e-06, + "logits/chosen": -2.546060085296631, + "logits/rejected": -1.7727102041244507, + "logps/chosen": -466.4256286621094, + "logps/rejected": -1938.958740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9174606800079346, + "rewards/margins": 15.036497116088867, + "rewards/rejected": -18.953960418701172, + "step": 17720 + }, + { + "epoch": 1.06, + "learning_rate": 4.090767664101442e-06, + "logits/chosen": -2.5268516540527344, + "logits/rejected": -1.8755967617034912, + "logps/chosen": -469.6995544433594, + "logps/rejected": -1857.477294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.986161470413208, + "rewards/margins": 14.15113353729248, + "rewards/rejected": -18.13729476928711, + "step": 17730 + }, + { + "epoch": 1.06, + "learning_rate": 4.089429168004704e-06, + "logits/chosen": -2.5316872596740723, + "logits/rejected": -2.013329029083252, + "logps/chosen": -464.75079345703125, + "logps/rejected": -1678.6650390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9588863849639893, + "rewards/margins": 12.385123252868652, + "rewards/rejected": -16.344009399414062, + "step": 17740 + }, + { + "epoch": 1.06, + "learning_rate": 4.088089906756784e-06, + "logits/chosen": -2.479539155960083, + "logits/rejected": -1.798333764076233, + "logps/chosen": -464.20489501953125, + "logps/rejected": -1868.9368896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.959743022918701, + "rewards/margins": 14.298166275024414, + "rewards/rejected": -18.25790786743164, + "step": 17750 + }, + { + "epoch": 1.06, + "learning_rate": 4.086749881002403e-06, + "logits/chosen": -2.498898983001709, + "logits/rejected": -1.712054967880249, + "logps/chosen": -465.697265625, + "logps/rejected": -1896.048095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.891392230987549, + "rewards/margins": 14.63127613067627, + "rewards/rejected": -18.52267074584961, + "step": 17760 + }, + { + "epoch": 1.06, + "learning_rate": 4.08540909138665e-06, + "logits/chosen": -2.5188519954681396, + "logits/rejected": -1.8018581867218018, + "logps/chosen": -469.72021484375, + "logps/rejected": -1754.2474365234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.993077516555786, + "rewards/margins": 13.105255126953125, + "rewards/rejected": -17.09833335876465, + "step": 17770 + }, + { + "epoch": 1.06, + "learning_rate": 4.084067538554981e-06, + "logits/chosen": -2.5102474689483643, + "logits/rejected": -1.7928187847137451, + "logps/chosen": -458.31201171875, + "logps/rejected": -1893.690185546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.898019313812256, + "rewards/margins": 14.599884033203125, + "rewards/rejected": -18.497905731201172, + "step": 17780 + }, + { + "epoch": 1.06, + "learning_rate": 4.0827252231532185e-06, + "logits/chosen": -2.509915590286255, + "logits/rejected": -1.8525676727294922, + "logps/chosen": -484.85980224609375, + "logps/rejected": -1928.1962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.135530948638916, + "rewards/margins": 14.7041597366333, + "rewards/rejected": -18.839691162109375, + "step": 17790 + }, + { + "epoch": 1.06, + "learning_rate": 4.081382145827554e-06, + "logits/chosen": -2.4859917163848877, + "logits/rejected": -1.7902657985687256, + "logps/chosen": -477.41259765625, + "logps/rejected": -1944.40234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036859035491943, + "rewards/margins": 14.959711074829102, + "rewards/rejected": -18.996570587158203, + "step": 17800 + }, + { + "epoch": 1.06, + "learning_rate": 4.080038307224544e-06, + "logits/chosen": -2.507127285003662, + "logits/rejected": -1.7027757167816162, + "logps/chosen": -488.76922607421875, + "logps/rejected": -2024.379638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.121608734130859, + "rewards/margins": 15.674798965454102, + "rewards/rejected": -19.79640769958496, + "step": 17810 + }, + { + "epoch": 1.06, + "learning_rate": 4.078693707991115e-06, + "logits/chosen": -2.5072951316833496, + "logits/rejected": -1.8556222915649414, + "logps/chosen": -472.9852600097656, + "logps/rejected": -1947.2213134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.002696514129639, + "rewards/margins": 15.018033981323242, + "rewards/rejected": -19.020732879638672, + "step": 17820 + }, + { + "epoch": 1.06, + "learning_rate": 4.0773483487745536e-06, + "logits/chosen": -2.442354917526245, + "logits/rejected": -1.774278998374939, + "logps/chosen": -478.9989318847656, + "logps/rejected": -1840.274169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.094116687774658, + "rewards/margins": 13.864102363586426, + "rewards/rejected": -17.958219528198242, + "step": 17830 + }, + { + "epoch": 1.06, + "learning_rate": 4.0760022302225175e-06, + "logits/chosen": -2.488323450088501, + "logits/rejected": -1.8010038137435913, + "logps/chosen": -484.0796813964844, + "logps/rejected": -1855.1751708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.1115312576293945, + "rewards/margins": 13.98859977722168, + "rewards/rejected": -18.10013198852539, + "step": 17840 + }, + { + "epoch": 1.06, + "learning_rate": 4.0746553529830274e-06, + "logits/chosen": -2.4977288246154785, + "logits/rejected": -1.8551298379898071, + "logps/chosen": -475.93505859375, + "logps/rejected": -1870.7464599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.109603404998779, + "rewards/margins": 14.153036117553711, + "rewards/rejected": -18.262638092041016, + "step": 17850 + }, + { + "epoch": 1.06, + "learning_rate": 4.073307717704471e-06, + "logits/chosen": -2.5018186569213867, + "logits/rejected": -1.8550018072128296, + "logps/chosen": -478.5262145996094, + "logps/rejected": -1865.5123291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.104914665222168, + "rewards/margins": 14.116636276245117, + "rewards/rejected": -18.2215518951416, + "step": 17860 + }, + { + "epoch": 1.07, + "learning_rate": 4.071959325035599e-06, + "logits/chosen": -2.483258008956909, + "logits/rejected": -1.8218262195587158, + "logps/chosen": -472.17291259765625, + "logps/rejected": -1931.5230712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.004550457000732, + "rewards/margins": 14.869982719421387, + "rewards/rejected": -18.874534606933594, + "step": 17870 + }, + { + "epoch": 1.07, + "learning_rate": 4.070610175625528e-06, + "logits/chosen": -2.486212968826294, + "logits/rejected": -1.6376031637191772, + "logps/chosen": -494.7484436035156, + "logps/rejected": -1939.8275146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.232024192810059, + "rewards/margins": 14.719987869262695, + "rewards/rejected": -18.95201301574707, + "step": 17880 + }, + { + "epoch": 1.07, + "learning_rate": 4.069260270123739e-06, + "logits/chosen": -2.5489211082458496, + "logits/rejected": -1.8706848621368408, + "logps/chosen": -471.0279846191406, + "logps/rejected": -1859.203369140625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0136518478393555, + "rewards/margins": 14.134394645690918, + "rewards/rejected": -18.148046493530273, + "step": 17890 + }, + { + "epoch": 1.07, + "learning_rate": 4.067909609180075e-06, + "logits/chosen": -2.500617504119873, + "logits/rejected": -1.943881630897522, + "logps/chosen": -462.2579650878906, + "logps/rejected": -1887.1480712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8925490379333496, + "rewards/margins": 14.52668571472168, + "rewards/rejected": -18.419235229492188, + "step": 17900 + }, + { + "epoch": 1.07, + "learning_rate": 4.066558193444746e-06, + "logits/chosen": -2.4015374183654785, + "logits/rejected": -1.6972535848617554, + "logps/chosen": -491.69903564453125, + "logps/rejected": -2014.980712890625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.228610038757324, + "rewards/margins": 15.466585159301758, + "rewards/rejected": -19.6951961517334, + "step": 17910 + }, + { + "epoch": 1.07, + "learning_rate": 4.065206023568323e-06, + "logits/chosen": -2.3395934104919434, + "logits/rejected": -1.64919912815094, + "logps/chosen": -563.4887084960938, + "logps/rejected": -2009.396728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.986479759216309, + "rewards/margins": 14.659113883972168, + "rewards/rejected": -19.645591735839844, + "step": 17920 + }, + { + "epoch": 1.07, + "learning_rate": 4.0638531002017386e-06, + "logits/chosen": -2.425755262374878, + "logits/rejected": -1.6718165874481201, + "logps/chosen": -579.167724609375, + "logps/rejected": -2055.524658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.091667175292969, + "rewards/margins": 15.00953197479248, + "rewards/rejected": -20.1011962890625, + "step": 17930 + }, + { + "epoch": 1.07, + "learning_rate": 4.0624994239962935e-06, + "logits/chosen": -2.3986480236053467, + "logits/rejected": -1.5970408916473389, + "logps/chosen": -582.7232666015625, + "logps/rejected": -2063.50048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130894184112549, + "rewards/margins": 15.059125900268555, + "rewards/rejected": -20.190021514892578, + "step": 17940 + }, + { + "epoch": 1.07, + "learning_rate": 4.061144995603644e-06, + "logits/chosen": -2.4266433715820312, + "logits/rejected": -1.5587893724441528, + "logps/chosen": -591.7642822265625, + "logps/rejected": -2060.4462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.199445724487305, + "rewards/margins": 14.968847274780273, + "rewards/rejected": -20.168292999267578, + "step": 17950 + }, + { + "epoch": 1.07, + "learning_rate": 4.059789815675815e-06, + "logits/chosen": -2.401641368865967, + "logits/rejected": -1.5714163780212402, + "logps/chosen": -570.2064208984375, + "logps/rejected": -2035.700927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.951666831970215, + "rewards/margins": 14.959800720214844, + "rewards/rejected": -19.911468505859375, + "step": 17960 + }, + { + "epoch": 1.07, + "learning_rate": 4.058433884865188e-06, + "logits/chosen": -2.3976807594299316, + "logits/rejected": -1.5817610025405884, + "logps/chosen": -577.7536010742188, + "logps/rejected": -2034.9381103515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.073649883270264, + "rewards/margins": 14.828862190246582, + "rewards/rejected": -19.90251350402832, + "step": 17970 + }, + { + "epoch": 1.07, + "learning_rate": 4.057077203824509e-06, + "logits/chosen": -2.4659340381622314, + "logits/rejected": -1.7309879064559937, + "logps/chosen": -581.3992309570312, + "logps/rejected": -2162.05615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.065001010894775, + "rewards/margins": 16.119110107421875, + "rewards/rejected": -21.184112548828125, + "step": 17980 + }, + { + "epoch": 1.07, + "learning_rate": 4.055719773206883e-06, + "logits/chosen": -2.428328037261963, + "logits/rejected": -1.6220118999481201, + "logps/chosen": -568.5042724609375, + "logps/rejected": -2181.231689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.998475074768066, + "rewards/margins": 16.37039566040039, + "rewards/rejected": -21.368871688842773, + "step": 17990 + }, + { + "epoch": 1.07, + "learning_rate": 4.0543615936657785e-06, + "logits/chosen": -2.3916196823120117, + "logits/rejected": -1.7242666482925415, + "logps/chosen": -560.71044921875, + "logps/rejected": -2039.301513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.91387939453125, + "rewards/margins": 15.040878295898438, + "rewards/rejected": -19.954757690429688, + "step": 18000 + }, + { + "epoch": 1.07, + "learning_rate": 4.053002665855023e-06, + "logits/chosen": -2.4295754432678223, + "logits/rejected": -1.6837425231933594, + "logps/chosen": -572.5679931640625, + "logps/rejected": -2117.618408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.044869422912598, + "rewards/margins": 15.68799114227295, + "rewards/rejected": -20.732860565185547, + "step": 18010 + }, + { + "epoch": 1.07, + "learning_rate": 4.051642990428803e-06, + "logits/chosen": -2.3937606811523438, + "logits/rejected": -1.5813525915145874, + "logps/chosen": -556.9114990234375, + "logps/rejected": -2096.77587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.889843940734863, + "rewards/margins": 15.6396484375, + "rewards/rejected": -20.529491424560547, + "step": 18020 + }, + { + "epoch": 1.08, + "learning_rate": 4.050282568041668e-06, + "logits/chosen": -2.4179940223693848, + "logits/rejected": -1.6754785776138306, + "logps/chosen": -563.8974609375, + "logps/rejected": -1953.124755859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.934612274169922, + "rewards/margins": 14.160372734069824, + "rewards/rejected": -19.094985961914062, + "step": 18030 + }, + { + "epoch": 1.08, + "learning_rate": 4.048921399348525e-06, + "logits/chosen": -2.4270877838134766, + "logits/rejected": -1.708386778831482, + "logps/chosen": -563.6429443359375, + "logps/rejected": -2062.716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.871340751647949, + "rewards/margins": 15.314763069152832, + "rewards/rejected": -20.18610191345215, + "step": 18040 + }, + { + "epoch": 1.08, + "learning_rate": 4.0475594850046395e-06, + "logits/chosen": -2.4739909172058105, + "logits/rejected": -1.6923935413360596, + "logps/chosen": -563.507080078125, + "logps/rejected": -2080.782470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.898784637451172, + "rewards/margins": 15.46471881866455, + "rewards/rejected": -20.363506317138672, + "step": 18050 + }, + { + "epoch": 1.08, + "learning_rate": 4.046196825665638e-06, + "logits/chosen": -2.439988136291504, + "logits/rejected": -1.7797050476074219, + "logps/chosen": -563.3170166015625, + "logps/rejected": -2165.19287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.924996376037598, + "rewards/margins": 16.28469467163086, + "rewards/rejected": -21.20969009399414, + "step": 18060 + }, + { + "epoch": 1.08, + "learning_rate": 4.044833421987504e-06, + "logits/chosen": -2.4289498329162598, + "logits/rejected": -1.5278041362762451, + "logps/chosen": -575.3001098632812, + "logps/rejected": -2073.67822265625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.02435827255249, + "rewards/margins": 15.267329216003418, + "rewards/rejected": -20.291688919067383, + "step": 18070 + }, + { + "epoch": 1.08, + "learning_rate": 4.04346927462658e-06, + "logits/chosen": -2.4386847019195557, + "logits/rejected": -1.5996605157852173, + "logps/chosen": -595.5032348632812, + "logps/rejected": -2125.562744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.15428352355957, + "rewards/margins": 15.665273666381836, + "rewards/rejected": -20.81955909729004, + "step": 18080 + }, + { + "epoch": 1.08, + "learning_rate": 4.042104384239568e-06, + "logits/chosen": -2.461756467819214, + "logits/rejected": -1.5936501026153564, + "logps/chosen": -562.0401000976562, + "logps/rejected": -2056.59814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.897141456604004, + "rewards/margins": 15.235135078430176, + "rewards/rejected": -20.132274627685547, + "step": 18090 + }, + { + "epoch": 1.08, + "learning_rate": 4.040738751483524e-06, + "logits/chosen": -2.423644542694092, + "logits/rejected": -1.7131767272949219, + "logps/chosen": -569.5358276367188, + "logps/rejected": -2057.09912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0512375831604, + "rewards/margins": 15.077661514282227, + "rewards/rejected": -20.128896713256836, + "step": 18100 + }, + { + "epoch": 1.08, + "learning_rate": 4.039372377015865e-06, + "logits/chosen": -2.434237241744995, + "logits/rejected": -1.7409595251083374, + "logps/chosen": -573.0613403320312, + "logps/rejected": -1997.686767578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.034761428833008, + "rewards/margins": 14.498695373535156, + "rewards/rejected": -19.533456802368164, + "step": 18110 + }, + { + "epoch": 1.08, + "learning_rate": 4.038005261494364e-06, + "logits/chosen": -2.400339126586914, + "logits/rejected": -1.581743597984314, + "logps/chosen": -572.6285400390625, + "logps/rejected": -2004.8111572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.061880111694336, + "rewards/margins": 14.5370454788208, + "rewards/rejected": -19.598926544189453, + "step": 18120 + }, + { + "epoch": 1.08, + "learning_rate": 4.0366374055771484e-06, + "logits/chosen": -2.4468109607696533, + "logits/rejected": -1.7618697881698608, + "logps/chosen": -583.7780151367188, + "logps/rejected": -1956.931884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.136684894561768, + "rewards/margins": 13.986923217773438, + "rewards/rejected": -19.123607635498047, + "step": 18130 + }, + { + "epoch": 1.08, + "learning_rate": 4.035268809922705e-06, + "logits/chosen": -2.404245376586914, + "logits/rejected": -1.634216070175171, + "logps/chosen": -575.8792114257812, + "logps/rejected": -2058.56396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.084730625152588, + "rewards/margins": 15.059530258178711, + "rewards/rejected": -20.14426040649414, + "step": 18140 + }, + { + "epoch": 1.08, + "learning_rate": 4.033899475189877e-06, + "logits/chosen": -2.42814564704895, + "logits/rejected": -1.6194740533828735, + "logps/chosen": -577.35498046875, + "logps/rejected": -2095.970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.088072299957275, + "rewards/margins": 15.425311088562012, + "rewards/rejected": -20.513381958007812, + "step": 18150 + }, + { + "epoch": 1.08, + "learning_rate": 4.032529402037862e-06, + "logits/chosen": -2.4327492713928223, + "logits/rejected": -1.619344711303711, + "logps/chosen": -590.06103515625, + "logps/rejected": -2100.68798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1913161277771, + "rewards/margins": 15.373067855834961, + "rewards/rejected": -20.56438446044922, + "step": 18160 + }, + { + "epoch": 1.08, + "learning_rate": 4.031158591126212e-06, + "logits/chosen": -2.463031053543091, + "logits/rejected": -1.7927582263946533, + "logps/chosen": -578.0684814453125, + "logps/rejected": -2043.279541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.08013391494751, + "rewards/margins": 14.918081283569336, + "rewards/rejected": -19.99821662902832, + "step": 18170 + }, + { + "epoch": 1.08, + "learning_rate": 4.029787043114835e-06, + "logits/chosen": -2.4246108531951904, + "logits/rejected": -1.6608949899673462, + "logps/chosen": -576.1307983398438, + "logps/rejected": -2145.70947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.998186111450195, + "rewards/margins": 16.003522872924805, + "rewards/rejected": -21.001707077026367, + "step": 18180 + }, + { + "epoch": 1.08, + "learning_rate": 4.028414758663996e-06, + "logits/chosen": -2.4318630695343018, + "logits/rejected": -1.7915138006210327, + "logps/chosen": -565.1385498046875, + "logps/rejected": -2096.222412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.970643520355225, + "rewards/margins": 15.551648139953613, + "rewards/rejected": -20.522289276123047, + "step": 18190 + }, + { + "epoch": 1.09, + "learning_rate": 4.02704173843431e-06, + "logits/chosen": -2.4548258781433105, + "logits/rejected": -1.7383615970611572, + "logps/chosen": -572.3582763671875, + "logps/rejected": -2070.62744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.052677154541016, + "rewards/margins": 15.21301555633545, + "rewards/rejected": -20.26569366455078, + "step": 18200 + }, + { + "epoch": 1.09, + "learning_rate": 4.025667983086753e-06, + "logits/chosen": -2.4233503341674805, + "logits/rejected": -1.7207801342010498, + "logps/chosen": -560.6092529296875, + "logps/rejected": -2064.45703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.9128828048706055, + "rewards/margins": 15.276954650878906, + "rewards/rejected": -20.189836502075195, + "step": 18210 + }, + { + "epoch": 1.09, + "learning_rate": 4.024293493282647e-06, + "logits/chosen": -2.4205410480499268, + "logits/rejected": -1.7470486164093018, + "logps/chosen": -577.2360229492188, + "logps/rejected": -2033.3267822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.071435928344727, + "rewards/margins": 14.813715934753418, + "rewards/rejected": -19.885150909423828, + "step": 18220 + }, + { + "epoch": 1.09, + "learning_rate": 4.022918269683672e-06, + "logits/chosen": -2.4781103134155273, + "logits/rejected": -1.7375249862670898, + "logps/chosen": -575.2891235351562, + "logps/rejected": -2236.759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.077924728393555, + "rewards/margins": 16.844196319580078, + "rewards/rejected": -21.922121047973633, + "step": 18230 + }, + { + "epoch": 1.09, + "learning_rate": 4.021542312951862e-06, + "logits/chosen": -2.4229373931884766, + "logits/rejected": -1.668891191482544, + "logps/chosen": -574.341552734375, + "logps/rejected": -2143.08935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.093480110168457, + "rewards/margins": 15.897329330444336, + "rewards/rejected": -20.99081039428711, + "step": 18240 + }, + { + "epoch": 1.09, + "learning_rate": 4.0201656237496025e-06, + "logits/chosen": -2.4136390686035156, + "logits/rejected": -1.586894154548645, + "logps/chosen": -591.627685546875, + "logps/rejected": -2092.67041015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.244629859924316, + "rewards/margins": 15.242563247680664, + "rewards/rejected": -20.487192153930664, + "step": 18250 + }, + { + "epoch": 1.09, + "learning_rate": 4.018788202739631e-06, + "logits/chosen": -2.4138712882995605, + "logits/rejected": -1.6801588535308838, + "logps/chosen": -563.4005737304688, + "logps/rejected": -2119.266357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.887686729431152, + "rewards/margins": 15.871030807495117, + "rewards/rejected": -20.758718490600586, + "step": 18260 + }, + { + "epoch": 1.09, + "learning_rate": 4.017410050585038e-06, + "logits/chosen": -2.4683022499084473, + "logits/rejected": -1.6388444900512695, + "logps/chosen": -571.9047241210938, + "logps/rejected": -2097.96484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.993393421173096, + "rewards/margins": 15.549291610717773, + "rewards/rejected": -20.54268455505371, + "step": 18270 + }, + { + "epoch": 1.09, + "learning_rate": 4.016031167949266e-06, + "logits/chosen": -2.418471097946167, + "logits/rejected": -1.7153934240341187, + "logps/chosen": -570.7261962890625, + "logps/rejected": -2079.66015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.050310134887695, + "rewards/margins": 15.307368278503418, + "rewards/rejected": -20.357681274414062, + "step": 18280 + }, + { + "epoch": 1.09, + "learning_rate": 4.0146515554961085e-06, + "logits/chosen": -2.464060068130493, + "logits/rejected": -1.6445667743682861, + "logps/chosen": -592.742431640625, + "logps/rejected": -2130.32568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21104621887207, + "rewards/margins": 15.652671813964844, + "rewards/rejected": -20.863718032836914, + "step": 18290 + }, + { + "epoch": 1.09, + "learning_rate": 4.013271213889712e-06, + "logits/chosen": -2.4584240913391113, + "logits/rejected": -1.6072142124176025, + "logps/chosen": -572.1554565429688, + "logps/rejected": -2021.398681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.012666702270508, + "rewards/margins": 14.76545238494873, + "rewards/rejected": -19.778118133544922, + "step": 18300 + }, + { + "epoch": 1.09, + "learning_rate": 4.011890143794572e-06, + "logits/chosen": -2.3909761905670166, + "logits/rejected": -1.5935088396072388, + "logps/chosen": -574.08740234375, + "logps/rejected": -2128.396728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0253167152404785, + "rewards/margins": 15.819714546203613, + "rewards/rejected": -20.84503173828125, + "step": 18310 + }, + { + "epoch": 1.09, + "learning_rate": 4.0105083458755365e-06, + "logits/chosen": -2.475860595703125, + "logits/rejected": -1.584288477897644, + "logps/chosen": -585.6168212890625, + "logps/rejected": -2116.597900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.191895961761475, + "rewards/margins": 15.533170700073242, + "rewards/rejected": -20.725067138671875, + "step": 18320 + }, + { + "epoch": 1.09, + "learning_rate": 4.009125820797802e-06, + "logits/chosen": -2.4147820472717285, + "logits/rejected": -1.649003028869629, + "logps/chosen": -591.7201538085938, + "logps/rejected": -2111.204345703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.20508337020874, + "rewards/margins": 15.464157104492188, + "rewards/rejected": -20.669240951538086, + "step": 18330 + }, + { + "epoch": 1.09, + "learning_rate": 4.007742569226918e-06, + "logits/chosen": -2.4479329586029053, + "logits/rejected": -1.5543371438980103, + "logps/chosen": -568.5439453125, + "logps/rejected": -2125.545166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.948000907897949, + "rewards/margins": 15.869298934936523, + "rewards/rejected": -20.81730079650879, + "step": 18340 + }, + { + "epoch": 1.09, + "learning_rate": 4.00635859182878e-06, + "logits/chosen": -2.4063315391540527, + "logits/rejected": -1.5799083709716797, + "logps/chosen": -572.2252807617188, + "logps/rejected": -2092.376220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.975944519042969, + "rewards/margins": 15.509515762329102, + "rewards/rejected": -20.48546028137207, + "step": 18350 + }, + { + "epoch": 1.09, + "learning_rate": 4.0049738892696345e-06, + "logits/chosen": -2.412095546722412, + "logits/rejected": -1.5356043577194214, + "logps/chosen": -589.81494140625, + "logps/rejected": -2052.302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130119323730469, + "rewards/margins": 14.941217422485352, + "rewards/rejected": -20.071334838867188, + "step": 18360 + }, + { + "epoch": 1.1, + "learning_rate": 4.003588462216077e-06, + "logits/chosen": -2.464381694793701, + "logits/rejected": -1.7117910385131836, + "logps/chosen": -586.8700561523438, + "logps/rejected": -2118.38916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.185194969177246, + "rewards/margins": 15.54792308807373, + "rewards/rejected": -20.73311996459961, + "step": 18370 + }, + { + "epoch": 1.1, + "learning_rate": 4.002202311335054e-06, + "logits/chosen": -2.4191954135894775, + "logits/rejected": -1.5890213251113892, + "logps/chosen": -585.6409912109375, + "logps/rejected": -2119.44873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.190682888031006, + "rewards/margins": 15.557121276855469, + "rewards/rejected": -20.747806549072266, + "step": 18380 + }, + { + "epoch": 1.1, + "learning_rate": 4.000815437293858e-06, + "logits/chosen": -2.465808153152466, + "logits/rejected": -1.6405709981918335, + "logps/chosen": -591.2027587890625, + "logps/rejected": -1970.989013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239993095397949, + "rewards/margins": 14.022939682006836, + "rewards/rejected": -19.2629337310791, + "step": 18390 + }, + { + "epoch": 1.1, + "learning_rate": 3.999427840760129e-06, + "logits/chosen": -2.4105422496795654, + "logits/rejected": -1.6706573963165283, + "logps/chosen": -623.7108154296875, + "logps/rejected": -2102.048583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485660552978516, + "rewards/margins": 15.09272289276123, + "rewards/rejected": -20.57838249206543, + "step": 18400 + }, + { + "epoch": 1.1, + "learning_rate": 3.998039522401857e-06, + "logits/chosen": -2.434446096420288, + "logits/rejected": -1.7005269527435303, + "logps/chosen": -576.99267578125, + "logps/rejected": -2070.35693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130105018615723, + "rewards/margins": 15.124641418457031, + "rewards/rejected": -20.254745483398438, + "step": 18410 + }, + { + "epoch": 1.1, + "learning_rate": 3.996650482887377e-06, + "logits/chosen": -2.403810977935791, + "logits/rejected": -1.6445831060409546, + "logps/chosen": -590.7000732421875, + "logps/rejected": -2155.33935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.18209171295166, + "rewards/margins": 15.930499076843262, + "rewards/rejected": -21.112590789794922, + "step": 18420 + }, + { + "epoch": 1.1, + "learning_rate": 3.995260722885374e-06, + "logits/chosen": -2.4557278156280518, + "logits/rejected": -1.640233039855957, + "logps/chosen": -597.3026733398438, + "logps/rejected": -2141.43505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278873920440674, + "rewards/margins": 15.692459106445312, + "rewards/rejected": -20.971332550048828, + "step": 18430 + }, + { + "epoch": 1.1, + "learning_rate": 3.993870243064879e-06, + "logits/chosen": -2.423123598098755, + "logits/rejected": -1.7185027599334717, + "logps/chosen": -582.2466430664062, + "logps/rejected": -2098.70458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.135513782501221, + "rewards/margins": 15.407316207885742, + "rewards/rejected": -20.542831420898438, + "step": 18440 + }, + { + "epoch": 1.1, + "learning_rate": 3.992479044095267e-06, + "logits/chosen": -2.395264148712158, + "logits/rejected": -1.3168280124664307, + "logps/chosen": -580.3234252929688, + "logps/rejected": -2064.65087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.094180107116699, + "rewards/margins": 15.102246284484863, + "rewards/rejected": -20.19642448425293, + "step": 18450 + }, + { + "epoch": 1.1, + "learning_rate": 3.991087126646262e-06, + "logits/chosen": -2.4225120544433594, + "logits/rejected": -1.6663070917129517, + "logps/chosen": -577.60546875, + "logps/rejected": -2063.363525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.085602760314941, + "rewards/margins": 15.106854438781738, + "rewards/rejected": -20.192455291748047, + "step": 18460 + }, + { + "epoch": 1.1, + "learning_rate": 3.989694491387934e-06, + "logits/chosen": -2.4331905841827393, + "logits/rejected": -1.6541917324066162, + "logps/chosen": -584.953125, + "logps/rejected": -2174.080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.105698585510254, + "rewards/margins": 16.189939498901367, + "rewards/rejected": -21.295639038085938, + "step": 18470 + }, + { + "epoch": 1.1, + "learning_rate": 3.988301138990697e-06, + "logits/chosen": -2.439736843109131, + "logits/rejected": -1.763725996017456, + "logps/chosen": -580.5911865234375, + "logps/rejected": -2106.355224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.115292072296143, + "rewards/margins": 15.506525039672852, + "rewards/rejected": -20.621816635131836, + "step": 18480 + }, + { + "epoch": 1.1, + "learning_rate": 3.98690707012531e-06, + "logits/chosen": -2.457688808441162, + "logits/rejected": -1.6716125011444092, + "logps/chosen": -593.5896606445312, + "logps/rejected": -2095.024658203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.294414043426514, + "rewards/margins": 15.212228775024414, + "rewards/rejected": -20.506643295288086, + "step": 18490 + }, + { + "epoch": 1.1, + "learning_rate": 3.9855122854628796e-06, + "logits/chosen": -2.428159475326538, + "logits/rejected": -1.7865359783172607, + "logps/chosen": -591.8485717773438, + "logps/rejected": -2143.146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.165593147277832, + "rewards/margins": 15.821266174316406, + "rewards/rejected": -20.986858367919922, + "step": 18500 + }, + { + "epoch": 1.1, + "learning_rate": 3.984116785674852e-06, + "logits/chosen": -2.400676727294922, + "logits/rejected": -1.593571662902832, + "logps/chosen": -590.8531494140625, + "logps/rejected": -2173.65087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.158577919006348, + "rewards/margins": 16.131921768188477, + "rewards/rejected": -21.290496826171875, + "step": 18510 + }, + { + "epoch": 1.1, + "learning_rate": 3.982720571433024e-06, + "logits/chosen": -2.4227564334869385, + "logits/rejected": -1.610022783279419, + "logps/chosen": -576.7730712890625, + "logps/rejected": -2210.098388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.046576499938965, + "rewards/margins": 16.621313095092773, + "rewards/rejected": -21.667888641357422, + "step": 18520 + }, + { + "epoch": 1.1, + "learning_rate": 3.981323643409532e-06, + "logits/chosen": -2.4051318168640137, + "logits/rejected": -1.5803598165512085, + "logps/chosen": -578.0377807617188, + "logps/rejected": -2217.57080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.03226375579834, + "rewards/margins": 16.688276290893555, + "rewards/rejected": -21.720542907714844, + "step": 18530 + }, + { + "epoch": 1.11, + "learning_rate": 3.979926002276856e-06, + "logits/chosen": -2.45286226272583, + "logits/rejected": -1.78036630153656, + "logps/chosen": -565.7950439453125, + "logps/rejected": -2043.772705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.984052658081055, + "rewards/margins": 15.017298698425293, + "rewards/rejected": -20.001354217529297, + "step": 18540 + }, + { + "epoch": 1.11, + "learning_rate": 3.978527648707823e-06, + "logits/chosen": -2.4044864177703857, + "logits/rejected": -1.5653653144836426, + "logps/chosen": -582.3046875, + "logps/rejected": -2123.6064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.073502540588379, + "rewards/margins": 15.72291374206543, + "rewards/rejected": -20.796417236328125, + "step": 18550 + }, + { + "epoch": 1.11, + "learning_rate": 3.977128583375597e-06, + "logits/chosen": -2.4024529457092285, + "logits/rejected": -1.6322603225708008, + "logps/chosen": -578.3257446289062, + "logps/rejected": -2054.75048828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.081921577453613, + "rewards/margins": 15.031837463378906, + "rewards/rejected": -20.113759994506836, + "step": 18560 + }, + { + "epoch": 1.11, + "learning_rate": 3.97572880695369e-06, + "logits/chosen": -2.5162785053253174, + "logits/rejected": -1.7088098526000977, + "logps/chosen": -576.5661010742188, + "logps/rejected": -2068.79296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.04717493057251, + "rewards/margins": 15.203173637390137, + "rewards/rejected": -20.250349044799805, + "step": 18570 + }, + { + "epoch": 1.11, + "learning_rate": 3.974328320115955e-06, + "logits/chosen": -2.40729022026062, + "logits/rejected": -1.7766488790512085, + "logps/chosen": -587.5242309570312, + "logps/rejected": -1974.434814453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.223254203796387, + "rewards/margins": 14.075922012329102, + "rewards/rejected": -19.299177169799805, + "step": 18580 + }, + { + "epoch": 1.11, + "learning_rate": 3.972927123536585e-06, + "logits/chosen": -2.4628021717071533, + "logits/rejected": -1.70864737033844, + "logps/chosen": -579.6982421875, + "logps/rejected": -2060.56201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.101744174957275, + "rewards/margins": 15.061245918273926, + "rewards/rejected": -20.16299057006836, + "step": 18590 + }, + { + "epoch": 1.11, + "learning_rate": 3.971525217890117e-06, + "logits/chosen": -2.423273801803589, + "logits/rejected": -1.690574049949646, + "logps/chosen": -571.34765625, + "logps/rejected": -2000.7408447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.986751556396484, + "rewards/margins": 14.565298080444336, + "rewards/rejected": -19.552051544189453, + "step": 18600 + }, + { + "epoch": 1.11, + "learning_rate": 3.970122603851427e-06, + "logits/chosen": -2.462780475616455, + "logits/rejected": -1.649350881576538, + "logps/chosen": -585.4249877929688, + "logps/rejected": -2191.431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.158599376678467, + "rewards/margins": 16.30327033996582, + "rewards/rejected": -21.461872100830078, + "step": 18610 + }, + { + "epoch": 1.11, + "learning_rate": 3.968719282095735e-06, + "logits/chosen": -2.473378896713257, + "logits/rejected": -1.777745008468628, + "logps/chosen": -581.1444091796875, + "logps/rejected": -2015.3248291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.090381622314453, + "rewards/margins": 14.620091438293457, + "rewards/rejected": -19.710472106933594, + "step": 18620 + }, + { + "epoch": 1.11, + "learning_rate": 3.967315253298599e-06, + "logits/chosen": -2.4520063400268555, + "logits/rejected": -1.5474574565887451, + "logps/chosen": -577.2261352539062, + "logps/rejected": -2186.58544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.078354358673096, + "rewards/margins": 16.338790893554688, + "rewards/rejected": -21.417142868041992, + "step": 18630 + }, + { + "epoch": 1.11, + "learning_rate": 3.965910518135919e-06, + "logits/chosen": -2.4735147953033447, + "logits/rejected": -1.75032639503479, + "logps/chosen": -571.6150512695312, + "logps/rejected": -2049.250732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0258049964904785, + "rewards/margins": 15.034421920776367, + "rewards/rejected": -20.060226440429688, + "step": 18640 + }, + { + "epoch": 1.11, + "learning_rate": 3.9645050772839335e-06, + "logits/chosen": -2.4404940605163574, + "logits/rejected": -1.5375405550003052, + "logps/chosen": -601.4315185546875, + "logps/rejected": -2158.363037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.248210906982422, + "rewards/margins": 15.8966646194458, + "rewards/rejected": -21.144874572753906, + "step": 18650 + }, + { + "epoch": 1.11, + "learning_rate": 3.963098931419223e-06, + "logits/chosen": -2.438211441040039, + "logits/rejected": -1.67769455909729, + "logps/chosen": -582.6536865234375, + "logps/rejected": -2127.242919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.155867576599121, + "rewards/margins": 15.684659957885742, + "rewards/rejected": -20.840526580810547, + "step": 18660 + }, + { + "epoch": 1.11, + "learning_rate": 3.961692081218706e-06, + "logits/chosen": -2.406198501586914, + "logits/rejected": -1.5734789371490479, + "logps/chosen": -594.7872314453125, + "logps/rejected": -2089.34228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.275328159332275, + "rewards/margins": 15.170985221862793, + "rewards/rejected": -20.446313858032227, + "step": 18670 + }, + { + "epoch": 1.11, + "learning_rate": 3.9602845273596395e-06, + "logits/chosen": -2.431891918182373, + "logits/rejected": -1.6575632095336914, + "logps/chosen": -589.1453857421875, + "logps/rejected": -2112.016357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218883514404297, + "rewards/margins": 15.459136962890625, + "rewards/rejected": -20.678020477294922, + "step": 18680 + }, + { + "epoch": 1.11, + "learning_rate": 3.958876270519619e-06, + "logits/chosen": -2.377873420715332, + "logits/rejected": -1.61625075340271, + "logps/chosen": -574.5419311523438, + "logps/rejected": -2149.80859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.032929420471191, + "rewards/margins": 16.02180290222168, + "rewards/rejected": -21.05473518371582, + "step": 18690 + }, + { + "epoch": 1.12, + "learning_rate": 3.957467311376582e-06, + "logits/chosen": -2.4256389141082764, + "logits/rejected": -1.6294775009155273, + "logps/chosen": -592.460205078125, + "logps/rejected": -2052.91259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.270570278167725, + "rewards/margins": 14.820477485656738, + "rewards/rejected": -20.091049194335938, + "step": 18700 + }, + { + "epoch": 1.12, + "learning_rate": 3.9560576506088e-06, + "logits/chosen": -2.448901414871216, + "logits/rejected": -1.760441541671753, + "logps/chosen": -573.1102294921875, + "logps/rejected": -2039.9058837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.979536533355713, + "rewards/margins": 14.981622695922852, + "rewards/rejected": -19.96116065979004, + "step": 18710 + }, + { + "epoch": 1.12, + "learning_rate": 3.9546472888948825e-06, + "logits/chosen": -2.4136698246002197, + "logits/rejected": -1.521928310394287, + "logps/chosen": -575.8858032226562, + "logps/rejected": -2040.4007568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.106022834777832, + "rewards/margins": 14.859670639038086, + "rewards/rejected": -19.965694427490234, + "step": 18720 + }, + { + "epoch": 1.12, + "learning_rate": 3.95323622691378e-06, + "logits/chosen": -2.42920184135437, + "logits/rejected": -1.612730622291565, + "logps/chosen": -589.2239379882812, + "logps/rejected": -1978.653564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161189079284668, + "rewards/margins": 14.200078010559082, + "rewards/rejected": -19.361265182495117, + "step": 18730 + }, + { + "epoch": 1.12, + "learning_rate": 3.951824465344775e-06, + "logits/chosen": -2.4377827644348145, + "logits/rejected": -1.729501485824585, + "logps/chosen": -565.1973876953125, + "logps/rejected": -2181.59375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.977604389190674, + "rewards/margins": 16.4086971282959, + "rewards/rejected": -21.386301040649414, + "step": 18740 + }, + { + "epoch": 1.12, + "learning_rate": 3.950412004867491e-06, + "logits/chosen": -2.4053878784179688, + "logits/rejected": -1.4727222919464111, + "logps/chosen": -587.2947387695312, + "logps/rejected": -2066.32080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.127694129943848, + "rewards/margins": 15.096753120422363, + "rewards/rejected": -20.224445343017578, + "step": 18750 + }, + { + "epoch": 1.12, + "learning_rate": 3.948998846161887e-06, + "logits/chosen": -2.44976806640625, + "logits/rejected": -1.6216999292373657, + "logps/chosen": -590.5778198242188, + "logps/rejected": -2218.245849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.092316627502441, + "rewards/margins": 16.639482498168945, + "rewards/rejected": -21.731800079345703, + "step": 18760 + }, + { + "epoch": 1.12, + "learning_rate": 3.947584989908256e-06, + "logits/chosen": -2.4116270542144775, + "logits/rejected": -1.6825393438339233, + "logps/chosen": -583.7651977539062, + "logps/rejected": -2166.87060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.084784507751465, + "rewards/margins": 16.144289016723633, + "rewards/rejected": -21.229074478149414, + "step": 18770 + }, + { + "epoch": 1.12, + "learning_rate": 3.94617043678723e-06, + "logits/chosen": -2.4326183795928955, + "logits/rejected": -1.6881961822509766, + "logps/chosen": -571.5545654296875, + "logps/rejected": -2062.08544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.986852645874023, + "rewards/margins": 15.193838119506836, + "rewards/rejected": -20.18069076538086, + "step": 18780 + }, + { + "epoch": 1.12, + "learning_rate": 3.944755187479774e-06, + "logits/chosen": -2.3741493225097656, + "logits/rejected": -1.5365625619888306, + "logps/chosen": -588.3641967773438, + "logps/rejected": -2107.576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192361831665039, + "rewards/margins": 15.432011604309082, + "rewards/rejected": -20.624370574951172, + "step": 18790 + }, + { + "epoch": 1.12, + "learning_rate": 3.94333924266719e-06, + "logits/chosen": -2.418208599090576, + "logits/rejected": -1.5694580078125, + "logps/chosen": -592.94482421875, + "logps/rejected": -2228.24560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1974592208862305, + "rewards/margins": 16.632471084594727, + "rewards/rejected": -21.829931259155273, + "step": 18800 + }, + { + "epoch": 1.12, + "learning_rate": 3.941922603031113e-06, + "logits/chosen": -2.47251558303833, + "logits/rejected": -1.781071662902832, + "logps/chosen": -581.3961791992188, + "logps/rejected": -2081.569091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.147972106933594, + "rewards/margins": 15.221562385559082, + "rewards/rejected": -20.36953353881836, + "step": 18810 + }, + { + "epoch": 1.12, + "learning_rate": 3.940505269253512e-06, + "logits/chosen": -2.43013858795166, + "logits/rejected": -1.6017624139785767, + "logps/chosen": -585.4404296875, + "logps/rejected": -2109.77880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0790300369262695, + "rewards/margins": 15.569494247436523, + "rewards/rejected": -20.64852523803711, + "step": 18820 + }, + { + "epoch": 1.12, + "learning_rate": 3.9390872420166935e-06, + "logits/chosen": -2.434863328933716, + "logits/rejected": -1.7917795181274414, + "logps/chosen": -568.0145263671875, + "logps/rejected": -2000.833251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.994668483734131, + "rewards/margins": 14.582235336303711, + "rewards/rejected": -19.576902389526367, + "step": 18830 + }, + { + "epoch": 1.12, + "learning_rate": 3.937668522003295e-06, + "logits/chosen": -2.4125287532806396, + "logits/rejected": -1.5828489065170288, + "logps/chosen": -599.6592407226562, + "logps/rejected": -2020.774169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2493696212768555, + "rewards/margins": 14.51384449005127, + "rewards/rejected": -19.763214111328125, + "step": 18840 + }, + { + "epoch": 1.12, + "learning_rate": 3.936249109896288e-06, + "logits/chosen": -2.406860589981079, + "logits/rejected": -1.5879347324371338, + "logps/chosen": -600.56591796875, + "logps/rejected": -2141.70947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3231329917907715, + "rewards/margins": 15.65721607208252, + "rewards/rejected": -20.980350494384766, + "step": 18850 + }, + { + "epoch": 1.12, + "learning_rate": 3.934829006378978e-06, + "logits/chosen": -2.4400432109832764, + "logits/rejected": -1.6759154796600342, + "logps/chosen": -596.8572998046875, + "logps/rejected": -2105.65771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.258796691894531, + "rewards/margins": 15.354408264160156, + "rewards/rejected": -20.613203048706055, + "step": 18860 + }, + { + "epoch": 1.13, + "learning_rate": 3.933408212135003e-06, + "logits/chosen": -2.402916431427002, + "logits/rejected": -1.6431405544281006, + "logps/chosen": -587.33203125, + "logps/rejected": -2115.8408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.133054256439209, + "rewards/margins": 15.576817512512207, + "rewards/rejected": -20.70987319946289, + "step": 18870 + }, + { + "epoch": 1.13, + "learning_rate": 3.931986727848333e-06, + "logits/chosen": -2.4330458641052246, + "logits/rejected": -1.7147125005722046, + "logps/chosen": -576.5330200195312, + "logps/rejected": -1894.7047119140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.09561014175415, + "rewards/margins": 13.43321418762207, + "rewards/rejected": -18.528823852539062, + "step": 18880 + }, + { + "epoch": 1.13, + "learning_rate": 3.930564554203269e-06, + "logits/chosen": -2.463778018951416, + "logits/rejected": -1.658193826675415, + "logps/chosen": -581.7781372070312, + "logps/rejected": -2077.526123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.033658981323242, + "rewards/margins": 15.29339599609375, + "rewards/rejected": -20.32705307006836, + "step": 18890 + }, + { + "epoch": 1.13, + "learning_rate": 3.929141691884448e-06, + "logits/chosen": -2.425546646118164, + "logits/rejected": -1.6244443655014038, + "logps/chosen": -588.731689453125, + "logps/rejected": -2111.0546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1908063888549805, + "rewards/margins": 15.485649108886719, + "rewards/rejected": -20.676456451416016, + "step": 18900 + }, + { + "epoch": 1.13, + "learning_rate": 3.927718141576835e-06, + "logits/chosen": -2.4536585807800293, + "logits/rejected": -1.5219862461090088, + "logps/chosen": -580.9444580078125, + "logps/rejected": -2092.42529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.127568244934082, + "rewards/margins": 15.361177444458008, + "rewards/rejected": -20.488744735717773, + "step": 18910 + }, + { + "epoch": 1.13, + "learning_rate": 3.926293903965726e-06, + "logits/chosen": -2.4751644134521484, + "logits/rejected": -1.7000887393951416, + "logps/chosen": -589.4120483398438, + "logps/rejected": -2060.10595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.231362342834473, + "rewards/margins": 14.927868843078613, + "rewards/rejected": -20.159229278564453, + "step": 18920 + }, + { + "epoch": 1.13, + "learning_rate": 3.9248689797367515e-06, + "logits/chosen": -2.4613261222839355, + "logits/rejected": -1.6467431783676147, + "logps/chosen": -573.01806640625, + "logps/rejected": -2157.39892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038286209106445, + "rewards/margins": 16.096567153930664, + "rewards/rejected": -21.13485336303711, + "step": 18930 + }, + { + "epoch": 1.13, + "learning_rate": 3.923443369575867e-06, + "logits/chosen": -2.460026741027832, + "logits/rejected": -1.7462852001190186, + "logps/chosen": -594.6763305664062, + "logps/rejected": -2164.188720703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.296042442321777, + "rewards/margins": 15.895207405090332, + "rewards/rejected": -21.191251754760742, + "step": 18940 + }, + { + "epoch": 1.13, + "learning_rate": 3.922017074169362e-06, + "logits/chosen": -2.438420057296753, + "logits/rejected": -1.7412683963775635, + "logps/chosen": -590.4034423828125, + "logps/rejected": -2050.882568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192535877227783, + "rewards/margins": 14.876579284667969, + "rewards/rejected": -20.06911277770996, + "step": 18950 + }, + { + "epoch": 1.13, + "learning_rate": 3.920590094203856e-06, + "logits/chosen": -2.4192967414855957, + "logits/rejected": -1.612460732460022, + "logps/chosen": -611.8150634765625, + "logps/rejected": -2017.2027587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.390728950500488, + "rewards/margins": 14.345804214477539, + "rewards/rejected": -19.73653221130371, + "step": 18960 + }, + { + "epoch": 1.13, + "learning_rate": 3.919162430366298e-06, + "logits/chosen": -2.4383883476257324, + "logits/rejected": -1.751230001449585, + "logps/chosen": -612.71630859375, + "logps/rejected": -2110.81005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.437900066375732, + "rewards/margins": 15.229235649108887, + "rewards/rejected": -20.66713523864746, + "step": 18970 + }, + { + "epoch": 1.13, + "learning_rate": 3.917734083343963e-06, + "logits/chosen": -2.4496424198150635, + "logits/rejected": -1.783003807067871, + "logps/chosen": -599.7667846679688, + "logps/rejected": -2171.818603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2839250564575195, + "rewards/margins": 15.9990234375, + "rewards/rejected": -21.282947540283203, + "step": 18980 + }, + { + "epoch": 1.13, + "learning_rate": 3.916305053824458e-06, + "logits/chosen": -2.4426653385162354, + "logits/rejected": -1.578319787979126, + "logps/chosen": -606.4851684570312, + "logps/rejected": -2078.822021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.384678363800049, + "rewards/margins": 14.971735000610352, + "rewards/rejected": -20.356412887573242, + "step": 18990 + }, + { + "epoch": 1.13, + "learning_rate": 3.914875342495719e-06, + "logits/chosen": -2.4250175952911377, + "logits/rejected": -1.5958974361419678, + "logps/chosen": -612.9554443359375, + "logps/rejected": -2085.518798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.453532695770264, + "rewards/margins": 14.965519905090332, + "rewards/rejected": -20.41905403137207, + "step": 19000 + }, + { + "epoch": 1.13, + "learning_rate": 3.913444950046008e-06, + "logits/chosen": -2.4320201873779297, + "logits/rejected": -1.6604028940200806, + "logps/chosen": -640.2800903320312, + "logps/rejected": -2152.78955078125, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71614408493042, + "rewards/margins": 15.363919258117676, + "rewards/rejected": -21.080060958862305, + "step": 19010 + }, + { + "epoch": 1.13, + "learning_rate": 3.912013877163916e-06, + "logits/chosen": -2.4315667152404785, + "logits/rejected": -1.5563900470733643, + "logps/chosen": -738.6211547851562, + "logps/rejected": -2324.96044921875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.711573123931885, + "rewards/margins": 16.072275161743164, + "rewards/rejected": -22.78384780883789, + "step": 19020 + }, + { + "epoch": 1.13, + "learning_rate": 3.910582124538362e-06, + "logits/chosen": -2.4067509174346924, + "logits/rejected": -1.494365930557251, + "logps/chosen": -708.9762573242188, + "logps/rejected": -2117.27880859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331134796142578, + "rewards/margins": 14.391212463378906, + "rewards/rejected": -20.722347259521484, + "step": 19030 + }, + { + "epoch": 1.14, + "learning_rate": 3.9091496928585905e-06, + "logits/chosen": -2.4253509044647217, + "logits/rejected": -1.4519166946411133, + "logps/chosen": -693.6114501953125, + "logps/rejected": -2197.43408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216944217681885, + "rewards/margins": 15.3165922164917, + "rewards/rejected": -21.533536911010742, + "step": 19040 + }, + { + "epoch": 1.14, + "learning_rate": 3.907716582814175e-06, + "logits/chosen": -2.3511993885040283, + "logits/rejected": -1.4239299297332764, + "logps/chosen": -684.6517333984375, + "logps/rejected": -2094.09521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1122965812683105, + "rewards/margins": 14.395807266235352, + "rewards/rejected": -20.508102416992188, + "step": 19050 + }, + { + "epoch": 1.14, + "learning_rate": 3.906282795095015e-06, + "logits/chosen": -2.3663926124572754, + "logits/rejected": -1.547985315322876, + "logps/chosen": -679.8900756835938, + "logps/rejected": -1971.953857421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.080678462982178, + "rewards/margins": 13.204913139343262, + "rewards/rejected": -19.28559112548828, + "step": 19060 + }, + { + "epoch": 1.14, + "learning_rate": 3.904848330391335e-06, + "logits/chosen": -2.4014837741851807, + "logits/rejected": -1.526257038116455, + "logps/chosen": -648.63427734375, + "logps/rejected": -2164.51318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7840752601623535, + "rewards/margins": 15.398852348327637, + "rewards/rejected": -21.182926177978516, + "step": 19070 + }, + { + "epoch": 1.14, + "learning_rate": 3.903413189393687e-06, + "logits/chosen": -2.40606689453125, + "logits/rejected": -1.504984736442566, + "logps/chosen": -646.3700561523438, + "logps/rejected": -2217.57763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.737691879272461, + "rewards/margins": 15.996622085571289, + "rewards/rejected": -21.73431396484375, + "step": 19080 + }, + { + "epoch": 1.14, + "learning_rate": 3.901977372792946e-06, + "logits/chosen": -2.3846077919006348, + "logits/rejected": -1.4350707530975342, + "logps/chosen": -657.8864135742188, + "logps/rejected": -2128.233154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.900952339172363, + "rewards/margins": 14.946009635925293, + "rewards/rejected": -20.846960067749023, + "step": 19090 + }, + { + "epoch": 1.14, + "learning_rate": 3.900540881280317e-06, + "logits/chosen": -2.438363552093506, + "logits/rejected": -1.625722885131836, + "logps/chosen": -639.6251831054688, + "logps/rejected": -2263.510498046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.714897632598877, + "rewards/margins": 16.484432220458984, + "rewards/rejected": -22.199329376220703, + "step": 19100 + }, + { + "epoch": 1.14, + "learning_rate": 3.899103715547325e-06, + "logits/chosen": -2.4112725257873535, + "logits/rejected": -1.5246140956878662, + "logps/chosen": -672.3448486328125, + "logps/rejected": -2198.49462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034895420074463, + "rewards/margins": 15.500343322753906, + "rewards/rejected": -21.53523826599121, + "step": 19110 + }, + { + "epoch": 1.14, + "learning_rate": 3.897665876285823e-06, + "logits/chosen": -2.4422569274902344, + "logits/rejected": -1.609088659286499, + "logps/chosen": -664.594970703125, + "logps/rejected": -2261.351806640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.946292400360107, + "rewards/margins": 16.211267471313477, + "rewards/rejected": -22.15755844116211, + "step": 19120 + }, + { + "epoch": 1.14, + "learning_rate": 3.896227364187985e-06, + "logits/chosen": -2.374281406402588, + "logits/rejected": -1.2726308107376099, + "logps/chosen": -655.2998046875, + "logps/rejected": -2075.43017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.827486991882324, + "rewards/margins": 14.486024856567383, + "rewards/rejected": -20.31351089477539, + "step": 19130 + }, + { + "epoch": 1.14, + "learning_rate": 3.894788179946313e-06, + "logits/chosen": -2.3776257038116455, + "logits/rejected": -1.4452688694000244, + "logps/chosen": -643.957275390625, + "logps/rejected": -2193.710693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.695496559143066, + "rewards/margins": 15.794363021850586, + "rewards/rejected": -21.489858627319336, + "step": 19140 + }, + { + "epoch": 1.14, + "learning_rate": 3.893348324253628e-06, + "logits/chosen": -2.324923038482666, + "logits/rejected": -1.4607222080230713, + "logps/chosen": -665.5977783203125, + "logps/rejected": -2046.682373046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953238487243652, + "rewards/margins": 14.085844039916992, + "rewards/rejected": -20.03908348083496, + "step": 19150 + }, + { + "epoch": 1.14, + "learning_rate": 3.891907797803077e-06, + "logits/chosen": -2.390087604522705, + "logits/rejected": -1.572028398513794, + "logps/chosen": -694.201171875, + "logps/rejected": -2201.85986328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.244933605194092, + "rewards/margins": 15.308710098266602, + "rewards/rejected": -21.553646087646484, + "step": 19160 + }, + { + "epoch": 1.14, + "learning_rate": 3.890466601288131e-06, + "logits/chosen": -2.4312405586242676, + "logits/rejected": -1.529668927192688, + "logps/chosen": -644.5238037109375, + "logps/rejected": -2179.727294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.701764106750488, + "rewards/margins": 15.659899711608887, + "rewards/rejected": -21.361663818359375, + "step": 19170 + }, + { + "epoch": 1.14, + "learning_rate": 3.88902473540258e-06, + "logits/chosen": -2.4329147338867188, + "logits/rejected": -1.6470997333526611, + "logps/chosen": -658.8306884765625, + "logps/rejected": -2235.673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887040138244629, + "rewards/margins": 16.029508590698242, + "rewards/rejected": -21.916547775268555, + "step": 19180 + }, + { + "epoch": 1.14, + "learning_rate": 3.887582200840539e-06, + "logits/chosen": -2.4618046283721924, + "logits/rejected": -1.4584816694259644, + "logps/chosen": -638.4754028320312, + "logps/rejected": -2274.69189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.638727188110352, + "rewards/margins": 16.662248611450195, + "rewards/rejected": -22.300975799560547, + "step": 19190 + }, + { + "epoch": 1.14, + "learning_rate": 3.886138998296446e-06, + "logits/chosen": -2.4454612731933594, + "logits/rejected": -1.6296141147613525, + "logps/chosen": -631.6937866210938, + "logps/rejected": -2212.90966796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.612523078918457, + "rewards/margins": 16.053213119506836, + "rewards/rejected": -21.66573715209961, + "step": 19200 + }, + { + "epoch": 1.15, + "learning_rate": 3.8846951284650545e-06, + "logits/chosen": -2.4372353553771973, + "logits/rejected": -1.6444288492202759, + "logps/chosen": -650.3663940429688, + "logps/rejected": -2110.470947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.784151077270508, + "rewards/margins": 14.881584167480469, + "rewards/rejected": -20.665735244750977, + "step": 19210 + }, + { + "epoch": 1.15, + "learning_rate": 3.883250592041447e-06, + "logits/chosen": -2.486525058746338, + "logits/rejected": -1.655632734298706, + "logps/chosen": -647.8833618164062, + "logps/rejected": -2222.6318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.737036228179932, + "rewards/margins": 16.033292770385742, + "rewards/rejected": -21.770328521728516, + "step": 19220 + }, + { + "epoch": 1.15, + "learning_rate": 3.881805389721021e-06, + "logits/chosen": -2.384709596633911, + "logits/rejected": -1.623321771621704, + "logps/chosen": -645.1917724609375, + "logps/rejected": -2177.830322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.731366157531738, + "rewards/margins": 15.603930473327637, + "rewards/rejected": -21.335296630859375, + "step": 19230 + }, + { + "epoch": 1.15, + "learning_rate": 3.8803595221995005e-06, + "logits/chosen": -2.418848752975464, + "logits/rejected": -1.6666618585586548, + "logps/chosen": -626.2412719726562, + "logps/rejected": -2212.12744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575583457946777, + "rewards/margins": 16.101163864135742, + "rewards/rejected": -21.676748275756836, + "step": 19240 + }, + { + "epoch": 1.15, + "learning_rate": 3.878912990172922e-06, + "logits/chosen": -2.4608511924743652, + "logits/rejected": -1.5922057628631592, + "logps/chosen": -650.2113647460938, + "logps/rejected": -2160.10498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.838407039642334, + "rewards/margins": 15.32129955291748, + "rewards/rejected": -21.159709930419922, + "step": 19250 + }, + { + "epoch": 1.15, + "learning_rate": 3.877465794337648e-06, + "logits/chosen": -2.4093728065490723, + "logits/rejected": -1.7604045867919922, + "logps/chosen": -615.9984130859375, + "logps/rejected": -2147.171142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.458701133728027, + "rewards/margins": 15.577181816101074, + "rewards/rejected": -21.0358829498291, + "step": 19260 + }, + { + "epoch": 1.15, + "learning_rate": 3.876017935390357e-06, + "logits/chosen": -2.4236056804656982, + "logits/rejected": -1.5977721214294434, + "logps/chosen": -628.7794189453125, + "logps/rejected": -2270.100830078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.564779758453369, + "rewards/margins": 16.704166412353516, + "rewards/rejected": -22.26894760131836, + "step": 19270 + }, + { + "epoch": 1.15, + "learning_rate": 3.874569414028051e-06, + "logits/chosen": -2.4561169147491455, + "logits/rejected": -1.6084585189819336, + "logps/chosen": -623.3856201171875, + "logps/rejected": -2121.858642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475296497344971, + "rewards/margins": 15.298396110534668, + "rewards/rejected": -20.773693084716797, + "step": 19280 + }, + { + "epoch": 1.15, + "learning_rate": 3.873120230948045e-06, + "logits/chosen": -2.444462299346924, + "logits/rejected": -1.7487976551055908, + "logps/chosen": -632.6966552734375, + "logps/rejected": -2109.161865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.594335556030273, + "rewards/margins": 15.046765327453613, + "rewards/rejected": -20.641101837158203, + "step": 19290 + }, + { + "epoch": 1.15, + "learning_rate": 3.871670386847979e-06, + "logits/chosen": -2.397474765777588, + "logits/rejected": -1.5530986785888672, + "logps/chosen": -630.4021606445312, + "logps/rejected": -2202.519287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.622138977050781, + "rewards/margins": 15.957435607910156, + "rewards/rejected": -21.57957649230957, + "step": 19300 + }, + { + "epoch": 1.15, + "learning_rate": 3.870219882425805e-06, + "logits/chosen": -2.3863298892974854, + "logits/rejected": -1.7190964221954346, + "logps/chosen": -625.0134887695312, + "logps/rejected": -2079.658447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5410261154174805, + "rewards/margins": 14.815251350402832, + "rewards/rejected": -20.35627555847168, + "step": 19310 + }, + { + "epoch": 1.15, + "learning_rate": 3.868768718379798e-06, + "logits/chosen": -2.4297068119049072, + "logits/rejected": -1.6571534872055054, + "logps/chosen": -638.139404296875, + "logps/rejected": -2141.787841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.699477195739746, + "rewards/margins": 15.281217575073242, + "rewards/rejected": -20.980697631835938, + "step": 19320 + }, + { + "epoch": 1.15, + "learning_rate": 3.867316895408548e-06, + "logits/chosen": -2.46977162361145, + "logits/rejected": -1.6708837747573853, + "logps/chosen": -640.0345458984375, + "logps/rejected": -2071.86083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.618253707885742, + "rewards/margins": 14.659602165222168, + "rewards/rejected": -20.27785873413086, + "step": 19330 + }, + { + "epoch": 1.15, + "learning_rate": 3.86586441421096e-06, + "logits/chosen": -2.4563231468200684, + "logits/rejected": -1.712656021118164, + "logps/chosen": -644.7281494140625, + "logps/rejected": -2106.98388671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.744048118591309, + "rewards/margins": 14.865092277526855, + "rewards/rejected": -20.609140396118164, + "step": 19340 + }, + { + "epoch": 1.15, + "learning_rate": 3.8644112754862614e-06, + "logits/chosen": -2.4537181854248047, + "logits/rejected": -1.5892608165740967, + "logps/chosen": -633.906005859375, + "logps/rejected": -2036.0816650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.64388370513916, + "rewards/margins": 14.273458480834961, + "rewards/rejected": -19.91734504699707, + "step": 19350 + }, + { + "epoch": 1.15, + "learning_rate": 3.862957479933993e-06, + "logits/chosen": -2.4331767559051514, + "logits/rejected": -1.6498463153839111, + "logps/chosen": -642.0687866210938, + "logps/rejected": -2218.45458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.69646692276001, + "rewards/margins": 16.042327880859375, + "rewards/rejected": -21.73879623413086, + "step": 19360 + }, + { + "epoch": 1.16, + "learning_rate": 3.86150302825401e-06, + "logits/chosen": -2.4428954124450684, + "logits/rejected": -1.8684608936309814, + "logps/chosen": -628.1819458007812, + "logps/rejected": -2182.301513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.556127548217773, + "rewards/margins": 15.820112228393555, + "rewards/rejected": -21.376239776611328, + "step": 19370 + }, + { + "epoch": 1.16, + "learning_rate": 3.860047921146487e-06, + "logits/chosen": -2.414626121520996, + "logits/rejected": -1.6948535442352295, + "logps/chosen": -633.673828125, + "logps/rejected": -2107.531982421875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.637334823608398, + "rewards/margins": 14.996297836303711, + "rewards/rejected": -20.63363265991211, + "step": 19380 + }, + { + "epoch": 1.16, + "learning_rate": 3.858592159311912e-06, + "logits/chosen": -2.480952739715576, + "logits/rejected": -1.5439393520355225, + "logps/chosen": -646.4916381835938, + "logps/rejected": -2164.063720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7568511962890625, + "rewards/margins": 15.43677806854248, + "rewards/rejected": -21.19363021850586, + "step": 19390 + }, + { + "epoch": 1.16, + "learning_rate": 3.857135743451089e-06, + "logits/chosen": -2.416769504547119, + "logits/rejected": -1.4766902923583984, + "logps/chosen": -659.0882568359375, + "logps/rejected": -2264.719482421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.875912666320801, + "rewards/margins": 16.313209533691406, + "rewards/rejected": -22.18912124633789, + "step": 19400 + }, + { + "epoch": 1.16, + "learning_rate": 3.855678674265136e-06, + "logits/chosen": -2.4593052864074707, + "logits/rejected": -1.6137712001800537, + "logps/chosen": -707.1406860351562, + "logps/rejected": -2162.64453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.403662204742432, + "rewards/margins": 14.770123481750488, + "rewards/rejected": -21.173784255981445, + "step": 19410 + }, + { + "epoch": 1.16, + "learning_rate": 3.854220952455488e-06, + "logits/chosen": -2.4449572563171387, + "logits/rejected": -1.6290550231933594, + "logps/chosen": -664.4036865234375, + "logps/rejected": -2246.533447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906922817230225, + "rewards/margins": 16.115657806396484, + "rewards/rejected": -22.022584915161133, + "step": 19420 + }, + { + "epoch": 1.16, + "learning_rate": 3.852762578723889e-06, + "logits/chosen": -2.4287936687469482, + "logits/rejected": -1.6006819009780884, + "logps/chosen": -674.2080688476562, + "logps/rejected": -2235.00537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022744178771973, + "rewards/margins": 15.886322021484375, + "rewards/rejected": -21.9090633392334, + "step": 19430 + }, + { + "epoch": 1.16, + "learning_rate": 3.851303553772402e-06, + "logits/chosen": -2.4297120571136475, + "logits/rejected": -1.449948787689209, + "logps/chosen": -683.7781372070312, + "logps/rejected": -2207.921630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0833635330200195, + "rewards/margins": 15.55608081817627, + "rewards/rejected": -21.63944435119629, + "step": 19440 + }, + { + "epoch": 1.16, + "learning_rate": 3.849843878303402e-06, + "logits/chosen": -2.42291259765625, + "logits/rejected": -1.6586339473724365, + "logps/chosen": -681.8558349609375, + "logps/rejected": -2255.81201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1228132247924805, + "rewards/margins": 15.994463920593262, + "rewards/rejected": -22.11727523803711, + "step": 19450 + }, + { + "epoch": 1.16, + "learning_rate": 3.848383553019576e-06, + "logits/chosen": -2.4260642528533936, + "logits/rejected": -1.546029806137085, + "logps/chosen": -675.8665771484375, + "logps/rejected": -2167.619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0546159744262695, + "rewards/margins": 15.173860549926758, + "rewards/rejected": -21.22847557067871, + "step": 19460 + }, + { + "epoch": 1.16, + "learning_rate": 3.846922578623924e-06, + "logits/chosen": -2.3976385593414307, + "logits/rejected": -1.5862057209014893, + "logps/chosen": -695.1292724609375, + "logps/rejected": -2242.57080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.211791038513184, + "rewards/margins": 15.753082275390625, + "rewards/rejected": -21.964874267578125, + "step": 19470 + }, + { + "epoch": 1.16, + "learning_rate": 3.845460955819761e-06, + "logits/chosen": -2.488187313079834, + "logits/rejected": -1.4584896564483643, + "logps/chosen": -679.7158813476562, + "logps/rejected": -2193.178955078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117929935455322, + "rewards/margins": 15.385274887084961, + "rewards/rejected": -21.503204345703125, + "step": 19480 + }, + { + "epoch": 1.16, + "learning_rate": 3.8439986853107105e-06, + "logits/chosen": -2.461911678314209, + "logits/rejected": -1.7156527042388916, + "logps/chosen": -669.7090454101562, + "logps/rejected": -2247.7197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966146945953369, + "rewards/margins": 16.059839248657227, + "rewards/rejected": -22.025985717773438, + "step": 19490 + }, + { + "epoch": 1.16, + "learning_rate": 3.84253576780071e-06, + "logits/chosen": -2.4296975135803223, + "logits/rejected": -1.5426061153411865, + "logps/chosen": -674.3680419921875, + "logps/rejected": -2241.84033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025854110717773, + "rewards/margins": 15.952400207519531, + "rewards/rejected": -21.978252410888672, + "step": 19500 + }, + { + "epoch": 1.16, + "learning_rate": 3.841072203994009e-06, + "logits/chosen": -2.4097282886505127, + "logits/rejected": -1.609312653541565, + "logps/chosen": -678.0603637695312, + "logps/rejected": -2137.066650390625, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1021599769592285, + "rewards/margins": 14.818140029907227, + "rewards/rejected": -20.920299530029297, + "step": 19510 + }, + { + "epoch": 1.16, + "learning_rate": 3.839607994595165e-06, + "logits/chosen": -2.445357084274292, + "logits/rejected": -1.6492640972137451, + "logps/chosen": -634.388916015625, + "logps/rejected": -2148.0, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5802507400512695, + "rewards/margins": 15.463589668273926, + "rewards/rejected": -21.043838500976562, + "step": 19520 + }, + { + "epoch": 1.16, + "learning_rate": 3.83814314030905e-06, + "logits/chosen": -2.4373509883880615, + "logits/rejected": -1.7173750400543213, + "logps/chosen": -623.189453125, + "logps/rejected": -2131.537353515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.511532783508301, + "rewards/margins": 15.367982864379883, + "rewards/rejected": -20.8795166015625, + "step": 19530 + }, + { + "epoch": 1.17, + "learning_rate": 3.836677641840843e-06, + "logits/chosen": -2.452751636505127, + "logits/rejected": -1.742274522781372, + "logps/chosen": -628.5477294921875, + "logps/rejected": -2187.826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569386959075928, + "rewards/margins": 15.871366500854492, + "rewards/rejected": -21.440753936767578, + "step": 19540 + }, + { + "epoch": 1.17, + "learning_rate": 3.835211499896038e-06, + "logits/chosen": -2.4553511142730713, + "logits/rejected": -1.8020976781845093, + "logps/chosen": -624.2432861328125, + "logps/rejected": -2130.275146484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.526525497436523, + "rewards/margins": 15.343249320983887, + "rewards/rejected": -20.869775772094727, + "step": 19550 + }, + { + "epoch": 1.17, + "learning_rate": 3.833744715180433e-06, + "logits/chosen": -2.4191277027130127, + "logits/rejected": -1.856886863708496, + "logps/chosen": -638.2380981445312, + "logps/rejected": -2114.58251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.704461574554443, + "rewards/margins": 15.00163459777832, + "rewards/rejected": -20.706098556518555, + "step": 19560 + }, + { + "epoch": 1.17, + "learning_rate": 3.8322772884001395e-06, + "logits/chosen": -2.4464073181152344, + "logits/rejected": -1.6840827465057373, + "logps/chosen": -677.3611450195312, + "logps/rejected": -2271.059326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099024295806885, + "rewards/margins": 16.164966583251953, + "rewards/rejected": -22.263988494873047, + "step": 19570 + }, + { + "epoch": 1.17, + "learning_rate": 3.830809220261576e-06, + "logits/chosen": -2.4375863075256348, + "logits/rejected": -1.7291505336761475, + "logps/chosen": -719.6675415039062, + "logps/rejected": -2231.7314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.476625919342041, + "rewards/margins": 15.389833450317383, + "rewards/rejected": -21.866458892822266, + "step": 19580 + }, + { + "epoch": 1.17, + "learning_rate": 3.829340511471471e-06, + "logits/chosen": -2.38728928565979, + "logits/rejected": -1.6721597909927368, + "logps/chosen": -700.004150390625, + "logps/rejected": -2094.158203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.341429710388184, + "rewards/margins": 14.175494194030762, + "rewards/rejected": -20.516925811767578, + "step": 19590 + }, + { + "epoch": 1.17, + "learning_rate": 3.8278711627368615e-06, + "logits/chosen": -2.3945021629333496, + "logits/rejected": -1.6620852947235107, + "logps/chosen": -689.5613403320312, + "logps/rejected": -2134.9873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193993091583252, + "rewards/margins": 14.725484848022461, + "rewards/rejected": -20.919477462768555, + "step": 19600 + }, + { + "epoch": 1.17, + "learning_rate": 3.826401174765091e-06, + "logits/chosen": -2.4258460998535156, + "logits/rejected": -1.6512095928192139, + "logps/chosen": -675.6314697265625, + "logps/rejected": -2264.68017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006427764892578, + "rewards/margins": 16.193603515625, + "rewards/rejected": -22.200031280517578, + "step": 19610 + }, + { + "epoch": 1.17, + "learning_rate": 3.824930548263811e-06, + "logits/chosen": -2.3972089290618896, + "logits/rejected": -1.636574387550354, + "logps/chosen": -690.4176025390625, + "logps/rejected": -2360.85986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.218028545379639, + "rewards/margins": 16.950647354125977, + "rewards/rejected": -23.16867446899414, + "step": 19620 + }, + { + "epoch": 1.17, + "learning_rate": 3.823459283940982e-06, + "logits/chosen": -2.4045517444610596, + "logits/rejected": -1.5844156742095947, + "logps/chosen": -684.6450805664062, + "logps/rejected": -2211.670654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11983585357666, + "rewards/margins": 15.539546012878418, + "rewards/rejected": -21.659381866455078, + "step": 19630 + }, + { + "epoch": 1.17, + "learning_rate": 3.821987382504871e-06, + "logits/chosen": -2.4694623947143555, + "logits/rejected": -1.6164662837982178, + "logps/chosen": -701.6036987304688, + "logps/rejected": -2346.535888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309390068054199, + "rewards/margins": 16.71131134033203, + "rewards/rejected": -23.020702362060547, + "step": 19640 + }, + { + "epoch": 1.17, + "learning_rate": 3.82051484466405e-06, + "logits/chosen": -2.430166721343994, + "logits/rejected": -1.6057933568954468, + "logps/chosen": -674.1409301757812, + "logps/rejected": -2361.849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000202655792236, + "rewards/margins": 17.18228530883789, + "rewards/rejected": -23.182485580444336, + "step": 19650 + }, + { + "epoch": 1.17, + "learning_rate": 3.819041671127402e-06, + "logits/chosen": -2.3906283378601074, + "logits/rejected": -1.5469478368759155, + "logps/chosen": -685.7679443359375, + "logps/rejected": -2304.86181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1436967849731445, + "rewards/margins": 16.45716094970703, + "rewards/rejected": -22.60085678100586, + "step": 19660 + }, + { + "epoch": 1.17, + "learning_rate": 3.817567862604108e-06, + "logits/chosen": -2.4010043144226074, + "logits/rejected": -1.652410864830017, + "logps/chosen": -714.7589721679688, + "logps/rejected": -2185.849853515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.383256912231445, + "rewards/margins": 15.027995109558105, + "rewards/rejected": -21.411251068115234, + "step": 19670 + }, + { + "epoch": 1.17, + "learning_rate": 3.816093419803663e-06, + "logits/chosen": -2.423339366912842, + "logits/rejected": -1.669748306274414, + "logps/chosen": -687.4555053710938, + "logps/rejected": -2335.543212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.155217170715332, + "rewards/margins": 16.75697898864746, + "rewards/rejected": -22.91219711303711, + "step": 19680 + }, + { + "epoch": 1.17, + "learning_rate": 3.814618343435862e-06, + "logits/chosen": -2.453176975250244, + "logits/rejected": -1.6904735565185547, + "logps/chosen": -726.4951782226562, + "logps/rejected": -2228.556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.542147159576416, + "rewards/margins": 15.297780990600586, + "rewards/rejected": -21.83992576599121, + "step": 19690 + }, + { + "epoch": 1.17, + "learning_rate": 3.813142634210807e-06, + "logits/chosen": -2.402284622192383, + "logits/rejected": -1.6562061309814453, + "logps/chosen": -713.6048583984375, + "logps/rejected": -2229.706787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386910438537598, + "rewards/margins": 15.459782600402832, + "rewards/rejected": -21.846691131591797, + "step": 19700 + }, + { + "epoch": 1.18, + "learning_rate": 3.811666292838905e-06, + "logits/chosen": -2.429776430130005, + "logits/rejected": -1.5450440645217896, + "logps/chosen": -686.7469482421875, + "logps/rejected": -2284.23583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2007551193237305, + "rewards/margins": 16.187829971313477, + "rewards/rejected": -22.38858413696289, + "step": 19710 + }, + { + "epoch": 1.18, + "learning_rate": 3.8101893200308665e-06, + "logits/chosen": -2.4085307121276855, + "logits/rejected": -1.666009545326233, + "logps/chosen": -701.5992431640625, + "logps/rejected": -2236.3408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300387382507324, + "rewards/margins": 15.608100891113281, + "rewards/rejected": -21.908489227294922, + "step": 19720 + }, + { + "epoch": 1.18, + "learning_rate": 3.8087117164977058e-06, + "logits/chosen": -2.4086666107177734, + "logits/rejected": -1.5676634311676025, + "logps/chosen": -695.079345703125, + "logps/rejected": -2223.494384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194132328033447, + "rewards/margins": 15.594525337219238, + "rewards/rejected": -21.788660049438477, + "step": 19730 + }, + { + "epoch": 1.18, + "learning_rate": 3.8072334829507414e-06, + "logits/chosen": -2.4112401008605957, + "logits/rejected": -1.6185181140899658, + "logps/chosen": -744.180908203125, + "logps/rejected": -2247.8046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.758402347564697, + "rewards/margins": 15.282132148742676, + "rewards/rejected": -22.040531158447266, + "step": 19740 + }, + { + "epoch": 1.18, + "learning_rate": 3.805754620101595e-06, + "logits/chosen": -2.45473575592041, + "logits/rejected": -1.5737849473953247, + "logps/chosen": -696.271484375, + "logps/rejected": -2235.82958984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.19371223449707, + "rewards/margins": 15.713704109191895, + "rewards/rejected": -21.907419204711914, + "step": 19750 + }, + { + "epoch": 1.18, + "learning_rate": 3.8042751286621914e-06, + "logits/chosen": -2.4228978157043457, + "logits/rejected": -1.648114562034607, + "logps/chosen": -723.0416259765625, + "logps/rejected": -2260.931884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.529881477355957, + "rewards/margins": 15.629361152648926, + "rewards/rejected": -22.15924072265625, + "step": 19760 + }, + { + "epoch": 1.18, + "learning_rate": 3.802795009344757e-06, + "logits/chosen": -2.4214844703674316, + "logits/rejected": -1.6928796768188477, + "logps/chosen": -717.1437377929688, + "logps/rejected": -2205.320556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50119686126709, + "rewards/margins": 15.113761901855469, + "rewards/rejected": -21.614959716796875, + "step": 19770 + }, + { + "epoch": 1.18, + "learning_rate": 3.8013142628618228e-06, + "logits/chosen": -2.439035654067993, + "logits/rejected": -1.6923401355743408, + "logps/chosen": -709.7481689453125, + "logps/rejected": -2181.283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329468727111816, + "rewards/margins": 15.043319702148438, + "rewards/rejected": -21.372787475585938, + "step": 19780 + }, + { + "epoch": 1.18, + "learning_rate": 3.799832889926217e-06, + "logits/chosen": -2.4125614166259766, + "logits/rejected": -1.5453345775604248, + "logps/chosen": -686.9441528320312, + "logps/rejected": -2153.67041015625, + "loss": 0.0018, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177915573120117, + "rewards/margins": 14.90595817565918, + "rewards/rejected": -21.083871841430664, + "step": 19790 + }, + { + "epoch": 1.18, + "learning_rate": 3.798350891251076e-06, + "logits/chosen": -2.521059274673462, + "logits/rejected": -1.8647966384887695, + "logps/chosen": -600.8491821289062, + "logps/rejected": -1884.7415771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2849602699279785, + "rewards/margins": 13.11591625213623, + "rewards/rejected": -18.400875091552734, + "step": 19800 + }, + { + "epoch": 1.18, + "learning_rate": 3.7968682675498326e-06, + "logits/chosen": -2.516024589538574, + "logits/rejected": -1.6712154150009155, + "logps/chosen": -600.975341796875, + "logps/rejected": -1797.578369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1717023849487305, + "rewards/margins": 12.360315322875977, + "rewards/rejected": -17.53201675415039, + "step": 19810 + }, + { + "epoch": 1.18, + "learning_rate": 3.7953850195362215e-06, + "logits/chosen": -2.4865622520446777, + "logits/rejected": -1.8830080032348633, + "logps/chosen": -587.298828125, + "logps/rejected": -1834.937744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.176946640014648, + "rewards/margins": 12.725759506225586, + "rewards/rejected": -17.902706146240234, + "step": 19820 + }, + { + "epoch": 1.18, + "learning_rate": 3.7939011479242784e-06, + "logits/chosen": -2.5504798889160156, + "logits/rejected": -1.9818395376205444, + "logps/chosen": -584.0774536132812, + "logps/rejected": -1713.05859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.184175491333008, + "rewards/margins": 11.511774063110352, + "rewards/rejected": -16.69594955444336, + "step": 19830 + }, + { + "epoch": 1.18, + "learning_rate": 3.792416653428339e-06, + "logits/chosen": -2.487333297729492, + "logits/rejected": -1.8258297443389893, + "logps/chosen": -614.9920654296875, + "logps/rejected": -1860.1904296875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.439891338348389, + "rewards/margins": 12.71738338470459, + "rewards/rejected": -18.157276153564453, + "step": 19840 + }, + { + "epoch": 1.18, + "learning_rate": 3.7909315367630395e-06, + "logits/chosen": -2.4524757862091064, + "logits/rejected": -1.7186510562896729, + "logps/chosen": -726.0413208007812, + "logps/rejected": -2124.65478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.556002140045166, + "rewards/margins": 14.251226425170898, + "rewards/rejected": -20.80722999572754, + "step": 19850 + }, + { + "epoch": 1.18, + "learning_rate": 3.7894457986433143e-06, + "logits/chosen": -2.411911964416504, + "logits/rejected": -1.725236177444458, + "logps/chosen": -714.9217529296875, + "logps/rejected": -2075.58251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.408775329589844, + "rewards/margins": 13.90735149383545, + "rewards/rejected": -20.316125869750977, + "step": 19860 + }, + { + "epoch": 1.18, + "learning_rate": 3.787959439784399e-06, + "logits/chosen": -2.464773654937744, + "logits/rejected": -1.6341469287872314, + "logps/chosen": -725.5577392578125, + "logps/rejected": -2158.45703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.542091369628906, + "rewards/margins": 14.603052139282227, + "rewards/rejected": -21.145143508911133, + "step": 19870 + }, + { + "epoch": 1.19, + "learning_rate": 3.786472460901825e-06, + "logits/chosen": -2.460944652557373, + "logits/rejected": -1.6704657077789307, + "logps/chosen": -743.1644287109375, + "logps/rejected": -2130.414306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743188381195068, + "rewards/margins": 14.130762100219727, + "rewards/rejected": -20.873950958251953, + "step": 19880 + }, + { + "epoch": 1.19, + "learning_rate": 3.7849848627114248e-06, + "logits/chosen": -2.396027088165283, + "logits/rejected": -1.5830419063568115, + "logps/chosen": -710.6334228515625, + "logps/rejected": -2125.48779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4196271896362305, + "rewards/margins": 14.396522521972656, + "rewards/rejected": -20.816150665283203, + "step": 19890 + }, + { + "epoch": 1.19, + "learning_rate": 3.7834966459293276e-06, + "logits/chosen": -2.5110268592834473, + "logits/rejected": -1.853914499282837, + "logps/chosen": -690.5023803710938, + "logps/rejected": -2164.75341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220052719116211, + "rewards/margins": 14.992225646972656, + "rewards/rejected": -21.212276458740234, + "step": 19900 + }, + { + "epoch": 1.19, + "learning_rate": 3.782007811271963e-06, + "logits/chosen": -2.3962490558624268, + "logits/rejected": -1.6502708196640015, + "logps/chosen": -730.6785888671875, + "logps/rejected": -2190.157958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.593006134033203, + "rewards/margins": 14.865641593933105, + "rewards/rejected": -21.458650588989258, + "step": 19910 + }, + { + "epoch": 1.19, + "learning_rate": 3.7805183594560525e-06, + "logits/chosen": -2.4705920219421387, + "logits/rejected": -1.6797077655792236, + "logps/chosen": -707.6903076171875, + "logps/rejected": -2157.732666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.382025718688965, + "rewards/margins": 14.732988357543945, + "rewards/rejected": -21.11501121520996, + "step": 19920 + }, + { + "epoch": 1.19, + "learning_rate": 3.7790282911986207e-06, + "logits/chosen": -2.4509634971618652, + "logits/rejected": -1.7376073598861694, + "logps/chosen": -718.7891845703125, + "logps/rejected": -2152.28662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.460655212402344, + "rewards/margins": 14.629115104675293, + "rewards/rejected": -21.089771270751953, + "step": 19930 + }, + { + "epoch": 1.19, + "learning_rate": 3.7775376072169865e-06, + "logits/chosen": -2.4039664268493652, + "logits/rejected": -1.6219505071640015, + "logps/chosen": -712.9774780273438, + "logps/rejected": -2088.49755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.386423110961914, + "rewards/margins": 14.062962532043457, + "rewards/rejected": -20.449386596679688, + "step": 19940 + }, + { + "epoch": 1.19, + "learning_rate": 3.7760463082287647e-06, + "logits/chosen": -2.4822564125061035, + "logits/rejected": -1.6608390808105469, + "logps/chosen": -730.3082275390625, + "logps/rejected": -2127.183349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.644900321960449, + "rewards/margins": 14.182416915893555, + "rewards/rejected": -20.82731819152832, + "step": 19950 + }, + { + "epoch": 1.19, + "learning_rate": 3.7745543949518653e-06, + "logits/chosen": -2.4757676124572754, + "logits/rejected": -1.5202369689941406, + "logps/chosen": -702.8236694335938, + "logps/rejected": -2147.153076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300644874572754, + "rewards/margins": 14.718698501586914, + "rewards/rejected": -21.019344329833984, + "step": 19960 + }, + { + "epoch": 1.19, + "learning_rate": 3.773061868104497e-06, + "logits/chosen": -2.4862468242645264, + "logits/rejected": -1.699405312538147, + "logps/chosen": -715.7203369140625, + "logps/rejected": -2244.36474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4668731689453125, + "rewards/margins": 15.52226448059082, + "rewards/rejected": -21.989137649536133, + "step": 19970 + }, + { + "epoch": 1.19, + "learning_rate": 3.7715687284051618e-06, + "logits/chosen": -2.432709217071533, + "logits/rejected": -1.7734043598175049, + "logps/chosen": -720.5367431640625, + "logps/rejected": -2175.069091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4894232749938965, + "rewards/margins": 14.82103157043457, + "rewards/rejected": -21.310453414916992, + "step": 19980 + }, + { + "epoch": 1.19, + "learning_rate": 3.7700749765726565e-06, + "logits/chosen": -2.485107898712158, + "logits/rejected": -1.7537950277328491, + "logps/chosen": -725.8624267578125, + "logps/rejected": -2249.2236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.542919158935547, + "rewards/margins": 15.504534721374512, + "rewards/rejected": -22.047452926635742, + "step": 19990 + }, + { + "epoch": 1.19, + "learning_rate": 3.7685806133260735e-06, + "logits/chosen": -2.4595675468444824, + "logits/rejected": -1.8017524480819702, + "logps/chosen": -712.7359619140625, + "logps/rejected": -2123.7998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.469931602478027, + "rewards/margins": 14.33509635925293, + "rewards/rejected": -20.80502700805664, + "step": 20000 + }, + { + "epoch": 1.19, + "learning_rate": 3.7670856393848e-06, + "logits/chosen": -2.462545156478882, + "logits/rejected": -1.7239282131195068, + "logps/chosen": -703.0462646484375, + "logps/rejected": -2150.6328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2664289474487305, + "rewards/margins": 14.801916122436523, + "rewards/rejected": -21.06834602355957, + "step": 20010 + }, + { + "epoch": 1.19, + "learning_rate": 3.7655900554685155e-06, + "logits/chosen": -2.4963691234588623, + "logits/rejected": -1.7177060842514038, + "logps/chosen": -704.4401245117188, + "logps/rejected": -2144.2236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.281062126159668, + "rewards/margins": 14.737077713012695, + "rewards/rejected": -21.018142700195312, + "step": 20020 + }, + { + "epoch": 1.19, + "learning_rate": 3.764093862297194e-06, + "logits/chosen": -2.442348003387451, + "logits/rejected": -1.6153513193130493, + "logps/chosen": -751.6729125976562, + "logps/rejected": -2095.708740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.802587032318115, + "rewards/margins": 13.718399047851562, + "rewards/rejected": -20.520984649658203, + "step": 20030 + }, + { + "epoch": 1.19, + "learning_rate": 3.7625970605911038e-06, + "logits/chosen": -2.4147543907165527, + "logits/rejected": -1.6058988571166992, + "logps/chosen": -718.6707763671875, + "logps/rejected": -2218.920654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.502419948577881, + "rewards/margins": 15.237574577331543, + "rewards/rejected": -21.7399959564209, + "step": 20040 + }, + { + "epoch": 1.2, + "learning_rate": 3.7610996510708047e-06, + "logits/chosen": -2.4676403999328613, + "logits/rejected": -1.7148635387420654, + "logps/chosen": -744.6165161132812, + "logps/rejected": -2087.411865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.734065055847168, + "rewards/margins": 13.708221435546875, + "rewards/rejected": -20.44228744506836, + "step": 20050 + }, + { + "epoch": 1.2, + "learning_rate": 3.7596016344571494e-06, + "logits/chosen": -2.4884865283966064, + "logits/rejected": -1.7314598560333252, + "logps/chosen": -718.5643310546875, + "logps/rejected": -2161.568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.521903991699219, + "rewards/margins": 14.641695022583008, + "rewards/rejected": -21.163599014282227, + "step": 20060 + }, + { + "epoch": 1.2, + "learning_rate": 3.7581030114712837e-06, + "logits/chosen": -2.4517202377319336, + "logits/rejected": -1.6614373922348022, + "logps/chosen": -732.9185791015625, + "logps/rejected": -2071.201416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.658509254455566, + "rewards/margins": 13.616209030151367, + "rewards/rejected": -20.274717330932617, + "step": 20070 + }, + { + "epoch": 1.2, + "learning_rate": 3.756603782834645e-06, + "logits/chosen": -2.4829306602478027, + "logits/rejected": -1.63885498046875, + "logps/chosen": -700.6236572265625, + "logps/rejected": -2094.004150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.272980690002441, + "rewards/margins": 14.219317436218262, + "rewards/rejected": -20.492298126220703, + "step": 20080 + }, + { + "epoch": 1.2, + "learning_rate": 3.75510394926896e-06, + "logits/chosen": -2.3629651069641113, + "logits/rejected": -1.6648222208023071, + "logps/chosen": -697.0595092773438, + "logps/rejected": -2181.031982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2522454261779785, + "rewards/margins": 15.114419937133789, + "rewards/rejected": -21.36666488647461, + "step": 20090 + }, + { + "epoch": 1.2, + "learning_rate": 3.75360351149625e-06, + "logits/chosen": -2.4625484943389893, + "logits/rejected": -1.4396889209747314, + "logps/chosen": -717.2733764648438, + "logps/rejected": -2109.014404296875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.482513427734375, + "rewards/margins": 14.170186996459961, + "rewards/rejected": -20.652698516845703, + "step": 20100 + }, + { + "epoch": 1.2, + "learning_rate": 3.7521024702388263e-06, + "logits/chosen": -2.4113082885742188, + "logits/rejected": -1.6051502227783203, + "logps/chosen": -787.086181640625, + "logps/rejected": -2071.182861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.175157070159912, + "rewards/margins": 13.106172561645508, + "rewards/rejected": -20.281330108642578, + "step": 20110 + }, + { + "epoch": 1.2, + "learning_rate": 3.7506008262192887e-06, + "logits/chosen": -2.44734263420105, + "logits/rejected": -1.5622599124908447, + "logps/chosen": -779.1883544921875, + "logps/rejected": -2161.333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.069028377532959, + "rewards/margins": 14.10406494140625, + "rewards/rejected": -21.173091888427734, + "step": 20120 + }, + { + "epoch": 1.2, + "learning_rate": 3.7490985801605303e-06, + "logits/chosen": -2.4695963859558105, + "logits/rejected": -1.692665696144104, + "logps/chosen": -764.8941650390625, + "logps/rejected": -2281.818115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.882218837738037, + "rewards/margins": 15.483190536499023, + "rewards/rejected": -22.36541175842285, + "step": 20130 + }, + { + "epoch": 1.2, + "learning_rate": 3.7475957327857303e-06, + "logits/chosen": -2.478667736053467, + "logits/rejected": -1.7032852172851562, + "logps/chosen": -783.625, + "logps/rejected": -2184.034912109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.140067100524902, + "rewards/margins": 14.26215934753418, + "rewards/rejected": -21.40222930908203, + "step": 20140 + }, + { + "epoch": 1.2, + "learning_rate": 3.7460922848183613e-06, + "logits/chosen": -2.4780983924865723, + "logits/rejected": -1.7582502365112305, + "logps/chosen": -743.4362182617188, + "logps/rejected": -2173.8544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.761366367340088, + "rewards/margins": 14.5201416015625, + "rewards/rejected": -21.281505584716797, + "step": 20150 + }, + { + "epoch": 1.2, + "learning_rate": 3.744588236982181e-06, + "logits/chosen": -2.4655935764312744, + "logits/rejected": -1.5962154865264893, + "logps/chosen": -717.21044921875, + "logps/rejected": -2259.107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.403078556060791, + "rewards/margins": 15.756948471069336, + "rewards/rejected": -22.1600284576416, + "step": 20160 + }, + { + "epoch": 1.2, + "learning_rate": 3.7430835900012402e-06, + "logits/chosen": -2.4063265323638916, + "logits/rejected": -1.640024185180664, + "logps/chosen": -747.3234252929688, + "logps/rejected": -2133.22998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.80081033706665, + "rewards/margins": 14.095545768737793, + "rewards/rejected": -20.89635467529297, + "step": 20170 + }, + { + "epoch": 1.2, + "learning_rate": 3.7415783445998748e-06, + "logits/chosen": -2.454364776611328, + "logits/rejected": -1.6012300252914429, + "logps/chosen": -741.1151733398438, + "logps/rejected": -2091.73095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.69805908203125, + "rewards/margins": 13.776611328125, + "rewards/rejected": -20.47467041015625, + "step": 20180 + }, + { + "epoch": 1.2, + "learning_rate": 3.7400725015027107e-06, + "logits/chosen": -2.453296184539795, + "logits/rejected": -1.6234394311904907, + "logps/chosen": -725.951904296875, + "logps/rejected": -2117.425048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.569129943847656, + "rewards/margins": 14.168045043945312, + "rewards/rejected": -20.73717498779297, + "step": 20190 + }, + { + "epoch": 1.2, + "learning_rate": 3.7385660614346593e-06, + "logits/chosen": -2.4414806365966797, + "logits/rejected": -1.5562654733657837, + "logps/chosen": -743.3698120117188, + "logps/rejected": -2164.83154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7859063148498535, + "rewards/margins": 14.40992546081543, + "rewards/rejected": -21.195831298828125, + "step": 20200 + }, + { + "epoch": 1.21, + "learning_rate": 3.737059025120922e-06, + "logits/chosen": -2.3946146965026855, + "logits/rejected": -1.5436949729919434, + "logps/chosen": -764.67724609375, + "logps/rejected": -2111.206298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.967588901519775, + "rewards/margins": 13.69953727722168, + "rewards/rejected": -20.667125701904297, + "step": 20210 + }, + { + "epoch": 1.21, + "learning_rate": 3.7355513932869862e-06, + "logits/chosen": -2.39076566696167, + "logits/rejected": -1.465269684791565, + "logps/chosen": -749.89111328125, + "logps/rejected": -2227.09228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.753817558288574, + "rewards/margins": 15.069528579711914, + "rewards/rejected": -21.823345184326172, + "step": 20220 + }, + { + "epoch": 1.21, + "learning_rate": 3.734043166658625e-06, + "logits/chosen": -2.4422006607055664, + "logits/rejected": -1.643113374710083, + "logps/chosen": -776.3575439453125, + "logps/rejected": -2263.26025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.043302059173584, + "rewards/margins": 15.137153625488281, + "rewards/rejected": -22.180456161499023, + "step": 20230 + }, + { + "epoch": 1.21, + "learning_rate": 3.7325343459618997e-06, + "logits/chosen": -2.448605537414551, + "logits/rejected": -1.6096426248550415, + "logps/chosen": -759.3972778320312, + "logps/rejected": -2182.2578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9468278884887695, + "rewards/margins": 14.442903518676758, + "rewards/rejected": -21.38973045349121, + "step": 20240 + }, + { + "epoch": 1.21, + "learning_rate": 3.7310249319231552e-06, + "logits/chosen": -2.4850122928619385, + "logits/rejected": -1.5631635189056396, + "logps/chosen": -758.197509765625, + "logps/rejected": -2210.454833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.87628698348999, + "rewards/margins": 14.790120124816895, + "rewards/rejected": -21.66640853881836, + "step": 20250 + }, + { + "epoch": 1.21, + "learning_rate": 3.7295149252690243e-06, + "logits/chosen": -2.449164867401123, + "logits/rejected": -1.6849250793457031, + "logps/chosen": -756.6493530273438, + "logps/rejected": -2276.497314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.831171989440918, + "rewards/margins": 15.459567070007324, + "rewards/rejected": -22.290740966796875, + "step": 20260 + }, + { + "epoch": 1.21, + "learning_rate": 3.7280043267264243e-06, + "logits/chosen": -2.4412150382995605, + "logits/rejected": -1.721429467201233, + "logps/chosen": -730.1072998046875, + "logps/rejected": -2083.028076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570645809173584, + "rewards/margins": 13.828722953796387, + "rewards/rejected": -20.399368286132812, + "step": 20270 + }, + { + "epoch": 1.21, + "learning_rate": 3.726493137022557e-06, + "logits/chosen": -2.444004535675049, + "logits/rejected": -1.6659719944000244, + "logps/chosen": -741.5281982421875, + "logps/rejected": -2122.233642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.733421325683594, + "rewards/margins": 14.046656608581543, + "rewards/rejected": -20.780078887939453, + "step": 20280 + }, + { + "epoch": 1.21, + "learning_rate": 3.7249813568849093e-06, + "logits/chosen": -2.434513807296753, + "logits/rejected": -1.5139683485031128, + "logps/chosen": -714.4747314453125, + "logps/rejected": -2237.73974609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42211389541626, + "rewards/margins": 15.508999824523926, + "rewards/rejected": -21.931116104125977, + "step": 20290 + }, + { + "epoch": 1.21, + "learning_rate": 3.7234689870412516e-06, + "logits/chosen": -2.4672675132751465, + "logits/rejected": -1.7210947275161743, + "logps/chosen": -731.9305419921875, + "logps/rejected": -2271.30908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.607611656188965, + "rewards/margins": 15.653890609741211, + "rewards/rejected": -22.261503219604492, + "step": 20300 + }, + { + "epoch": 1.21, + "learning_rate": 3.7219560282196397e-06, + "logits/chosen": -2.4900944232940674, + "logits/rejected": -1.751827597618103, + "logps/chosen": -721.9556884765625, + "logps/rejected": -2111.3125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.547348976135254, + "rewards/margins": 14.11528205871582, + "rewards/rejected": -20.662630081176758, + "step": 20310 + }, + { + "epoch": 1.21, + "learning_rate": 3.7204424811484106e-06, + "logits/chosen": -2.483314037322998, + "logits/rejected": -1.7162530422210693, + "logps/chosen": -650.69140625, + "logps/rejected": -2188.5068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.873778820037842, + "rewards/margins": 15.551076889038086, + "rewards/rejected": -21.424856185913086, + "step": 20320 + }, + { + "epoch": 1.21, + "learning_rate": 3.7189283465561875e-06, + "logits/chosen": -2.48433518409729, + "logits/rejected": -1.6195640563964844, + "logps/chosen": -666.6669311523438, + "logps/rejected": -2105.72509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940940856933594, + "rewards/margins": 14.667495727539062, + "rewards/rejected": -20.608434677124023, + "step": 20330 + }, + { + "epoch": 1.21, + "learning_rate": 3.7174136251718735e-06, + "logits/chosen": -2.4860236644744873, + "logits/rejected": -1.6262848377227783, + "logps/chosen": -637.8846435546875, + "logps/rejected": -2037.778564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.710381507873535, + "rewards/margins": 14.222219467163086, + "rewards/rejected": -19.932600021362305, + "step": 20340 + }, + { + "epoch": 1.21, + "learning_rate": 3.715898317724656e-06, + "logits/chosen": -2.472130537033081, + "logits/rejected": -1.76004958152771, + "logps/chosen": -666.4798583984375, + "logps/rejected": -2164.29345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006655693054199, + "rewards/margins": 15.194696426391602, + "rewards/rejected": -21.201351165771484, + "step": 20350 + }, + { + "epoch": 1.21, + "learning_rate": 3.7143824249440036e-06, + "logits/chosen": -2.4729645252227783, + "logits/rejected": -1.697258710861206, + "logps/chosen": -671.0719604492188, + "logps/rejected": -2179.861572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.95095157623291, + "rewards/margins": 15.404520988464355, + "rewards/rejected": -21.355472564697266, + "step": 20360 + }, + { + "epoch": 1.21, + "learning_rate": 3.712865947559667e-06, + "logits/chosen": -2.4415438175201416, + "logits/rejected": -1.7322537899017334, + "logps/chosen": -650.4191284179688, + "logps/rejected": -2109.13818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.758990287780762, + "rewards/margins": 14.88379192352295, + "rewards/rejected": -20.642784118652344, + "step": 20370 + }, + { + "epoch": 1.22, + "learning_rate": 3.71134888630168e-06, + "logits/chosen": -2.4740488529205322, + "logits/rejected": -1.5691754817962646, + "logps/chosen": -634.2974243164062, + "logps/rejected": -2162.528564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.568485736846924, + "rewards/margins": 15.611262321472168, + "rewards/rejected": -21.179746627807617, + "step": 20380 + }, + { + "epoch": 1.22, + "learning_rate": 3.7098312419003534e-06, + "logits/chosen": -2.479860782623291, + "logits/rejected": -1.7121738195419312, + "logps/chosen": -681.0477294921875, + "logps/rejected": -2138.248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.072665691375732, + "rewards/margins": 14.86327838897705, + "rewards/rejected": -20.935945510864258, + "step": 20390 + }, + { + "epoch": 1.22, + "learning_rate": 3.7083130150862835e-06, + "logits/chosen": -2.4743640422821045, + "logits/rejected": -1.6872440576553345, + "logps/chosen": -675.5589599609375, + "logps/rejected": -2101.13525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.026510238647461, + "rewards/margins": 14.551546096801758, + "rewards/rejected": -20.578052520751953, + "step": 20400 + }, + { + "epoch": 1.22, + "learning_rate": 3.7067942065903428e-06, + "logits/chosen": -2.464850902557373, + "logits/rejected": -1.6050668954849243, + "logps/chosen": -688.620361328125, + "logps/rejected": -2224.5966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.199791431427002, + "rewards/margins": 15.603715896606445, + "rewards/rejected": -21.80350685119629, + "step": 20410 + }, + { + "epoch": 1.22, + "learning_rate": 3.7052748171436864e-06, + "logits/chosen": -2.495365619659424, + "logits/rejected": -1.6217811107635498, + "logps/chosen": -673.3978271484375, + "logps/rejected": -2109.186767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979520320892334, + "rewards/margins": 14.661161422729492, + "rewards/rejected": -20.640682220458984, + "step": 20420 + }, + { + "epoch": 1.22, + "learning_rate": 3.7037548474777484e-06, + "logits/chosen": -2.4664156436920166, + "logits/rejected": -1.7587788105010986, + "logps/chosen": -679.1708374023438, + "logps/rejected": -2135.41943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.082489490509033, + "rewards/margins": 14.836898803710938, + "rewards/rejected": -20.91938591003418, + "step": 20430 + }, + { + "epoch": 1.22, + "learning_rate": 3.702234298324242e-06, + "logits/chosen": -2.4712092876434326, + "logits/rejected": -1.7792400121688843, + "logps/chosen": -663.8434448242188, + "logps/rejected": -2156.8828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.972809791564941, + "rewards/margins": 15.135971069335938, + "rewards/rejected": -21.108783721923828, + "step": 20440 + }, + { + "epoch": 1.22, + "learning_rate": 3.70071317041516e-06, + "logits/chosen": -2.473125457763672, + "logits/rejected": -1.626107931137085, + "logps/chosen": -671.4423217773438, + "logps/rejected": -2044.482666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002254009246826, + "rewards/margins": 14.009384155273438, + "rewards/rejected": -20.011640548706055, + "step": 20450 + }, + { + "epoch": 1.22, + "learning_rate": 3.6991914644827732e-06, + "logits/chosen": -2.4861018657684326, + "logits/rejected": -1.7092483043670654, + "logps/chosen": -641.4082641601562, + "logps/rejected": -2140.445068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.680509567260742, + "rewards/margins": 15.283851623535156, + "rewards/rejected": -20.9643611907959, + "step": 20460 + }, + { + "epoch": 1.22, + "learning_rate": 3.69766918125963e-06, + "logits/chosen": -2.5313472747802734, + "logits/rejected": -1.7116215229034424, + "logps/chosen": -658.8989868164062, + "logps/rejected": -2159.94140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859596252441406, + "rewards/margins": 15.302118301391602, + "rewards/rejected": -21.161714553833008, + "step": 20470 + }, + { + "epoch": 1.22, + "learning_rate": 3.6961463214785586e-06, + "logits/chosen": -2.4091949462890625, + "logits/rejected": -1.5267252922058105, + "logps/chosen": -683.8599243164062, + "logps/rejected": -2082.410400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169626712799072, + "rewards/margins": 14.226409912109375, + "rewards/rejected": -20.39603614807129, + "step": 20480 + }, + { + "epoch": 1.22, + "learning_rate": 3.6946228858726642e-06, + "logits/chosen": -2.4524855613708496, + "logits/rejected": -1.7046267986297607, + "logps/chosen": -653.5396728515625, + "logps/rejected": -2105.697998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.858658790588379, + "rewards/margins": 14.751986503601074, + "rewards/rejected": -20.610647201538086, + "step": 20490 + }, + { + "epoch": 1.22, + "learning_rate": 3.693098875175327e-06, + "logits/chosen": -2.474484920501709, + "logits/rejected": -1.6468740701675415, + "logps/chosen": -654.9970703125, + "logps/rejected": -2135.3017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837571144104004, + "rewards/margins": 15.067509651184082, + "rewards/rejected": -20.905078887939453, + "step": 20500 + }, + { + "epoch": 1.22, + "learning_rate": 3.6915742901202063e-06, + "logits/chosen": -2.456890106201172, + "logits/rejected": -1.647017240524292, + "logps/chosen": -696.1260986328125, + "logps/rejected": -2066.407470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.259953498840332, + "rewards/margins": 13.97563648223877, + "rewards/rejected": -20.2355899810791, + "step": 20510 + }, + { + "epoch": 1.22, + "learning_rate": 3.690049131441238e-06, + "logits/chosen": -2.430833578109741, + "logits/rejected": -1.6713597774505615, + "logps/chosen": -673.7564697265625, + "logps/rejected": -2098.320556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040738105773926, + "rewards/margins": 14.503358840942383, + "rewards/rejected": -20.544097900390625, + "step": 20520 + }, + { + "epoch": 1.22, + "learning_rate": 3.688523399872632e-06, + "logits/chosen": -2.4463565349578857, + "logits/rejected": -1.7423012256622314, + "logps/chosen": -655.0904541015625, + "logps/rejected": -2113.822021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792442321777344, + "rewards/margins": 14.891270637512207, + "rewards/rejected": -20.683712005615234, + "step": 20530 + }, + { + "epoch": 1.22, + "learning_rate": 3.6869970961488775e-06, + "logits/chosen": -2.461862087249756, + "logits/rejected": -1.7687437534332275, + "logps/chosen": -662.8002319335938, + "logps/rejected": -2136.2822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948121070861816, + "rewards/margins": 14.972970962524414, + "rewards/rejected": -20.921091079711914, + "step": 20540 + }, + { + "epoch": 1.23, + "learning_rate": 3.6854702210047353e-06, + "logits/chosen": -2.454721212387085, + "logits/rejected": -1.7588142156600952, + "logps/chosen": -648.6082153320312, + "logps/rejected": -2143.8603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.801299095153809, + "rewards/margins": 15.18993091583252, + "rewards/rejected": -20.991230010986328, + "step": 20550 + }, + { + "epoch": 1.23, + "learning_rate": 3.683942775175244e-06, + "logits/chosen": -2.4438016414642334, + "logits/rejected": -1.8082036972045898, + "logps/chosen": -664.4244384765625, + "logps/rejected": -2067.6640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.922482967376709, + "rewards/margins": 14.319664001464844, + "rewards/rejected": -20.242145538330078, + "step": 20560 + }, + { + "epoch": 1.23, + "learning_rate": 3.682414759395716e-06, + "logits/chosen": -2.4565670490264893, + "logits/rejected": -1.7016149759292603, + "logps/chosen": -669.6326904296875, + "logps/rejected": -2172.015869140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.933630466461182, + "rewards/margins": 15.339485168457031, + "rewards/rejected": -21.273113250732422, + "step": 20570 + }, + { + "epoch": 1.23, + "learning_rate": 3.6808861744017386e-06, + "logits/chosen": -2.4652888774871826, + "logits/rejected": -1.738120675086975, + "logps/chosen": -668.4066162109375, + "logps/rejected": -2112.568115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.990682125091553, + "rewards/margins": 14.691904067993164, + "rewards/rejected": -20.682584762573242, + "step": 20580 + }, + { + "epoch": 1.23, + "learning_rate": 3.6793570209291712e-06, + "logits/chosen": -2.4217259883880615, + "logits/rejected": -1.7189384698867798, + "logps/chosen": -658.1981201171875, + "logps/rejected": -2059.14794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.869626522064209, + "rewards/margins": 14.286331176757812, + "rewards/rejected": -20.155956268310547, + "step": 20590 + }, + { + "epoch": 1.23, + "learning_rate": 3.677827299714149e-06, + "logits/chosen": -2.4898948669433594, + "logits/rejected": -1.8239272832870483, + "logps/chosen": -692.4293212890625, + "logps/rejected": -2120.50537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2052717208862305, + "rewards/margins": 14.563695907592773, + "rewards/rejected": -20.76896858215332, + "step": 20600 + }, + { + "epoch": 1.23, + "learning_rate": 3.6762970114930796e-06, + "logits/chosen": -2.4875376224517822, + "logits/rejected": -1.7626311779022217, + "logps/chosen": -652.4815673828125, + "logps/rejected": -2044.1331787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.822597503662109, + "rewards/margins": 14.176427841186523, + "rewards/rejected": -19.999025344848633, + "step": 20610 + }, + { + "epoch": 1.23, + "learning_rate": 3.674766157002644e-06, + "logits/chosen": -2.4822402000427246, + "logits/rejected": -1.7607629299163818, + "logps/chosen": -651.5037841796875, + "logps/rejected": -2132.2158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8685784339904785, + "rewards/margins": 15.018178939819336, + "rewards/rejected": -20.88675880432129, + "step": 20620 + }, + { + "epoch": 1.23, + "learning_rate": 3.6732347369797957e-06, + "logits/chosen": -2.4420878887176514, + "logits/rejected": -1.7447468042373657, + "logps/chosen": -676.2449951171875, + "logps/rejected": -2096.66845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088441848754883, + "rewards/margins": 14.435941696166992, + "rewards/rejected": -20.524385452270508, + "step": 20630 + }, + { + "epoch": 1.23, + "learning_rate": 3.6717027521617593e-06, + "logits/chosen": -2.4506049156188965, + "logits/rejected": -1.7718089818954468, + "logps/chosen": -654.029052734375, + "logps/rejected": -2120.25390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864768028259277, + "rewards/margins": 14.890789985656738, + "rewards/rejected": -20.755558013916016, + "step": 20640 + }, + { + "epoch": 1.23, + "learning_rate": 3.6701702032860338e-06, + "logits/chosen": -2.479494094848633, + "logits/rejected": -1.646539330482483, + "logps/chosen": -693.0401611328125, + "logps/rejected": -2190.19580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.205155372619629, + "rewards/margins": 15.25434684753418, + "rewards/rejected": -21.459503173828125, + "step": 20650 + }, + { + "epoch": 1.23, + "learning_rate": 3.6686370910903868e-06, + "logits/chosen": -2.4293553829193115, + "logits/rejected": -1.7668412923812866, + "logps/chosen": -691.0670166015625, + "logps/rejected": -2089.1103515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2532148361206055, + "rewards/margins": 14.192298889160156, + "rewards/rejected": -20.445514678955078, + "step": 20660 + }, + { + "epoch": 1.23, + "learning_rate": 3.6671034163128594e-06, + "logits/chosen": -2.408173084259033, + "logits/rejected": -1.6831672191619873, + "logps/chosen": -707.9718017578125, + "logps/rejected": -2005.025634765625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.401788234710693, + "rewards/margins": 13.1932373046875, + "rewards/rejected": -19.595027923583984, + "step": 20670 + }, + { + "epoch": 1.23, + "learning_rate": 3.665569179691762e-06, + "logits/chosen": -2.5145297050476074, + "logits/rejected": -1.824671983718872, + "logps/chosen": -676.7529907226562, + "logps/rejected": -2107.651611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.046151161193848, + "rewards/margins": 14.593416213989258, + "rewards/rejected": -20.63956642150879, + "step": 20680 + }, + { + "epoch": 1.23, + "learning_rate": 3.6640343819656765e-06, + "logits/chosen": -2.4992661476135254, + "logits/rejected": -1.747283935546875, + "logps/chosen": -676.99267578125, + "logps/rejected": -2084.810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066755771636963, + "rewards/margins": 14.351968765258789, + "rewards/rejected": -20.41872215270996, + "step": 20690 + }, + { + "epoch": 1.23, + "learning_rate": 3.662499023873454e-06, + "logits/chosen": -2.4843173027038574, + "logits/rejected": -1.8976541757583618, + "logps/chosen": -687.6077880859375, + "logps/rejected": -2108.32421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213799953460693, + "rewards/margins": 14.418657302856445, + "rewards/rejected": -20.63245391845703, + "step": 20700 + }, + { + "epoch": 1.23, + "learning_rate": 3.6609631061542167e-06, + "logits/chosen": -2.4406116008758545, + "logits/rejected": -1.7040554285049438, + "logps/chosen": -689.0453491210938, + "logps/rejected": -2089.630615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13143253326416, + "rewards/margins": 14.329890251159668, + "rewards/rejected": -20.461322784423828, + "step": 20710 + }, + { + "epoch": 1.24, + "learning_rate": 3.659426629547355e-06, + "logits/chosen": -2.456800937652588, + "logits/rejected": -1.7517706155776978, + "logps/chosen": -698.29052734375, + "logps/rejected": -2195.347412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.282711505889893, + "rewards/margins": 15.229995727539062, + "rewards/rejected": -21.512704849243164, + "step": 20720 + }, + { + "epoch": 1.24, + "learning_rate": 3.657889594792528e-06, + "logits/chosen": -2.4849791526794434, + "logits/rejected": -1.7175337076187134, + "logps/chosen": -694.3587646484375, + "logps/rejected": -2109.540283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.189963340759277, + "rewards/margins": 14.466740608215332, + "rewards/rejected": -20.65670394897461, + "step": 20730 + }, + { + "epoch": 1.24, + "learning_rate": 3.6563520026296652e-06, + "logits/chosen": -2.4880306720733643, + "logits/rejected": -1.748125433921814, + "logps/chosen": -681.92919921875, + "logps/rejected": -2144.51318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10265588760376, + "rewards/margins": 14.898979187011719, + "rewards/rejected": -21.001636505126953, + "step": 20740 + }, + { + "epoch": 1.24, + "learning_rate": 3.654813853798963e-06, + "logits/chosen": -2.4754929542541504, + "logits/rejected": -1.8221591711044312, + "logps/chosen": -707.025146484375, + "logps/rejected": -2102.070556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.36374044418335, + "rewards/margins": 14.208064079284668, + "rewards/rejected": -20.57180404663086, + "step": 20750 + }, + { + "epoch": 1.24, + "learning_rate": 3.653275149040887e-06, + "logits/chosen": -2.5213348865509033, + "logits/rejected": -1.8826122283935547, + "logps/chosen": -680.316650390625, + "logps/rejected": -2065.49658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.141587257385254, + "rewards/margins": 14.078985214233398, + "rewards/rejected": -20.220569610595703, + "step": 20760 + }, + { + "epoch": 1.24, + "learning_rate": 3.651735889096168e-06, + "logits/chosen": -2.479457378387451, + "logits/rejected": -1.6938304901123047, + "logps/chosen": -678.1740112304688, + "logps/rejected": -2156.16943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08205509185791, + "rewards/margins": 15.026997566223145, + "rewards/rejected": -21.109052658081055, + "step": 20770 + }, + { + "epoch": 1.24, + "learning_rate": 3.650196074705807e-06, + "logits/chosen": -2.4668941497802734, + "logits/rejected": -1.8444026708602905, + "logps/chosen": -682.8187255859375, + "logps/rejected": -2190.101806640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132690906524658, + "rewards/margins": 15.323724746704102, + "rewards/rejected": -21.456417083740234, + "step": 20780 + }, + { + "epoch": 1.24, + "learning_rate": 3.6486557066110694e-06, + "logits/chosen": -2.412271499633789, + "logits/rejected": -1.585246205329895, + "logps/chosen": -817.2189331054688, + "logps/rejected": -2153.4755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.491541385650635, + "rewards/margins": 13.592976570129395, + "rewards/rejected": -21.084518432617188, + "step": 20790 + }, + { + "epoch": 1.24, + "learning_rate": 3.6471147855534907e-06, + "logits/chosen": -2.422679901123047, + "logits/rejected": -1.5299392938613892, + "logps/chosen": -787.6778564453125, + "logps/rejected": -2167.410888671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.154283046722412, + "rewards/margins": 14.079950332641602, + "rewards/rejected": -21.23423194885254, + "step": 20800 + }, + { + "epoch": 1.24, + "learning_rate": 3.6455733122748677e-06, + "logits/chosen": -2.470364570617676, + "logits/rejected": -1.59609055519104, + "logps/chosen": -751.57177734375, + "logps/rejected": -2259.604248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823506832122803, + "rewards/margins": 15.3096284866333, + "rewards/rejected": -22.133134841918945, + "step": 20810 + }, + { + "epoch": 1.24, + "learning_rate": 3.644031287517267e-06, + "logits/chosen": -2.472994565963745, + "logits/rejected": -1.6659599542617798, + "logps/chosen": -735.2820434570312, + "logps/rejected": -2213.89306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6775312423706055, + "rewards/margins": 15.024894714355469, + "rewards/rejected": -21.70242691040039, + "step": 20820 + }, + { + "epoch": 1.24, + "learning_rate": 3.6424887120230202e-06, + "logits/chosen": -2.4433536529541016, + "logits/rejected": -1.7132898569107056, + "logps/chosen": -705.6458740234375, + "logps/rejected": -2187.11865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.410492897033691, + "rewards/margins": 15.008404731750488, + "rewards/rejected": -21.418895721435547, + "step": 20830 + }, + { + "epoch": 1.24, + "learning_rate": 3.6409455865347214e-06, + "logits/chosen": -2.5055129528045654, + "logits/rejected": -1.731720209121704, + "logps/chosen": -726.8396606445312, + "logps/rejected": -2297.735595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.521996974945068, + "rewards/margins": 16.00748634338379, + "rewards/rejected": -22.529481887817383, + "step": 20840 + }, + { + "epoch": 1.24, + "learning_rate": 3.639401911795232e-06, + "logits/chosen": -2.454561233520508, + "logits/rejected": -1.7025566101074219, + "logps/chosen": -717.2439575195312, + "logps/rejected": -2237.553955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.520376682281494, + "rewards/margins": 15.413125991821289, + "rewards/rejected": -21.933500289916992, + "step": 20850 + }, + { + "epoch": 1.24, + "learning_rate": 3.6378576885476756e-06, + "logits/chosen": -2.4555790424346924, + "logits/rejected": -1.5655767917633057, + "logps/chosen": -726.5053100585938, + "logps/rejected": -2208.334716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.492929935455322, + "rewards/margins": 15.1435546875, + "rewards/rejected": -21.636486053466797, + "step": 20860 + }, + { + "epoch": 1.24, + "learning_rate": 3.636312917535444e-06, + "logits/chosen": -2.446092128753662, + "logits/rejected": -1.7255109548568726, + "logps/chosen": -729.3865966796875, + "logps/rejected": -2139.46533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6073737144470215, + "rewards/margins": 14.336313247680664, + "rewards/rejected": -20.94368553161621, + "step": 20870 + }, + { + "epoch": 1.25, + "learning_rate": 3.6347675995021874e-06, + "logits/chosen": -2.425011157989502, + "logits/rejected": -1.6506309509277344, + "logps/chosen": -736.1517944335938, + "logps/rejected": -2133.299560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.604236602783203, + "rewards/margins": 14.290321350097656, + "rewards/rejected": -20.89455795288086, + "step": 20880 + }, + { + "epoch": 1.25, + "learning_rate": 3.6332217351918233e-06, + "logits/chosen": -2.4474692344665527, + "logits/rejected": -1.774534821510315, + "logps/chosen": -739.7838134765625, + "logps/rejected": -2083.779052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.721421718597412, + "rewards/margins": 13.662416458129883, + "rewards/rejected": -20.383838653564453, + "step": 20890 + }, + { + "epoch": 1.25, + "learning_rate": 3.63167532534853e-06, + "logits/chosen": -2.516420841217041, + "logits/rejected": -1.6279466152191162, + "logps/chosen": -741.2396240234375, + "logps/rejected": -2037.9840087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6679229736328125, + "rewards/margins": 13.264866828918457, + "rewards/rejected": -19.932788848876953, + "step": 20900 + }, + { + "epoch": 1.25, + "learning_rate": 3.6301283707167495e-06, + "logits/chosen": -2.434250831604004, + "logits/rejected": -1.691577672958374, + "logps/chosen": -741.7247314453125, + "logps/rejected": -2167.69140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.699967861175537, + "rewards/margins": 14.536752700805664, + "rewards/rejected": -21.23672103881836, + "step": 20910 + }, + { + "epoch": 1.25, + "learning_rate": 3.6285808720411856e-06, + "logits/chosen": -2.482849597930908, + "logits/rejected": -1.672562837600708, + "logps/chosen": -714.7093505859375, + "logps/rejected": -2199.061767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3877949714660645, + "rewards/margins": 15.153948783874512, + "rewards/rejected": -21.5417423248291, + "step": 20920 + }, + { + "epoch": 1.25, + "learning_rate": 3.6270328300668055e-06, + "logits/chosen": -2.4620463848114014, + "logits/rejected": -1.4450523853302002, + "logps/chosen": -724.5155029296875, + "logps/rejected": -2126.53759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5006422996521, + "rewards/margins": 14.318803787231445, + "rewards/rejected": -20.819446563720703, + "step": 20930 + }, + { + "epoch": 1.25, + "learning_rate": 3.6254842455388347e-06, + "logits/chosen": -2.4494175910949707, + "logits/rejected": -1.6283330917358398, + "logps/chosen": -726.3651733398438, + "logps/rejected": -2171.459228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.505451202392578, + "rewards/margins": 14.771087646484375, + "rewards/rejected": -21.276538848876953, + "step": 20940 + }, + { + "epoch": 1.25, + "learning_rate": 3.6239351192027616e-06, + "logits/chosen": -2.4249844551086426, + "logits/rejected": -1.6074130535125732, + "logps/chosen": -727.5736083984375, + "logps/rejected": -2145.417724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.613343715667725, + "rewards/margins": 14.38514518737793, + "rewards/rejected": -20.998489379882812, + "step": 20950 + }, + { + "epoch": 1.25, + "learning_rate": 3.622385451804337e-06, + "logits/chosen": -2.420029640197754, + "logits/rejected": -1.5557714700698853, + "logps/chosen": -738.0623779296875, + "logps/rejected": -2102.94189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.633828163146973, + "rewards/margins": 13.953872680664062, + "rewards/rejected": -20.58770179748535, + "step": 20960 + }, + { + "epoch": 1.25, + "learning_rate": 3.6208352440895704e-06, + "logits/chosen": -2.466874837875366, + "logits/rejected": -1.6336870193481445, + "logps/chosen": -724.2703857421875, + "logps/rejected": -2199.95458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5008673667907715, + "rewards/margins": 15.03534984588623, + "rewards/rejected": -21.536218643188477, + "step": 20970 + }, + { + "epoch": 1.25, + "learning_rate": 3.619284496804731e-06, + "logits/chosen": -2.519918918609619, + "logits/rejected": -1.7429912090301514, + "logps/chosen": -734.0985717773438, + "logps/rejected": -2191.677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6025872230529785, + "rewards/margins": 14.864239692687988, + "rewards/rejected": -21.466829299926758, + "step": 20980 + }, + { + "epoch": 1.25, + "learning_rate": 3.61773321069635e-06, + "logits/chosen": -2.4939072132110596, + "logits/rejected": -1.7662982940673828, + "logps/chosen": -708.3992919921875, + "logps/rejected": -2130.6220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.399084568023682, + "rewards/margins": 14.44725227355957, + "rewards/rejected": -20.846338272094727, + "step": 20990 + }, + { + "epoch": 1.25, + "learning_rate": 3.6161813865112155e-06, + "logits/chosen": -2.42710542678833, + "logits/rejected": -1.6042293310165405, + "logps/chosen": -725.4501342773438, + "logps/rejected": -2133.883056640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.567335605621338, + "rewards/margins": 14.334434509277344, + "rewards/rejected": -20.90176773071289, + "step": 21000 + }, + { + "epoch": 1.25, + "eval_logits/chosen": -2.429273843765259, + "eval_logits/rejected": -2.0264174938201904, + "eval_logps/chosen": -695.5377197265625, + "eval_logps/rejected": -1973.51513671875, + "eval_loss": 5.566523032030091e-05, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -6.284632682800293, + "eval_rewards/margins": 12.995035171508789, + "eval_rewards/rejected": -19.279666900634766, + "eval_runtime": 3.9014, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.256, + "step": 21000 + }, + { + "epoch": 1.25, + "learning_rate": 3.6146290249963758e-06, + "logits/chosen": -2.514305353164673, + "logits/rejected": -1.7401418685913086, + "logps/chosen": -631.6317749023438, + "logps/rejected": -2216.654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597524166107178, + "rewards/margins": 16.110492706298828, + "rewards/rejected": -21.708017349243164, + "step": 21010 + }, + { + "epoch": 1.25, + "learning_rate": 3.6130761268991367e-06, + "logits/chosen": -2.5170412063598633, + "logits/rejected": -1.7861757278442383, + "logps/chosen": -620.9783325195312, + "logps/rejected": -1911.967529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.483581066131592, + "rewards/margins": 13.210233688354492, + "rewards/rejected": -18.69381332397461, + "step": 21020 + }, + { + "epoch": 1.25, + "learning_rate": 3.611522692967065e-06, + "logits/chosen": -2.513805866241455, + "logits/rejected": -1.884523630142212, + "logps/chosen": -602.3370361328125, + "logps/rejected": -1960.3101806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.332019805908203, + "rewards/margins": 13.831326484680176, + "rewards/rejected": -19.16334342956543, + "step": 21030 + }, + { + "epoch": 1.25, + "learning_rate": 3.609968723947983e-06, + "logits/chosen": -2.489393711090088, + "logits/rejected": -1.7424297332763672, + "logps/chosen": -591.4859008789062, + "logps/rejected": -2025.855224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.192417621612549, + "rewards/margins": 14.62555980682373, + "rewards/rejected": -19.817974090576172, + "step": 21040 + }, + { + "epoch": 1.26, + "learning_rate": 3.608414220589972e-06, + "logits/chosen": -2.5238869190216064, + "logits/rejected": -1.9023336172103882, + "logps/chosen": -596.3215942382812, + "logps/rejected": -1963.0657958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2571516036987305, + "rewards/margins": 13.936464309692383, + "rewards/rejected": -19.193614959716797, + "step": 21050 + }, + { + "epoch": 1.26, + "learning_rate": 3.6068591836413687e-06, + "logits/chosen": -2.490185499191284, + "logits/rejected": -1.7834551334381104, + "logps/chosen": -603.3471069335938, + "logps/rejected": -2006.552490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.310717582702637, + "rewards/margins": 14.312942504882812, + "rewards/rejected": -19.623661041259766, + "step": 21060 + }, + { + "epoch": 1.26, + "learning_rate": 3.605303613850768e-06, + "logits/chosen": -2.4666080474853516, + "logits/rejected": -1.7728939056396484, + "logps/chosen": -616.7235107421875, + "logps/rejected": -2032.9417724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.493735313415527, + "rewards/margins": 14.391496658325195, + "rewards/rejected": -19.88523292541504, + "step": 21070 + }, + { + "epoch": 1.26, + "learning_rate": 3.6037475119670228e-06, + "logits/chosen": -2.470466136932373, + "logits/rejected": -1.7680575847625732, + "logps/chosen": -629.8743896484375, + "logps/rejected": -2012.2230224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.618157386779785, + "rewards/margins": 14.060831069946289, + "rewards/rejected": -19.678985595703125, + "step": 21080 + }, + { + "epoch": 1.26, + "learning_rate": 3.602190878739239e-06, + "logits/chosen": -2.4677486419677734, + "logits/rejected": -1.7098267078399658, + "logps/chosen": -623.0912475585938, + "logps/rejected": -2075.862548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485490322113037, + "rewards/margins": 14.833086013793945, + "rewards/rejected": -20.31857681274414, + "step": 21090 + }, + { + "epoch": 1.26, + "learning_rate": 3.6006337149167798e-06, + "logits/chosen": -2.497671604156494, + "logits/rejected": -1.700360894203186, + "logps/chosen": -628.538818359375, + "logps/rejected": -2144.21435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.569694519042969, + "rewards/margins": 15.427713394165039, + "rewards/rejected": -20.997406005859375, + "step": 21100 + }, + { + "epoch": 1.26, + "learning_rate": 3.5990760212492654e-06, + "logits/chosen": -2.448495388031006, + "logits/rejected": -1.6695770025253296, + "logps/chosen": -685.2747802734375, + "logps/rejected": -2122.378173828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092088222503662, + "rewards/margins": 14.689336776733398, + "rewards/rejected": -20.78142738342285, + "step": 21110 + }, + { + "epoch": 1.26, + "learning_rate": 3.5975177984865673e-06, + "logits/chosen": -2.535414695739746, + "logits/rejected": -1.9017093181610107, + "logps/chosen": -580.9510498046875, + "logps/rejected": -2040.114990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.141242980957031, + "rewards/margins": 14.816377639770508, + "rewards/rejected": -19.957618713378906, + "step": 21120 + }, + { + "epoch": 1.26, + "learning_rate": 3.5959590473788153e-06, + "logits/chosen": -2.548466205596924, + "logits/rejected": -1.8243532180786133, + "logps/chosen": -560.7193603515625, + "logps/rejected": -1944.0279541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.915360450744629, + "rewards/margins": 14.080775260925293, + "rewards/rejected": -18.996135711669922, + "step": 21130 + }, + { + "epoch": 1.26, + "learning_rate": 3.594399768676392e-06, + "logits/chosen": -2.552002191543579, + "logits/rejected": -1.9150972366333008, + "logps/chosen": -565.1198120117188, + "logps/rejected": -1881.9713134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.954471588134766, + "rewards/margins": 13.427462577819824, + "rewards/rejected": -18.381933212280273, + "step": 21140 + }, + { + "epoch": 1.26, + "learning_rate": 3.592839963129934e-06, + "logits/chosen": -2.5349223613739014, + "logits/rejected": -1.7797346115112305, + "logps/chosen": -560.50634765625, + "logps/rejected": -1947.1732177734375, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.866715431213379, + "rewards/margins": 14.161288261413574, + "rewards/rejected": -19.028003692626953, + "step": 21150 + }, + { + "epoch": 1.26, + "learning_rate": 3.5912796314903314e-06, + "logits/chosen": -2.4852821826934814, + "logits/rejected": -1.7157974243164062, + "logps/chosen": -609.7518310546875, + "logps/rejected": -1889.7470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393662452697754, + "rewards/margins": 13.06396198272705, + "rewards/rejected": -18.457622528076172, + "step": 21160 + }, + { + "epoch": 1.26, + "learning_rate": 3.5897187745087282e-06, + "logits/chosen": -2.495877981185913, + "logits/rejected": -1.6502430438995361, + "logps/chosen": -612.7188720703125, + "logps/rejected": -2056.68701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.41024112701416, + "rewards/margins": 14.70646858215332, + "rewards/rejected": -20.116710662841797, + "step": 21170 + }, + { + "epoch": 1.26, + "learning_rate": 3.588157392936521e-06, + "logits/chosen": -2.521859645843506, + "logits/rejected": -1.80938720703125, + "logps/chosen": -617.1654052734375, + "logps/rejected": -1956.566162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.458057880401611, + "rewards/margins": 13.672389030456543, + "rewards/rejected": -19.13044548034668, + "step": 21180 + }, + { + "epoch": 1.26, + "learning_rate": 3.5865954875253586e-06, + "logits/chosen": -2.5012972354888916, + "logits/rejected": -1.7145286798477173, + "logps/chosen": -613.40283203125, + "logps/rejected": -1880.7730712890625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4084577560424805, + "rewards/margins": 12.96649169921875, + "rewards/rejected": -18.374950408935547, + "step": 21190 + }, + { + "epoch": 1.26, + "learning_rate": 3.5850330590271433e-06, + "logits/chosen": -2.5295252799987793, + "logits/rejected": -1.8283491134643555, + "logps/chosen": -638.0762939453125, + "logps/rejected": -2095.95703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.673929691314697, + "rewards/margins": 14.831954956054688, + "rewards/rejected": -20.50588607788086, + "step": 21200 + }, + { + "epoch": 1.26, + "learning_rate": 3.583470108194026e-06, + "logits/chosen": -2.478515625, + "logits/rejected": -1.7181427478790283, + "logps/chosen": -651.3512573242188, + "logps/rejected": -2017.12890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.779629707336426, + "rewards/margins": 13.944623947143555, + "rewards/rejected": -19.724254608154297, + "step": 21210 + }, + { + "epoch": 1.27, + "learning_rate": 3.5819066357784127e-06, + "logits/chosen": -2.4857566356658936, + "logits/rejected": -1.7222270965576172, + "logps/chosen": -650.5392456054688, + "logps/rejected": -1951.137939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.741454124450684, + "rewards/margins": 13.332399368286133, + "rewards/rejected": -19.073848724365234, + "step": 21220 + }, + { + "epoch": 1.27, + "learning_rate": 3.580342642532959e-06, + "logits/chosen": -2.4658689498901367, + "logits/rejected": -1.757494330406189, + "logps/chosen": -636.5446166992188, + "logps/rejected": -2036.270263671875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.646054267883301, + "rewards/margins": 14.27424430847168, + "rewards/rejected": -19.92030143737793, + "step": 21230 + }, + { + "epoch": 1.27, + "learning_rate": 3.5787781292105704e-06, + "logits/chosen": -2.4690022468566895, + "logits/rejected": -1.6877222061157227, + "logps/chosen": -605.8209838867188, + "logps/rejected": -1962.4974365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.357986927032471, + "rewards/margins": 13.839436531066895, + "rewards/rejected": -19.19742202758789, + "step": 21240 + }, + { + "epoch": 1.27, + "learning_rate": 3.5772130965644046e-06, + "logits/chosen": -2.515326499938965, + "logits/rejected": -1.7361444234848022, + "logps/chosen": -588.761962890625, + "logps/rejected": -2003.291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.146796226501465, + "rewards/margins": 14.438896179199219, + "rewards/rejected": -19.585689544677734, + "step": 21250 + }, + { + "epoch": 1.27, + "learning_rate": 3.5756475453478677e-06, + "logits/chosen": -2.490269422531128, + "logits/rejected": -1.6516516208648682, + "logps/chosen": -612.7196044921875, + "logps/rejected": -2064.92138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3775129318237305, + "rewards/margins": 14.830899238586426, + "rewards/rejected": -20.208410263061523, + "step": 21260 + }, + { + "epoch": 1.27, + "learning_rate": 3.5740814763146164e-06, + "logits/chosen": -2.5134780406951904, + "logits/rejected": -1.6897766590118408, + "logps/chosen": -604.3936767578125, + "logps/rejected": -1983.6298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.35338830947876, + "rewards/margins": 14.049871444702148, + "rewards/rejected": -19.40325927734375, + "step": 21270 + }, + { + "epoch": 1.27, + "learning_rate": 3.572514890218556e-06, + "logits/chosen": -2.5203697681427, + "logits/rejected": -1.6718181371688843, + "logps/chosen": -602.0690307617188, + "logps/rejected": -2037.7562255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.336389064788818, + "rewards/margins": 14.5853271484375, + "rewards/rejected": -19.921716690063477, + "step": 21280 + }, + { + "epoch": 1.27, + "learning_rate": 3.570947787813841e-06, + "logits/chosen": -2.487020254135132, + "logits/rejected": -1.7430976629257202, + "logps/chosen": -593.3908081054688, + "logps/rejected": -2056.58154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.22043514251709, + "rewards/margins": 14.905171394348145, + "rewards/rejected": -20.1256046295166, + "step": 21290 + }, + { + "epoch": 1.27, + "learning_rate": 3.569380169854875e-06, + "logits/chosen": -2.488032817840576, + "logits/rejected": -1.632168173789978, + "logps/chosen": -611.8372192382812, + "logps/rejected": -1999.085205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4370598793029785, + "rewards/margins": 14.111448287963867, + "rewards/rejected": -19.548507690429688, + "step": 21300 + }, + { + "epoch": 1.27, + "learning_rate": 3.5678120370963084e-06, + "logits/chosen": -2.5183680057525635, + "logits/rejected": -1.8025617599487305, + "logps/chosen": -597.0517578125, + "logps/rejected": -1930.069091796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.250010967254639, + "rewards/margins": 13.616002082824707, + "rewards/rejected": -18.86601448059082, + "step": 21310 + }, + { + "epoch": 1.27, + "learning_rate": 3.5662433902930403e-06, + "logits/chosen": -2.442584276199341, + "logits/rejected": -1.6706193685531616, + "logps/chosen": -618.8317260742188, + "logps/rejected": -1982.1165771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.470541954040527, + "rewards/margins": 13.907827377319336, + "rewards/rejected": -19.378368377685547, + "step": 21320 + }, + { + "epoch": 1.27, + "learning_rate": 3.5646742302002185e-06, + "logits/chosen": -2.4887428283691406, + "logits/rejected": -1.6971962451934814, + "logps/chosen": -602.1279296875, + "logps/rejected": -1964.0687255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.355284214019775, + "rewards/margins": 13.83500862121582, + "rewards/rejected": -19.190292358398438, + "step": 21330 + }, + { + "epoch": 1.27, + "learning_rate": 3.563104557573236e-06, + "logits/chosen": -2.487840175628662, + "logits/rejected": -1.8235365152359009, + "logps/chosen": -580.7733154296875, + "logps/rejected": -2014.584228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.101648807525635, + "rewards/margins": 14.609840393066406, + "rewards/rejected": -19.711490631103516, + "step": 21340 + }, + { + "epoch": 1.27, + "learning_rate": 3.561534373167732e-06, + "logits/chosen": -2.4788708686828613, + "logits/rejected": -1.5825953483581543, + "logps/chosen": -616.3478393554688, + "logps/rejected": -2048.65234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.419474124908447, + "rewards/margins": 14.60319709777832, + "rewards/rejected": -20.02267074584961, + "step": 21350 + }, + { + "epoch": 1.27, + "learning_rate": 3.5599636777395954e-06, + "logits/chosen": -2.4959380626678467, + "logits/rejected": -1.7919471263885498, + "logps/chosen": -602.7192993164062, + "logps/rejected": -1959.619873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.341959476470947, + "rewards/margins": 13.8114595413208, + "rewards/rejected": -19.153419494628906, + "step": 21360 + }, + { + "epoch": 1.27, + "learning_rate": 3.5583924720449574e-06, + "logits/chosen": -2.4493470191955566, + "logits/rejected": -1.7387266159057617, + "logps/chosen": -618.1738891601562, + "logps/rejected": -2023.214111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.465683937072754, + "rewards/margins": 14.320589065551758, + "rewards/rejected": -19.786273956298828, + "step": 21370 + }, + { + "epoch": 1.27, + "learning_rate": 3.5568207568401973e-06, + "logits/chosen": -2.486210346221924, + "logits/rejected": -1.7481534481048584, + "logps/chosen": -607.80126953125, + "logps/rejected": -2006.5657958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3803911209106445, + "rewards/margins": 14.247230529785156, + "rewards/rejected": -19.627620697021484, + "step": 21380 + }, + { + "epoch": 1.28, + "learning_rate": 3.555248532881938e-06, + "logits/chosen": -2.5146591663360596, + "logits/rejected": -1.6977474689483643, + "logps/chosen": -591.3853149414062, + "logps/rejected": -1988.006103515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1755051612854, + "rewards/margins": 14.262504577636719, + "rewards/rejected": -19.43800926208496, + "step": 21390 + }, + { + "epoch": 1.28, + "learning_rate": 3.553675800927048e-06, + "logits/chosen": -2.4551687240600586, + "logits/rejected": -1.6877771615982056, + "logps/chosen": -608.7168579101562, + "logps/rejected": -1949.022216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.419816017150879, + "rewards/margins": 13.639951705932617, + "rewards/rejected": -19.059768676757812, + "step": 21400 + }, + { + "epoch": 1.28, + "learning_rate": 3.5521025617326417e-06, + "logits/chosen": -2.4608747959136963, + "logits/rejected": -1.7299420833587646, + "logps/chosen": -609.2739868164062, + "logps/rejected": -2001.3521728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369372844696045, + "rewards/margins": 14.196691513061523, + "rewards/rejected": -19.56606674194336, + "step": 21410 + }, + { + "epoch": 1.28, + "learning_rate": 3.5505288160560745e-06, + "logits/chosen": -2.478466272354126, + "logits/rejected": -1.704193353652954, + "logps/chosen": -594.9210815429688, + "logps/rejected": -1999.275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255639553070068, + "rewards/margins": 14.279073715209961, + "rewards/rejected": -19.534713745117188, + "step": 21420 + }, + { + "epoch": 1.28, + "learning_rate": 3.5489545646549493e-06, + "logits/chosen": -2.4669811725616455, + "logits/rejected": -1.5950911045074463, + "logps/chosen": -607.9227905273438, + "logps/rejected": -2009.942138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.379992485046387, + "rewards/margins": 14.260841369628906, + "rewards/rejected": -19.64083480834961, + "step": 21430 + }, + { + "epoch": 1.28, + "learning_rate": 3.5473798082871104e-06, + "logits/chosen": -2.4986166954040527, + "logits/rejected": -1.7576913833618164, + "logps/chosen": -615.1521606445312, + "logps/rejected": -1960.003173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417405605316162, + "rewards/margins": 13.735610961914062, + "rewards/rejected": -19.15301513671875, + "step": 21440 + }, + { + "epoch": 1.28, + "learning_rate": 3.545804547710645e-06, + "logits/chosen": -2.5485997200012207, + "logits/rejected": -1.818245530128479, + "logps/chosen": -626.73779296875, + "logps/rejected": -1915.455810546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.582411289215088, + "rewards/margins": 13.131564140319824, + "rewards/rejected": -18.713977813720703, + "step": 21450 + }, + { + "epoch": 1.28, + "learning_rate": 3.5442287836838836e-06, + "logits/chosen": -2.482678174972534, + "logits/rejected": -1.7725378274917603, + "logps/chosen": -602.3047485351562, + "logps/rejected": -1913.274169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.334011077880859, + "rewards/margins": 13.367243766784668, + "rewards/rejected": -18.70125389099121, + "step": 21460 + }, + { + "epoch": 1.28, + "learning_rate": 3.542652516965399e-06, + "logits/chosen": -2.429887056350708, + "logits/rejected": -1.6835758686065674, + "logps/chosen": -615.8826904296875, + "logps/rejected": -2084.634521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.47912073135376, + "rewards/margins": 14.928762435913086, + "rewards/rejected": -20.407886505126953, + "step": 21470 + }, + { + "epoch": 1.28, + "learning_rate": 3.5410757483140057e-06, + "logits/chosen": -2.531876564025879, + "logits/rejected": -1.8681939840316772, + "logps/chosen": -605.0841674804688, + "logps/rejected": -1913.543212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.355047702789307, + "rewards/margins": 13.343737602233887, + "rewards/rejected": -18.698787689208984, + "step": 21480 + }, + { + "epoch": 1.28, + "learning_rate": 3.539498478488761e-06, + "logits/chosen": -2.468790292739868, + "logits/rejected": -1.7156394720077515, + "logps/chosen": -617.9779052734375, + "logps/rejected": -1969.9342041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.457930564880371, + "rewards/margins": 13.790555953979492, + "rewards/rejected": -19.24848747253418, + "step": 21490 + }, + { + "epoch": 1.28, + "learning_rate": 3.5379207082489614e-06, + "logits/chosen": -2.4229488372802734, + "logits/rejected": -1.7582178115844727, + "logps/chosen": -604.6818237304688, + "logps/rejected": -1953.2474365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.296062469482422, + "rewards/margins": 13.803260803222656, + "rewards/rejected": -19.099323272705078, + "step": 21500 + }, + { + "epoch": 1.28, + "learning_rate": 3.5363424383541465e-06, + "logits/chosen": -2.509929895401001, + "logits/rejected": -1.7872483730316162, + "logps/chosen": -605.3284301757812, + "logps/rejected": -1942.843017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3707170486450195, + "rewards/margins": 13.614112854003906, + "rewards/rejected": -18.98482894897461, + "step": 21510 + }, + { + "epoch": 1.28, + "learning_rate": 3.5347636695640946e-06, + "logits/chosen": -2.4600162506103516, + "logits/rejected": -1.7402681112289429, + "logps/chosen": -600.1130981445312, + "logps/rejected": -1994.5250244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3255934715271, + "rewards/margins": 14.16047191619873, + "rewards/rejected": -19.486064910888672, + "step": 21520 + }, + { + "epoch": 1.28, + "learning_rate": 3.5331844026388258e-06, + "logits/chosen": -2.465712785720825, + "logits/rejected": -1.6099964380264282, + "logps/chosen": -626.1910400390625, + "logps/rejected": -1896.82421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.433286666870117, + "rewards/margins": 13.096705436706543, + "rewards/rejected": -18.529993057250977, + "step": 21530 + }, + { + "epoch": 1.28, + "learning_rate": 3.5316046383385983e-06, + "logits/chosen": -2.4844346046447754, + "logits/rejected": -1.6433908939361572, + "logps/chosen": -620.7142944335938, + "logps/rejected": -2040.514892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.428653717041016, + "rewards/margins": 14.537397384643555, + "rewards/rejected": -19.96605110168457, + "step": 21540 + }, + { + "epoch": 1.29, + "learning_rate": 3.530024377423912e-06, + "logits/chosen": -2.5438289642333984, + "logits/rejected": -1.6116530895233154, + "logps/chosen": -621.6603393554688, + "logps/rejected": -2018.3577880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505305290222168, + "rewards/margins": 14.235536575317383, + "rewards/rejected": -19.740840911865234, + "step": 21550 + }, + { + "epoch": 1.29, + "learning_rate": 3.528443620655502e-06, + "logits/chosen": -2.5055434703826904, + "logits/rejected": -1.7532317638397217, + "logps/chosen": -617.9520263671875, + "logps/rejected": -2071.67578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4363226890563965, + "rewards/margins": 14.83204460144043, + "rewards/rejected": -20.268367767333984, + "step": 21560 + }, + { + "epoch": 1.29, + "learning_rate": 3.526862368794347e-06, + "logits/chosen": -2.4360508918762207, + "logits/rejected": -1.6198307275772095, + "logps/chosen": -618.7401123046875, + "logps/rejected": -1976.96484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.407491207122803, + "rewards/margins": 13.92060661315918, + "rewards/rejected": -19.328096389770508, + "step": 21570 + }, + { + "epoch": 1.29, + "learning_rate": 3.5252806226016612e-06, + "logits/chosen": -2.471069574356079, + "logits/rejected": -1.6139262914657593, + "logps/chosen": -614.47314453125, + "logps/rejected": -1954.8154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.415159702301025, + "rewards/margins": 13.704744338989258, + "rewards/rejected": -19.119903564453125, + "step": 21580 + }, + { + "epoch": 1.29, + "learning_rate": 3.523698382838896e-06, + "logits/chosen": -2.507568120956421, + "logits/rejected": -1.82930588722229, + "logps/chosen": -614.035400390625, + "logps/rejected": -1945.619873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.429479598999023, + "rewards/margins": 13.589604377746582, + "rewards/rejected": -19.019084930419922, + "step": 21590 + }, + { + "epoch": 1.29, + "learning_rate": 3.522115650267743e-06, + "logits/chosen": -2.432840347290039, + "logits/rejected": -1.7645504474639893, + "logps/chosen": -601.7796630859375, + "logps/rejected": -2040.706787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.296938419342041, + "rewards/margins": 14.668472290039062, + "rewards/rejected": -19.965412139892578, + "step": 21600 + }, + { + "epoch": 1.29, + "learning_rate": 3.5205324256501285e-06, + "logits/chosen": -2.4773707389831543, + "logits/rejected": -1.7266902923583984, + "logps/chosen": -597.7127685546875, + "logps/rejected": -1986.8638916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255453586578369, + "rewards/margins": 14.162862777709961, + "rewards/rejected": -19.418315887451172, + "step": 21610 + }, + { + "epoch": 1.29, + "learning_rate": 3.518948709748217e-06, + "logits/chosen": -2.419682264328003, + "logits/rejected": -1.6081256866455078, + "logps/chosen": -623.2051391601562, + "logps/rejected": -2066.27490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.519444942474365, + "rewards/margins": 14.697102546691895, + "rewards/rejected": -20.216548919677734, + "step": 21620 + }, + { + "epoch": 1.29, + "learning_rate": 3.5173645033244103e-06, + "logits/chosen": -2.4731409549713135, + "logits/rejected": -1.7948799133300781, + "logps/chosen": -620.6326293945312, + "logps/rejected": -2094.9150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.481078147888184, + "rewards/margins": 15.031265258789062, + "rewards/rejected": -20.51234245300293, + "step": 21630 + }, + { + "epoch": 1.29, + "learning_rate": 3.5157798071413452e-06, + "logits/chosen": -2.4647164344787598, + "logits/rejected": -1.7921628952026367, + "logps/chosen": -616.2389526367188, + "logps/rejected": -1973.233154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.465694427490234, + "rewards/margins": 13.825384140014648, + "rewards/rejected": -19.291080474853516, + "step": 21640 + }, + { + "epoch": 1.29, + "learning_rate": 3.5141946219618928e-06, + "logits/chosen": -2.466055154800415, + "logits/rejected": -1.647521734237671, + "logps/chosen": -601.3466186523438, + "logps/rejected": -1972.0982666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.333405494689941, + "rewards/margins": 13.941200256347656, + "rewards/rejected": -19.274606704711914, + "step": 21650 + }, + { + "epoch": 1.29, + "learning_rate": 3.5126089485491627e-06, + "logits/chosen": -2.451770305633545, + "logits/rejected": -1.6413968801498413, + "logps/chosen": -611.9031372070312, + "logps/rejected": -2054.0556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4122748374938965, + "rewards/margins": 14.69556999206543, + "rewards/rejected": -20.107845306396484, + "step": 21660 + }, + { + "epoch": 1.29, + "learning_rate": 3.5110227876664977e-06, + "logits/chosen": -2.5428543090820312, + "logits/rejected": -1.7631118297576904, + "logps/chosen": -603.2133178710938, + "logps/rejected": -1983.0006103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.305502414703369, + "rewards/margins": 14.079052925109863, + "rewards/rejected": -19.38455581665039, + "step": 21670 + }, + { + "epoch": 1.29, + "learning_rate": 3.5094361400774756e-06, + "logits/chosen": -2.48779559135437, + "logits/rejected": -1.8233932256698608, + "logps/chosen": -605.1459350585938, + "logps/rejected": -1998.015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.339136600494385, + "rewards/margins": 14.205533027648926, + "rewards/rejected": -19.5446720123291, + "step": 21680 + }, + { + "epoch": 1.29, + "learning_rate": 3.5078490065459083e-06, + "logits/chosen": -2.529968738555908, + "logits/rejected": -1.836866021156311, + "logps/chosen": -617.603515625, + "logps/rejected": -2060.56640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.391180992126465, + "rewards/margins": 14.770368576049805, + "rewards/rejected": -20.16155242919922, + "step": 21690 + }, + { + "epoch": 1.29, + "learning_rate": 3.506261387835842e-06, + "logits/chosen": -2.4674768447875977, + "logits/rejected": -1.7002454996109009, + "logps/chosen": -595.8067016601562, + "logps/rejected": -2033.349365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3350114822387695, + "rewards/margins": 14.567095756530762, + "rewards/rejected": -19.902109146118164, + "step": 21700 + }, + { + "epoch": 1.29, + "learning_rate": 3.5046732847115565e-06, + "logits/chosen": -2.4659173488616943, + "logits/rejected": -1.6102313995361328, + "logps/chosen": -613.7962036132812, + "logps/rejected": -2065.6728515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.383681297302246, + "rewards/margins": 14.822418212890625, + "rewards/rejected": -20.206096649169922, + "step": 21710 + }, + { + "epoch": 1.3, + "learning_rate": 3.503084697937565e-06, + "logits/chosen": -2.4851298332214355, + "logits/rejected": -1.7398601770401, + "logps/chosen": -626.9296264648438, + "logps/rejected": -1961.398193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.552237510681152, + "rewards/margins": 13.637939453125, + "rewards/rejected": -19.19017791748047, + "step": 21720 + }, + { + "epoch": 1.3, + "learning_rate": 3.501495628278613e-06, + "logits/chosen": -2.4352924823760986, + "logits/rejected": -1.7566356658935547, + "logps/chosen": -614.1588134765625, + "logps/rejected": -2028.0081787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.440464973449707, + "rewards/margins": 14.405982971191406, + "rewards/rejected": -19.846446990966797, + "step": 21730 + }, + { + "epoch": 1.3, + "learning_rate": 3.499906076499678e-06, + "logits/chosen": -2.497608184814453, + "logits/rejected": -1.7746717929840088, + "logps/chosen": -619.250732421875, + "logps/rejected": -1945.7193603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.539491653442383, + "rewards/margins": 13.480783462524414, + "rewards/rejected": -19.020275115966797, + "step": 21740 + }, + { + "epoch": 1.3, + "learning_rate": 3.4983160433659702e-06, + "logits/chosen": -2.554208278656006, + "logits/rejected": -1.83584725856781, + "logps/chosen": -611.3408813476562, + "logps/rejected": -2050.238037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.399575710296631, + "rewards/margins": 14.648820877075195, + "rewards/rejected": -20.048397064208984, + "step": 21750 + }, + { + "epoch": 1.3, + "learning_rate": 3.4967255296429315e-06, + "logits/chosen": -2.5557472705841064, + "logits/rejected": -1.8169481754302979, + "logps/chosen": -605.4384765625, + "logps/rejected": -2015.6314697265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.311775207519531, + "rewards/margins": 14.402791976928711, + "rewards/rejected": -19.714567184448242, + "step": 21760 + }, + { + "epoch": 1.3, + "learning_rate": 3.495134536096236e-06, + "logits/chosen": -2.419447660446167, + "logits/rejected": -1.6529115438461304, + "logps/chosen": -612.9288330078125, + "logps/rejected": -2003.553466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485176086425781, + "rewards/margins": 14.083253860473633, + "rewards/rejected": -19.568431854248047, + "step": 21770 + }, + { + "epoch": 1.3, + "learning_rate": 3.493543063491788e-06, + "logits/chosen": -2.4840126037597656, + "logits/rejected": -1.7451903820037842, + "logps/chosen": -607.338623046875, + "logps/rejected": -1991.9224853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.387466907501221, + "rewards/margins": 14.095609664916992, + "rewards/rejected": -19.48307991027832, + "step": 21780 + }, + { + "epoch": 1.3, + "learning_rate": 3.4919511125957207e-06, + "logits/chosen": -2.537621021270752, + "logits/rejected": -1.804608941078186, + "logps/chosen": -612.5257568359375, + "logps/rejected": -2021.732177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.404144287109375, + "rewards/margins": 14.361419677734375, + "rewards/rejected": -19.76556396484375, + "step": 21790 + }, + { + "epoch": 1.3, + "learning_rate": 3.490358684174401e-06, + "logits/chosen": -2.5062525272369385, + "logits/rejected": -1.6373249292373657, + "logps/chosen": -621.6224365234375, + "logps/rejected": -2050.4072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.526928424835205, + "rewards/margins": 14.537103652954102, + "rewards/rejected": -20.06403160095215, + "step": 21800 + }, + { + "epoch": 1.3, + "learning_rate": 3.4887657789944236e-06, + "logits/chosen": -2.4224298000335693, + "logits/rejected": -1.6774356365203857, + "logps/chosen": -628.6041870117188, + "logps/rejected": -1988.806884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6277689933776855, + "rewards/margins": 13.838030815124512, + "rewards/rejected": -19.46579933166504, + "step": 21810 + }, + { + "epoch": 1.3, + "learning_rate": 3.487172397822613e-06, + "logits/chosen": -2.450843334197998, + "logits/rejected": -1.691632628440857, + "logps/chosen": -623.0457153320312, + "logps/rejected": -1986.003173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.525817394256592, + "rewards/margins": 13.890678405761719, + "rewards/rejected": -19.41649627685547, + "step": 21820 + }, + { + "epoch": 1.3, + "learning_rate": 3.4855785414260225e-06, + "logits/chosen": -2.5178489685058594, + "logits/rejected": -1.8130912780761719, + "logps/chosen": -599.7755737304688, + "logps/rejected": -1975.031982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.239758491516113, + "rewards/margins": 14.07331657409668, + "rewards/rejected": -19.31307601928711, + "step": 21830 + }, + { + "epoch": 1.3, + "learning_rate": 3.4839842105719346e-06, + "logits/chosen": -2.5268688201904297, + "logits/rejected": -1.827558159828186, + "logps/chosen": -600.2457885742188, + "logps/rejected": -1955.4879150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.296050071716309, + "rewards/margins": 13.816554069519043, + "rewards/rejected": -19.11260414123535, + "step": 21840 + }, + { + "epoch": 1.3, + "learning_rate": 3.48238940602786e-06, + "logits/chosen": -2.4914493560791016, + "logits/rejected": -1.6913440227508545, + "logps/chosen": -607.0951538085938, + "logps/rejected": -1964.175537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.387154579162598, + "rewards/margins": 13.807073593139648, + "rewards/rejected": -19.194225311279297, + "step": 21850 + }, + { + "epoch": 1.3, + "learning_rate": 3.480794128561538e-06, + "logits/chosen": -2.5247490406036377, + "logits/rejected": -1.769840955734253, + "logps/chosen": -610.2566528320312, + "logps/rejected": -2052.892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.372275352478027, + "rewards/margins": 14.710378646850586, + "rewards/rejected": -20.082653045654297, + "step": 21860 + }, + { + "epoch": 1.3, + "learning_rate": 3.4791983789409358e-06, + "logits/chosen": -2.5115880966186523, + "logits/rejected": -1.7921546697616577, + "logps/chosen": -613.1264038085938, + "logps/rejected": -2035.996826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.42748498916626, + "rewards/margins": 14.474966049194336, + "rewards/rejected": -19.902450561523438, + "step": 21870 + }, + { + "epoch": 1.3, + "learning_rate": 3.4776021579342456e-06, + "logits/chosen": -2.5090112686157227, + "logits/rejected": -1.718409538269043, + "logps/chosen": -594.0076293945312, + "logps/rejected": -2110.121826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.258066177368164, + "rewards/margins": 15.402490615844727, + "rewards/rejected": -20.66055679321289, + "step": 21880 + }, + { + "epoch": 1.31, + "learning_rate": 3.476005466309889e-06, + "logits/chosen": -2.502563953399658, + "logits/rejected": -1.6544535160064697, + "logps/chosen": -603.8450317382812, + "logps/rejected": -2041.498779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.388652324676514, + "rewards/margins": 14.576518058776855, + "rewards/rejected": -19.965173721313477, + "step": 21890 + }, + { + "epoch": 1.31, + "learning_rate": 3.474408304836514e-06, + "logits/chosen": -2.3745877742767334, + "logits/rejected": -1.5056684017181396, + "logps/chosen": -715.9598388671875, + "logps/rejected": -2028.020263671875, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.391561031341553, + "rewards/margins": 13.427022933959961, + "rewards/rejected": -19.818584442138672, + "step": 21900 + }, + { + "epoch": 1.31, + "learning_rate": 3.4728106742829937e-06, + "logits/chosen": -2.283843755722046, + "logits/rejected": -1.457067608833313, + "logps/chosen": -867.86767578125, + "logps/rejected": -2283.5576171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.956930637359619, + "rewards/margins": 14.446734428405762, + "rewards/rejected": -22.403667449951172, + "step": 21910 + }, + { + "epoch": 1.31, + "learning_rate": 3.471212575418426e-06, + "logits/chosen": -2.326528549194336, + "logits/rejected": -1.5250182151794434, + "logps/chosen": -865.4011840820312, + "logps/rejected": -2307.13232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.927760124206543, + "rewards/margins": 14.697015762329102, + "rewards/rejected": -22.62477684020996, + "step": 21920 + }, + { + "epoch": 1.31, + "learning_rate": 3.4696140090121377e-06, + "logits/chosen": -2.343761920928955, + "logits/rejected": -1.4252017736434937, + "logps/chosen": -885.4998168945312, + "logps/rejected": -2415.77197265625, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.140348434448242, + "rewards/margins": 15.555252075195312, + "rewards/rejected": -23.695598602294922, + "step": 21930 + }, + { + "epoch": 1.31, + "learning_rate": 3.4680149758336777e-06, + "logits/chosen": -2.3134641647338867, + "logits/rejected": -1.513756513595581, + "logps/chosen": -1021.12548828125, + "logps/rejected": -2378.44775390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.561820983886719, + "rewards/margins": 13.780973434448242, + "rewards/rejected": -23.342792510986328, + "step": 21940 + }, + { + "epoch": 1.31, + "learning_rate": 3.4664154766528208e-06, + "logits/chosen": -2.3854198455810547, + "logits/rejected": -1.6946290731430054, + "logps/chosen": -1067.85595703125, + "logps/rejected": -2516.2021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.95546817779541, + "rewards/margins": 14.775651931762695, + "rewards/rejected": -24.73111915588379, + "step": 21950 + }, + { + "epoch": 1.31, + "learning_rate": 3.4648155122395653e-06, + "logits/chosen": -2.3653149604797363, + "logits/rejected": -1.4719345569610596, + "logps/chosen": -1098.5645751953125, + "logps/rejected": -2702.47802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.187387466430664, + "rewards/margins": 16.391735076904297, + "rewards/rejected": -26.579120635986328, + "step": 21960 + }, + { + "epoch": 1.31, + "learning_rate": 3.4632150833641347e-06, + "logits/chosen": -2.320006847381592, + "logits/rejected": -1.3857637643814087, + "logps/chosen": -1130.782958984375, + "logps/rejected": -2633.400634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.661131858825684, + "rewards/margins": 15.229782104492188, + "rewards/rejected": -25.890911102294922, + "step": 21970 + }, + { + "epoch": 1.31, + "learning_rate": 3.461614190796975e-06, + "logits/chosen": -2.3392319679260254, + "logits/rejected": -1.5992765426635742, + "logps/chosen": -1088.506103515625, + "logps/rejected": -2669.176025390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.160543441772461, + "rewards/margins": 16.079248428344727, + "rewards/rejected": -26.239791870117188, + "step": 21980 + }, + { + "epoch": 1.31, + "learning_rate": 3.460012835308757e-06, + "logits/chosen": -2.393123149871826, + "logits/rejected": -1.6325000524520874, + "logps/chosen": -1076.937744140625, + "logps/rejected": -2779.777587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -9.990219116210938, + "rewards/margins": 17.367389678955078, + "rewards/rejected": -27.35761070251465, + "step": 21990 + }, + { + "epoch": 1.31, + "learning_rate": 3.458411017670372e-06, + "logits/chosen": -2.2869620323181152, + "logits/rejected": -1.4693018198013306, + "logps/chosen": -1175.9638671875, + "logps/rejected": -2670.00341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -11.044012069702148, + "rewards/margins": 15.20768928527832, + "rewards/rejected": -26.251699447631836, + "step": 22000 + }, + { + "epoch": 1.31, + "learning_rate": 3.4568087386529347e-06, + "logits/chosen": -2.3615169525146484, + "logits/rejected": -1.6581144332885742, + "logps/chosen": -1114.3067626953125, + "logps/rejected": -2693.06787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -10.450422286987305, + "rewards/margins": 16.030899047851562, + "rewards/rejected": -26.4813232421875, + "step": 22010 + }, + { + "epoch": 1.31, + "learning_rate": 3.455205999027783e-06, + "logits/chosen": -2.428009510040283, + "logits/rejected": -1.571897268295288, + "logps/chosen": -967.4236450195312, + "logps/rejected": -2540.9111328125, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.955453872680664, + "rewards/margins": 16.020816802978516, + "rewards/rejected": -24.97627067565918, + "step": 22020 + }, + { + "epoch": 1.31, + "learning_rate": 3.4536027995664757e-06, + "logits/chosen": -2.4910871982574463, + "logits/rejected": -1.8584468364715576, + "logps/chosen": -562.7604370117188, + "logps/rejected": -1943.6937255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.91495418548584, + "rewards/margins": 14.075393676757812, + "rewards/rejected": -18.990346908569336, + "step": 22030 + }, + { + "epoch": 1.31, + "learning_rate": 3.4519991410407926e-06, + "logits/chosen": -2.5808005332946777, + "logits/rejected": -2.0159969329833984, + "logps/chosen": -511.8912658691406, + "logps/rejected": -1832.2396240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.377809524536133, + "rewards/margins": 13.487892150878906, + "rewards/rejected": -17.865703582763672, + "step": 22040 + }, + { + "epoch": 1.31, + "learning_rate": 3.4503950242227356e-06, + "logits/chosen": -2.547402858734131, + "logits/rejected": -1.9800662994384766, + "logps/chosen": -537.0470581054688, + "logps/rejected": -1749.2945556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.608798980712891, + "rewards/margins": 12.423788070678711, + "rewards/rejected": -17.032588958740234, + "step": 22050 + }, + { + "epoch": 1.32, + "learning_rate": 3.448790449884526e-06, + "logits/chosen": -2.5164008140563965, + "logits/rejected": -2.0088746547698975, + "logps/chosen": -532.2553100585938, + "logps/rejected": -1823.7720947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.589027404785156, + "rewards/margins": 13.212834358215332, + "rewards/rejected": -17.801862716674805, + "step": 22060 + }, + { + "epoch": 1.32, + "learning_rate": 3.447185418798606e-06, + "logits/chosen": -2.539248466491699, + "logits/rejected": -2.049926280975342, + "logps/chosen": -511.32470703125, + "logps/rejected": -1833.703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.399058818817139, + "rewards/margins": 13.493232727050781, + "rewards/rejected": -17.892292022705078, + "step": 22070 + }, + { + "epoch": 1.32, + "learning_rate": 3.445579931737637e-06, + "logits/chosen": -2.564631700515747, + "logits/rejected": -1.9497419595718384, + "logps/chosen": -513.2742919921875, + "logps/rejected": -1822.7919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.479794979095459, + "rewards/margins": 13.307212829589844, + "rewards/rejected": -17.78700828552246, + "step": 22080 + }, + { + "epoch": 1.32, + "learning_rate": 3.443973989474502e-06, + "logits/chosen": -2.5289528369903564, + "logits/rejected": -2.047959327697754, + "logps/chosen": -515.53173828125, + "logps/rejected": -1827.957763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439715385437012, + "rewards/margins": 13.389378547668457, + "rewards/rejected": -17.829092025756836, + "step": 22090 + }, + { + "epoch": 1.32, + "learning_rate": 3.4423675927822998e-06, + "logits/chosen": -2.5419414043426514, + "logits/rejected": -1.9536861181259155, + "logps/chosen": -520.2196044921875, + "logps/rejected": -1790.612548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.505190372467041, + "rewards/margins": 12.961027145385742, + "rewards/rejected": -17.466217041015625, + "step": 22100 + }, + { + "epoch": 1.32, + "learning_rate": 3.44076074243435e-06, + "logits/chosen": -2.560105800628662, + "logits/rejected": -1.9460971355438232, + "logps/chosen": -537.2964477539062, + "logps/rejected": -1778.5855712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.673906326293945, + "rewards/margins": 12.662272453308105, + "rewards/rejected": -17.336177825927734, + "step": 22110 + }, + { + "epoch": 1.32, + "learning_rate": 3.439153439204191e-06, + "logits/chosen": -2.5657927989959717, + "logits/rejected": -1.9897305965423584, + "logps/chosen": -532.2156982421875, + "logps/rejected": -1846.257080078125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.56626033782959, + "rewards/margins": 13.449617385864258, + "rewards/rejected": -18.015878677368164, + "step": 22120 + }, + { + "epoch": 1.32, + "learning_rate": 3.4375456838655787e-06, + "logits/chosen": -2.52215838432312, + "logits/rejected": -1.9647130966186523, + "logps/chosen": -584.7631225585938, + "logps/rejected": -1828.5777587890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.156736850738525, + "rewards/margins": 12.704373359680176, + "rewards/rejected": -17.86111068725586, + "step": 22130 + }, + { + "epoch": 1.32, + "learning_rate": 3.435937477192486e-06, + "logits/chosen": -2.5120506286621094, + "logits/rejected": -1.9265079498291016, + "logps/chosen": -583.3860473632812, + "logps/rejected": -1815.4830322265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.121037483215332, + "rewards/margins": 12.590047836303711, + "rewards/rejected": -17.71108627319336, + "step": 22140 + }, + { + "epoch": 1.32, + "learning_rate": 3.434328819959103e-06, + "logits/chosen": -2.4952847957611084, + "logits/rejected": -1.837354302406311, + "logps/chosen": -575.6815795898438, + "logps/rejected": -1916.977783203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.066859245300293, + "rewards/margins": 13.664449691772461, + "rewards/rejected": -18.73130989074707, + "step": 22150 + }, + { + "epoch": 1.32, + "learning_rate": 3.4327197129398387e-06, + "logits/chosen": -2.520498752593994, + "logits/rejected": -1.982235312461853, + "logps/chosen": -584.510009765625, + "logps/rejected": -1932.8294677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.129602909088135, + "rewards/margins": 13.753324508666992, + "rewards/rejected": -18.8829288482666, + "step": 22160 + }, + { + "epoch": 1.32, + "learning_rate": 3.431110156909316e-06, + "logits/chosen": -2.512807607650757, + "logits/rejected": -1.8174922466278076, + "logps/chosen": -610.3758544921875, + "logps/rejected": -1975.3365478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.393955707550049, + "rewards/margins": 13.925890922546387, + "rewards/rejected": -19.319847106933594, + "step": 22170 + }, + { + "epoch": 1.32, + "learning_rate": 3.429500152642377e-06, + "logits/chosen": -2.5159645080566406, + "logits/rejected": -1.9309337139129639, + "logps/chosen": -597.3361206054688, + "logps/rejected": -1883.749267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2516188621521, + "rewards/margins": 13.1434907913208, + "rewards/rejected": -18.39510726928711, + "step": 22180 + }, + { + "epoch": 1.32, + "learning_rate": 3.4278897009140753e-06, + "logits/chosen": -2.5349364280700684, + "logits/rejected": -1.8099874258041382, + "logps/chosen": -595.7646484375, + "logps/rejected": -1944.132568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.23374605178833, + "rewards/margins": 13.754465103149414, + "rewards/rejected": -18.988210678100586, + "step": 22190 + }, + { + "epoch": 1.32, + "learning_rate": 3.4262788024996835e-06, + "logits/chosen": -2.500765323638916, + "logits/rejected": -1.908652663230896, + "logps/chosen": -593.0830078125, + "logps/rejected": -1940.660400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.203087329864502, + "rewards/margins": 13.733325004577637, + "rewards/rejected": -18.936412811279297, + "step": 22200 + }, + { + "epoch": 1.32, + "learning_rate": 3.424667458174688e-06, + "logits/chosen": -2.5042152404785156, + "logits/rejected": -1.8966968059539795, + "logps/chosen": -608.6760864257812, + "logps/rejected": -1896.243896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.382842063903809, + "rewards/margins": 13.13733196258545, + "rewards/rejected": -18.52017593383789, + "step": 22210 + }, + { + "epoch": 1.32, + "learning_rate": 3.42305566871479e-06, + "logits/chosen": -2.5187156200408936, + "logits/rejected": -1.8639503717422485, + "logps/chosen": -597.6773681640625, + "logps/rejected": -1915.393310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.234400749206543, + "rewards/margins": 13.477679252624512, + "rewards/rejected": -18.712078094482422, + "step": 22220 + }, + { + "epoch": 1.33, + "learning_rate": 3.421443434895905e-06, + "logits/chosen": -2.5577380657196045, + "logits/rejected": -1.962664008140564, + "logps/chosen": -598.7108764648438, + "logps/rejected": -1944.08203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.247099876403809, + "rewards/margins": 13.746187210083008, + "rewards/rejected": -18.993288040161133, + "step": 22230 + }, + { + "epoch": 1.33, + "learning_rate": 3.419830757494162e-06, + "logits/chosen": -2.474839210510254, + "logits/rejected": -1.8155937194824219, + "logps/chosen": -595.62744140625, + "logps/rejected": -1931.419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.261076927185059, + "rewards/margins": 13.615216255187988, + "rewards/rejected": -18.876291275024414, + "step": 22240 + }, + { + "epoch": 1.33, + "learning_rate": 3.4182176372859038e-06, + "logits/chosen": -2.518916606903076, + "logits/rejected": -1.9643714427947998, + "logps/chosen": -597.262451171875, + "logps/rejected": -1877.1536865234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.215981483459473, + "rewards/margins": 13.112683296203613, + "rewards/rejected": -18.328662872314453, + "step": 22250 + }, + { + "epoch": 1.33, + "learning_rate": 3.4166040750476868e-06, + "logits/chosen": -2.5445070266723633, + "logits/rejected": -1.869652509689331, + "logps/chosen": -600.2974243164062, + "logps/rejected": -1969.031494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.279083251953125, + "rewards/margins": 13.965374946594238, + "rewards/rejected": -19.244457244873047, + "step": 22260 + }, + { + "epoch": 1.33, + "learning_rate": 3.41499007155628e-06, + "logits/chosen": -2.5034422874450684, + "logits/rejected": -1.8354122638702393, + "logps/chosen": -583.4475708007812, + "logps/rejected": -1875.4302978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.164216041564941, + "rewards/margins": 13.159208297729492, + "rewards/rejected": -18.323421478271484, + "step": 22270 + }, + { + "epoch": 1.33, + "learning_rate": 3.413375627588664e-06, + "logits/chosen": -2.4966275691986084, + "logits/rejected": -1.8667709827423096, + "logps/chosen": -606.3138427734375, + "logps/rejected": -1858.933837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.373301029205322, + "rewards/margins": 12.779576301574707, + "rewards/rejected": -18.152877807617188, + "step": 22280 + }, + { + "epoch": 1.33, + "learning_rate": 3.4117607439220336e-06, + "logits/chosen": -2.5391769409179688, + "logits/rejected": -1.9239721298217773, + "logps/chosen": -601.5701904296875, + "logps/rejected": -1964.1357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.30482816696167, + "rewards/margins": 13.898022651672363, + "rewards/rejected": -19.20285415649414, + "step": 22290 + }, + { + "epoch": 1.33, + "learning_rate": 3.4101454213337916e-06, + "logits/chosen": -2.5204763412475586, + "logits/rejected": -1.8922297954559326, + "logps/chosen": -603.5986938476562, + "logps/rejected": -1996.2412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.330271244049072, + "rewards/margins": 14.17438793182373, + "rewards/rejected": -19.50465965270996, + "step": 22300 + }, + { + "epoch": 1.33, + "learning_rate": 3.4085296606015557e-06, + "logits/chosen": -2.4407219886779785, + "logits/rejected": -1.747958779335022, + "logps/chosen": -602.0638427734375, + "logps/rejected": -1893.7181396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.316110134124756, + "rewards/margins": 13.175755500793457, + "rewards/rejected": -18.491865158081055, + "step": 22310 + }, + { + "epoch": 1.33, + "learning_rate": 3.406913462503153e-06, + "logits/chosen": -2.5251200199127197, + "logits/rejected": -1.8831008672714233, + "logps/chosen": -579.6535034179688, + "logps/rejected": -1964.338623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1259918212890625, + "rewards/margins": 14.074946403503418, + "rewards/rejected": -19.200939178466797, + "step": 22320 + }, + { + "epoch": 1.33, + "learning_rate": 3.4052968278166205e-06, + "logits/chosen": -2.5044045448303223, + "logits/rejected": -1.8738094568252563, + "logps/chosen": -607.368896484375, + "logps/rejected": -1980.966064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.381190776824951, + "rewards/margins": 14.001726150512695, + "rewards/rejected": -19.382915496826172, + "step": 22330 + }, + { + "epoch": 1.33, + "learning_rate": 3.4036797573202067e-06, + "logits/chosen": -2.561440944671631, + "logits/rejected": -2.0141408443450928, + "logps/chosen": -589.0967407226562, + "logps/rejected": -1930.8394775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.232773780822754, + "rewards/margins": 13.63965129852295, + "rewards/rejected": -18.872426986694336, + "step": 22340 + }, + { + "epoch": 1.33, + "learning_rate": 3.40206225179237e-06, + "logits/chosen": -2.5241236686706543, + "logits/rejected": -1.9389584064483643, + "logps/chosen": -607.1156005859375, + "logps/rejected": -1828.9710693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.375097274780273, + "rewards/margins": 12.478185653686523, + "rewards/rejected": -17.853282928466797, + "step": 22350 + }, + { + "epoch": 1.33, + "learning_rate": 3.400444312011776e-06, + "logits/chosen": -2.520395517349243, + "logits/rejected": -1.8894901275634766, + "logps/chosen": -601.5864868164062, + "logps/rejected": -1915.418212890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.240748882293701, + "rewards/margins": 13.46288776397705, + "rewards/rejected": -18.70363426208496, + "step": 22360 + }, + { + "epoch": 1.33, + "learning_rate": 3.3988259387573023e-06, + "logits/chosen": -2.475290060043335, + "logits/rejected": -1.777512788772583, + "logps/chosen": -584.8731689453125, + "logps/rejected": -1842.4365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.165062427520752, + "rewards/margins": 12.833447456359863, + "rewards/rejected": -17.99850845336914, + "step": 22370 + }, + { + "epoch": 1.33, + "learning_rate": 3.397207132808033e-06, + "logits/chosen": -2.5259907245635986, + "logits/rejected": -1.887205719947815, + "logps/chosen": -582.1580200195312, + "logps/rejected": -1958.7369384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0909318923950195, + "rewards/margins": 14.058549880981445, + "rewards/rejected": -19.14948272705078, + "step": 22380 + }, + { + "epoch": 1.34, + "learning_rate": 3.3955878949432615e-06, + "logits/chosen": -2.5071897506713867, + "logits/rejected": -1.8793230056762695, + "logps/chosen": -599.4298706054688, + "logps/rejected": -1982.49609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.259951114654541, + "rewards/margins": 14.11108684539795, + "rewards/rejected": -19.37103843688965, + "step": 22390 + }, + { + "epoch": 1.34, + "learning_rate": 3.3939682259424883e-06, + "logits/chosen": -2.487229824066162, + "logits/rejected": -1.9480682611465454, + "logps/chosen": -588.1074829101562, + "logps/rejected": -1868.3138427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.186502933502197, + "rewards/margins": 13.068082809448242, + "rewards/rejected": -18.254589080810547, + "step": 22400 + }, + { + "epoch": 1.34, + "learning_rate": 3.3923481265854226e-06, + "logits/chosen": -2.4897308349609375, + "logits/rejected": -1.7957165241241455, + "logps/chosen": -599.3485717773438, + "logps/rejected": -1886.524169921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.317178726196289, + "rewards/margins": 13.107671737670898, + "rewards/rejected": -18.424850463867188, + "step": 22410 + }, + { + "epoch": 1.34, + "learning_rate": 3.3907275976519804e-06, + "logits/chosen": -2.5034241676330566, + "logits/rejected": -1.8244603872299194, + "logps/chosen": -620.5010986328125, + "logps/rejected": -1966.380615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4704766273498535, + "rewards/margins": 13.742815017700195, + "rewards/rejected": -19.21329116821289, + "step": 22420 + }, + { + "epoch": 1.34, + "learning_rate": 3.3891066399222837e-06, + "logits/chosen": -2.4898664951324463, + "logits/rejected": -1.7883447408676147, + "logps/chosen": -612.3502197265625, + "logps/rejected": -1901.940673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.472476959228516, + "rewards/margins": 13.111745834350586, + "rewards/rejected": -18.5842227935791, + "step": 22430 + }, + { + "epoch": 1.34, + "learning_rate": 3.387485254176663e-06, + "logits/chosen": -2.51589298248291, + "logits/rejected": -1.9805259704589844, + "logps/chosen": -609.3010864257812, + "logps/rejected": -1931.507080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.39852237701416, + "rewards/margins": 13.48047924041748, + "rewards/rejected": -18.87900161743164, + "step": 22440 + }, + { + "epoch": 1.34, + "learning_rate": 3.385863441195651e-06, + "logits/chosen": -2.4754631519317627, + "logits/rejected": -1.8865827322006226, + "logps/chosen": -610.5914916992188, + "logps/rejected": -1933.209716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.423667907714844, + "rewards/margins": 13.45867919921875, + "rewards/rejected": -18.882347106933594, + "step": 22450 + }, + { + "epoch": 1.34, + "learning_rate": 3.38424120175999e-06, + "logits/chosen": -2.5029168128967285, + "logits/rejected": -1.922237753868103, + "logps/chosen": -611.0115966796875, + "logps/rejected": -1926.5521240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.413222312927246, + "rewards/margins": 13.400491714477539, + "rewards/rejected": -18.81371307373047, + "step": 22460 + }, + { + "epoch": 1.34, + "learning_rate": 3.382618536650626e-06, + "logits/chosen": -2.4574761390686035, + "logits/rejected": -1.751721978187561, + "logps/chosen": -636.8297119140625, + "logps/rejected": -1900.836181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.646429061889648, + "rewards/margins": 12.929913520812988, + "rewards/rejected": -18.576339721679688, + "step": 22470 + }, + { + "epoch": 1.34, + "learning_rate": 3.380995446648709e-06, + "logits/chosen": -2.4980902671813965, + "logits/rejected": -1.9127938747406006, + "logps/chosen": -608.8382568359375, + "logps/rejected": -1933.572509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.380022048950195, + "rewards/margins": 13.513345718383789, + "rewards/rejected": -18.893367767333984, + "step": 22480 + }, + { + "epoch": 1.34, + "learning_rate": 3.3793719325355963e-06, + "logits/chosen": -2.533022403717041, + "logits/rejected": -1.9287598133087158, + "logps/chosen": -602.447021484375, + "logps/rejected": -2032.7281494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.315526008605957, + "rewards/margins": 14.562182426452637, + "rewards/rejected": -19.87770652770996, + "step": 22490 + }, + { + "epoch": 1.34, + "learning_rate": 3.377747995092846e-06, + "logits/chosen": -2.5370125770568848, + "logits/rejected": -1.8935184478759766, + "logps/chosen": -591.0535278320312, + "logps/rejected": -1940.8970947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.167373180389404, + "rewards/margins": 13.790980339050293, + "rewards/rejected": -18.95835304260254, + "step": 22500 + }, + { + "epoch": 1.34, + "learning_rate": 3.376123635102222e-06, + "logits/chosen": -2.527818202972412, + "logits/rejected": -1.8150901794433594, + "logps/chosen": -613.5214233398438, + "logps/rejected": -1972.248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4556169509887695, + "rewards/margins": 13.82043170928955, + "rewards/rejected": -19.276050567626953, + "step": 22510 + }, + { + "epoch": 1.34, + "learning_rate": 3.374498853345691e-06, + "logits/chosen": -2.4975011348724365, + "logits/rejected": -1.8668711185455322, + "logps/chosen": -593.5352783203125, + "logps/rejected": -1948.4410400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.287064552307129, + "rewards/margins": 13.758868217468262, + "rewards/rejected": -19.045930862426758, + "step": 22520 + }, + { + "epoch": 1.34, + "learning_rate": 3.3728736506054234e-06, + "logits/chosen": -2.5557358264923096, + "logits/rejected": -1.912031888961792, + "logps/chosen": -598.42333984375, + "logps/rejected": -1968.301513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.224846839904785, + "rewards/margins": 14.006673812866211, + "rewards/rejected": -19.231521606445312, + "step": 22530 + }, + { + "epoch": 1.34, + "learning_rate": 3.3712480276637916e-06, + "logits/chosen": -2.4790709018707275, + "logits/rejected": -1.8188022375106812, + "logps/chosen": -605.2501220703125, + "logps/rejected": -1878.75390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3447418212890625, + "rewards/margins": 13.0072660446167, + "rewards/rejected": -18.352006912231445, + "step": 22540 + }, + { + "epoch": 1.34, + "learning_rate": 3.3696219853033703e-06, + "logits/chosen": -2.4940571784973145, + "logits/rejected": -1.8609809875488281, + "logps/chosen": -599.6315307617188, + "logps/rejected": -1959.5101318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.313841819763184, + "rewards/margins": 13.839859008789062, + "rewards/rejected": -19.15369987487793, + "step": 22550 + }, + { + "epoch": 1.35, + "learning_rate": 3.3679955243069364e-06, + "logits/chosen": -2.5384902954101562, + "logits/rejected": -1.8686206340789795, + "logps/chosen": -622.38623046875, + "logps/rejected": -1903.764404296875, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.475009441375732, + "rewards/margins": 13.120462417602539, + "rewards/rejected": -18.59547233581543, + "step": 22560 + }, + { + "epoch": 1.35, + "learning_rate": 3.366368645457468e-06, + "logits/chosen": -2.4403975009918213, + "logits/rejected": -1.813977599143982, + "logps/chosen": -689.699462890625, + "logps/rejected": -2032.1259765625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222969055175781, + "rewards/margins": 13.649436950683594, + "rewards/rejected": -19.87240982055664, + "step": 22570 + }, + { + "epoch": 1.35, + "learning_rate": 3.3647413495381447e-06, + "logits/chosen": -2.391469717025757, + "logits/rejected": -1.7641773223876953, + "logps/chosen": -732.8846435546875, + "logps/rejected": -2063.6904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638262748718262, + "rewards/margins": 13.556103706359863, + "rewards/rejected": -20.194366455078125, + "step": 22580 + }, + { + "epoch": 1.35, + "learning_rate": 3.3631136373323468e-06, + "logits/chosen": -2.4549431800842285, + "logits/rejected": -1.6008037328720093, + "logps/chosen": -676.3812255859375, + "logps/rejected": -2045.3994140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115433692932129, + "rewards/margins": 13.901799201965332, + "rewards/rejected": -20.017230987548828, + "step": 22590 + }, + { + "epoch": 1.35, + "learning_rate": 3.3614855096236553e-06, + "logits/chosen": -2.3992438316345215, + "logits/rejected": -1.720537543296814, + "logps/chosen": -730.53662109375, + "logps/rejected": -2032.531005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.614490509033203, + "rewards/margins": 13.259801864624023, + "rewards/rejected": -19.874292373657227, + "step": 22600 + }, + { + "epoch": 1.35, + "learning_rate": 3.35985696719585e-06, + "logits/chosen": -2.440725326538086, + "logits/rejected": -1.706229567527771, + "logps/chosen": -686.4691162109375, + "logps/rejected": -2029.8724365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1280999183654785, + "rewards/margins": 13.732696533203125, + "rewards/rejected": -19.860797882080078, + "step": 22610 + }, + { + "epoch": 1.35, + "learning_rate": 3.3582280108329125e-06, + "logits/chosen": -2.433323621749878, + "logits/rejected": -1.6916835308074951, + "logps/chosen": -671.4424438476562, + "logps/rejected": -2025.6419677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.01182222366333, + "rewards/margins": 13.79863452911377, + "rewards/rejected": -19.81045913696289, + "step": 22620 + }, + { + "epoch": 1.35, + "learning_rate": 3.3565986413190204e-06, + "logits/chosen": -2.389923572540283, + "logits/rejected": -1.6983144283294678, + "logps/chosen": -688.7901611328125, + "logps/rejected": -2005.873779296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.236837863922119, + "rewards/margins": 13.378305435180664, + "rewards/rejected": -19.615142822265625, + "step": 22630 + }, + { + "epoch": 1.35, + "learning_rate": 3.3549688594385537e-06, + "logits/chosen": -2.4184751510620117, + "logits/rejected": -1.6749861240386963, + "logps/chosen": -707.2222900390625, + "logps/rejected": -2058.02783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.365988731384277, + "rewards/margins": 13.774714469909668, + "rewards/rejected": -20.140705108642578, + "step": 22640 + }, + { + "epoch": 1.35, + "learning_rate": 3.353338665976089e-06, + "logits/chosen": -2.46227765083313, + "logits/rejected": -1.695159673690796, + "logps/chosen": -665.6925048828125, + "logps/rejected": -2139.681640625, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.911202907562256, + "rewards/margins": 15.035222053527832, + "rewards/rejected": -20.946426391601562, + "step": 22650 + }, + { + "epoch": 1.35, + "learning_rate": 3.3517080617164018e-06, + "logits/chosen": -2.5489983558654785, + "logits/rejected": -2.0586156845092773, + "logps/chosen": -458.770263671875, + "logps/rejected": -1740.89453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8868212699890137, + "rewards/margins": 13.082051277160645, + "rewards/rejected": -16.968875885009766, + "step": 22660 + }, + { + "epoch": 1.35, + "learning_rate": 3.3500770474444638e-06, + "logits/chosen": -2.582979440689087, + "logits/rejected": -2.0129265785217285, + "logps/chosen": -419.575439453125, + "logps/rejected": -1545.311767578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.466975450515747, + "rewards/margins": 11.543838500976562, + "rewards/rejected": -15.010815620422363, + "step": 22670 + }, + { + "epoch": 1.35, + "learning_rate": 3.3484456239454467e-06, + "logits/chosen": -2.593841075897217, + "logits/rejected": -2.069803476333618, + "logps/chosen": -402.01788330078125, + "logps/rejected": -1532.594482421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.298365354537964, + "rewards/margins": 11.587916374206543, + "rewards/rejected": -14.88628101348877, + "step": 22680 + }, + { + "epoch": 1.35, + "learning_rate": 3.346813792004716e-06, + "logits/chosen": -2.5616939067840576, + "logits/rejected": -1.9938294887542725, + "logps/chosen": -407.5888366699219, + "logps/rejected": -1516.0806884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3488528728485107, + "rewards/margins": 11.37429141998291, + "rewards/rejected": -14.72314453125, + "step": 22690 + }, + { + "epoch": 1.35, + "learning_rate": 3.345181552407838e-06, + "logits/chosen": -2.5494461059570312, + "logits/rejected": -2.0073952674865723, + "logps/chosen": -404.40753173828125, + "logps/rejected": -1533.806396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3321521282196045, + "rewards/margins": 11.564738273620605, + "rewards/rejected": -14.896890640258789, + "step": 22700 + }, + { + "epoch": 1.35, + "learning_rate": 3.3435489059405713e-06, + "logits/chosen": -2.5566115379333496, + "logits/rejected": -2.0777273178100586, + "logps/chosen": -398.6656188964844, + "logps/rejected": -1485.893798828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3185482025146484, + "rewards/margins": 11.088773727416992, + "rewards/rejected": -14.407320976257324, + "step": 22710 + }, + { + "epoch": 1.35, + "learning_rate": 3.341915853388871e-06, + "logits/chosen": -2.4963901042938232, + "logits/rejected": -1.9390316009521484, + "logps/chosen": -404.6007995605469, + "logps/rejected": -1542.9842529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3923981189727783, + "rewards/margins": 11.593132019042969, + "rewards/rejected": -14.985529899597168, + "step": 22720 + }, + { + "epoch": 1.36, + "learning_rate": 3.34028239553889e-06, + "logits/chosen": -2.5802125930786133, + "logits/rejected": -2.088883876800537, + "logps/chosen": -400.07391357421875, + "logps/rejected": -1512.74267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3053154945373535, + "rewards/margins": 11.37558650970459, + "rewards/rejected": -14.680902481079102, + "step": 22730 + }, + { + "epoch": 1.36, + "learning_rate": 3.3386485331769747e-06, + "logits/chosen": -2.57755446434021, + "logits/rejected": -2.039813756942749, + "logps/chosen": -406.99676513671875, + "logps/rejected": -1559.750244140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3681740760803223, + "rewards/margins": 11.789924621582031, + "rewards/rejected": -15.158099174499512, + "step": 22740 + }, + { + "epoch": 1.36, + "learning_rate": 3.3370142670896662e-06, + "logits/chosen": -2.557349681854248, + "logits/rejected": -2.0839455127716064, + "logps/chosen": -404.72222900390625, + "logps/rejected": -1574.4097900390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.368408203125, + "rewards/margins": 11.943464279174805, + "rewards/rejected": -15.311871528625488, + "step": 22750 + }, + { + "epoch": 1.36, + "learning_rate": 3.3353795980637e-06, + "logits/chosen": -2.5570614337921143, + "logits/rejected": -2.0344185829162598, + "logps/chosen": -405.15460205078125, + "logps/rejected": -1535.818115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.364269256591797, + "rewards/margins": 11.545223236083984, + "rewards/rejected": -14.909492492675781, + "step": 22760 + }, + { + "epoch": 1.36, + "learning_rate": 3.3337445268860065e-06, + "logits/chosen": -2.5759167671203613, + "logits/rejected": -2.087027072906494, + "logps/chosen": -426.28033447265625, + "logps/rejected": -1539.0758056640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6045451164245605, + "rewards/margins": 11.349283218383789, + "rewards/rejected": -14.953828811645508, + "step": 22770 + }, + { + "epoch": 1.36, + "learning_rate": 3.332109054343707e-06, + "logits/chosen": -2.548947334289551, + "logits/rejected": -2.034942626953125, + "logps/chosen": -426.5342712402344, + "logps/rejected": -1586.861083984375, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5433781147003174, + "rewards/margins": 11.876626968383789, + "rewards/rejected": -15.420005798339844, + "step": 22780 + }, + { + "epoch": 1.36, + "learning_rate": 3.330473181224121e-06, + "logits/chosen": -2.5692319869995117, + "logits/rejected": -2.0468571186065674, + "logps/chosen": -405.06634521484375, + "logps/rejected": -1547.07470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.338808536529541, + "rewards/margins": 11.694719314575195, + "rewards/rejected": -15.033526420593262, + "step": 22790 + }, + { + "epoch": 1.36, + "learning_rate": 3.328836908314755e-06, + "logits/chosen": -2.569683313369751, + "logits/rejected": -2.076676368713379, + "logps/chosen": -401.66241455078125, + "logps/rejected": -1611.8876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.298072099685669, + "rewards/margins": 12.372392654418945, + "rewards/rejected": -15.670463562011719, + "step": 22800 + }, + { + "epoch": 1.36, + "learning_rate": 3.3272002364033127e-06, + "logits/chosen": -2.57011079788208, + "logits/rejected": -2.030674457550049, + "logps/chosen": -411.11126708984375, + "logps/rejected": -1569.55126953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3573806285858154, + "rewards/margins": 11.881035804748535, + "rewards/rejected": -15.238415718078613, + "step": 22810 + }, + { + "epoch": 1.36, + "learning_rate": 3.3255631662776866e-06, + "logits/chosen": -2.5918712615966797, + "logits/rejected": -2.0578458309173584, + "logps/chosen": -393.88494873046875, + "logps/rejected": -1626.379638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.254591464996338, + "rewards/margins": 12.571415901184082, + "rewards/rejected": -15.826006889343262, + "step": 22820 + }, + { + "epoch": 1.36, + "learning_rate": 3.3239256987259635e-06, + "logits/chosen": -2.5556657314300537, + "logits/rejected": -2.0023999214172363, + "logps/chosen": -403.67193603515625, + "logps/rejected": -1562.4368896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.328594207763672, + "rewards/margins": 11.865789413452148, + "rewards/rejected": -15.194384574890137, + "step": 22830 + }, + { + "epoch": 1.36, + "learning_rate": 3.3222878345364184e-06, + "logits/chosen": -2.5748887062072754, + "logits/rejected": -2.023359775543213, + "logps/chosen": -398.8675842285156, + "logps/rejected": -1597.2496337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3207614421844482, + "rewards/margins": 12.2044677734375, + "rewards/rejected": -15.525232315063477, + "step": 22840 + }, + { + "epoch": 1.36, + "learning_rate": 3.320649574497521e-06, + "logits/chosen": -2.57901668548584, + "logits/rejected": -2.111314296722412, + "logps/chosen": -398.6393127441406, + "logps/rejected": -1584.567626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.328023910522461, + "rewards/margins": 12.074630737304688, + "rewards/rejected": -15.402656555175781, + "step": 22850 + }, + { + "epoch": 1.36, + "learning_rate": 3.319010919397929e-06, + "logits/chosen": -2.5648560523986816, + "logits/rejected": -2.0813069343566895, + "logps/chosen": -407.1086730957031, + "logps/rejected": -1589.248779296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3837027549743652, + "rewards/margins": 12.067493438720703, + "rewards/rejected": -15.451194763183594, + "step": 22860 + }, + { + "epoch": 1.36, + "learning_rate": 3.3173718700264906e-06, + "logits/chosen": -2.5644783973693848, + "logits/rejected": -2.0471348762512207, + "logps/chosen": -402.4000549316406, + "logps/rejected": -1541.121826171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3235363960266113, + "rewards/margins": 11.652020454406738, + "rewards/rejected": -14.975555419921875, + "step": 22870 + }, + { + "epoch": 1.36, + "learning_rate": 3.315732427172244e-06, + "logits/chosen": -2.5630040168762207, + "logits/rejected": -2.057568073272705, + "logps/chosen": -406.6698303222656, + "logps/rejected": -1576.476806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.373198986053467, + "rewards/margins": 11.954028129577637, + "rewards/rejected": -15.327229499816895, + "step": 22880 + }, + { + "epoch": 1.36, + "learning_rate": 3.3140925916244184e-06, + "logits/chosen": -2.5678887367248535, + "logits/rejected": -2.057999849319458, + "logps/chosen": -394.22967529296875, + "logps/rejected": -1634.439208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2604598999023438, + "rewards/margins": 12.629770278930664, + "rewards/rejected": -15.890230178833008, + "step": 22890 + }, + { + "epoch": 1.37, + "learning_rate": 3.3124523641724293e-06, + "logits/chosen": -2.5761334896087646, + "logits/rejected": -1.9541776180267334, + "logps/chosen": -397.0384521484375, + "logps/rejected": -1644.365966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2867228984832764, + "rewards/margins": 12.719038009643555, + "rewards/rejected": -16.005762100219727, + "step": 22900 + }, + { + "epoch": 1.37, + "learning_rate": 3.310811745605882e-06, + "logits/chosen": -2.551801919937134, + "logits/rejected": -2.034160614013672, + "logps/chosen": -403.4352111816406, + "logps/rejected": -1565.8551025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.339865207672119, + "rewards/margins": 11.877331733703613, + "rewards/rejected": -15.217196464538574, + "step": 22910 + }, + { + "epoch": 1.37, + "learning_rate": 3.3091707367145707e-06, + "logits/chosen": -2.5935099124908447, + "logits/rejected": -2.0102970600128174, + "logps/chosen": -414.6192321777344, + "logps/rejected": -1607.3880615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.4109625816345215, + "rewards/margins": 12.220705032348633, + "rewards/rejected": -15.631668090820312, + "step": 22920 + }, + { + "epoch": 1.37, + "learning_rate": 3.307529338288477e-06, + "logits/chosen": -2.546821117401123, + "logits/rejected": -1.9935051202774048, + "logps/chosen": -426.716064453125, + "logps/rejected": -1563.915283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.6279029846191406, + "rewards/margins": 11.574747085571289, + "rewards/rejected": -15.202649116516113, + "step": 22930 + }, + { + "epoch": 1.37, + "learning_rate": 3.30588755111777e-06, + "logits/chosen": -2.561418056488037, + "logits/rejected": -1.9954618215560913, + "logps/chosen": -402.78924560546875, + "logps/rejected": -1632.2894287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.2497105598449707, + "rewards/margins": 12.62114429473877, + "rewards/rejected": -15.870854377746582, + "step": 22940 + }, + { + "epoch": 1.37, + "learning_rate": 3.304245375992807e-06, + "logits/chosen": -2.5960915088653564, + "logits/rejected": -2.0130696296691895, + "logps/chosen": -404.5988464355469, + "logps/rejected": -1589.4503173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3658573627471924, + "rewards/margins": 12.097318649291992, + "rewards/rejected": -15.463174819946289, + "step": 22950 + }, + { + "epoch": 1.37, + "learning_rate": 3.3026028137041295e-06, + "logits/chosen": -2.561286449432373, + "logits/rejected": -2.07425856590271, + "logps/chosen": -405.1501159667969, + "logps/rejected": -1570.6629638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3822274208068848, + "rewards/margins": 11.887992858886719, + "rewards/rejected": -15.270219802856445, + "step": 22960 + }, + { + "epoch": 1.37, + "learning_rate": 3.300959865042468e-06, + "logits/chosen": -2.575077533721924, + "logits/rejected": -2.108003854751587, + "logps/chosen": -411.85552978515625, + "logps/rejected": -1621.437255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3228015899658203, + "rewards/margins": 12.446929931640625, + "rewards/rejected": -15.769732475280762, + "step": 22970 + }, + { + "epoch": 1.37, + "learning_rate": 3.299316530798738e-06, + "logits/chosen": -2.5161643028259277, + "logits/rejected": -1.9302895069122314, + "logps/chosen": -410.17938232421875, + "logps/rejected": -1613.8079833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.390472412109375, + "rewards/margins": 12.296277046203613, + "rewards/rejected": -15.686749458312988, + "step": 22980 + }, + { + "epoch": 1.37, + "learning_rate": 3.29767281176404e-06, + "logits/chosen": -2.591540813446045, + "logits/rejected": -1.9594815969467163, + "logps/chosen": -401.5811767578125, + "logps/rejected": -1657.3978271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.3113245964050293, + "rewards/margins": 12.825309753417969, + "rewards/rejected": -16.13663673400879, + "step": 22990 + }, + { + "epoch": 1.37, + "learning_rate": 3.2960287087296607e-06, + "logits/chosen": -2.58805513381958, + "logits/rejected": -2.1093249320983887, + "logps/chosen": -404.4593200683594, + "logps/rejected": -1616.112548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.337430953979492, + "rewards/margins": 12.390612602233887, + "rewards/rejected": -15.728045463562012, + "step": 23000 + }, + { + "epoch": 1.37, + "learning_rate": 3.2943842224870705e-06, + "logits/chosen": -2.571833848953247, + "logits/rejected": -2.0762009620666504, + "logps/chosen": -429.4159240722656, + "logps/rejected": -1604.2471923828125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5852596759796143, + "rewards/margins": 12.018656730651855, + "rewards/rejected": -15.603915214538574, + "step": 23010 + }, + { + "epoch": 1.37, + "learning_rate": 3.2927393538279258e-06, + "logits/chosen": -2.517657518386841, + "logits/rejected": -1.9103240966796875, + "logps/chosen": -488.3755798339844, + "logps/rejected": -1705.8441162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.187191963195801, + "rewards/margins": 12.426435470581055, + "rewards/rejected": -16.613628387451172, + "step": 23020 + }, + { + "epoch": 1.37, + "learning_rate": 3.2910941035440662e-06, + "logits/chosen": -2.509183883666992, + "logits/rejected": -1.8564789295196533, + "logps/chosen": -496.9072265625, + "logps/rejected": -1656.259765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.260279655456543, + "rewards/margins": 11.865986824035645, + "rewards/rejected": -16.126266479492188, + "step": 23030 + }, + { + "epoch": 1.37, + "learning_rate": 3.2894484724275156e-06, + "logits/chosen": -2.5247108936309814, + "logits/rejected": -1.9120748043060303, + "logps/chosen": -492.987060546875, + "logps/rejected": -1772.322021484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.234239101409912, + "rewards/margins": 13.043130874633789, + "rewards/rejected": -17.277368545532227, + "step": 23040 + }, + { + "epoch": 1.37, + "learning_rate": 3.2878024612704805e-06, + "logits/chosen": -2.4808382987976074, + "logits/rejected": -1.8272984027862549, + "logps/chosen": -502.87188720703125, + "logps/rejected": -1769.597900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.337499618530273, + "rewards/margins": 12.909093856811523, + "rewards/rejected": -17.246593475341797, + "step": 23050 + }, + { + "epoch": 1.38, + "learning_rate": 3.2861560708653505e-06, + "logits/chosen": -2.5128512382507324, + "logits/rejected": -1.8542802333831787, + "logps/chosen": -488.984375, + "logps/rejected": -1625.591796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.155520439147949, + "rewards/margins": 11.664273262023926, + "rewards/rejected": -15.819796562194824, + "step": 23060 + }, + { + "epoch": 1.38, + "learning_rate": 3.284509302004699e-06, + "logits/chosen": -2.519557476043701, + "logits/rejected": -1.9084560871124268, + "logps/chosen": -466.72137451171875, + "logps/rejected": -1676.884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.979466676712036, + "rewards/margins": 12.339518547058105, + "rewards/rejected": -16.318984985351562, + "step": 23070 + }, + { + "epoch": 1.38, + "learning_rate": 3.2828621554812786e-06, + "logits/chosen": -2.472146511077881, + "logits/rejected": -1.853529930114746, + "logps/chosen": -487.3328552246094, + "logps/rejected": -1728.648681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.181683540344238, + "rewards/margins": 12.672070503234863, + "rewards/rejected": -16.8537540435791, + "step": 23080 + }, + { + "epoch": 1.38, + "learning_rate": 3.281214632088027e-06, + "logits/chosen": -2.4499053955078125, + "logits/rejected": -1.7400232553482056, + "logps/chosen": -495.3985290527344, + "logps/rejected": -1745.664794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.197909355163574, + "rewards/margins": 12.809946060180664, + "rewards/rejected": -17.007854461669922, + "step": 23090 + }, + { + "epoch": 1.38, + "learning_rate": 3.2795667326180604e-06, + "logits/chosen": -2.4896633625030518, + "logits/rejected": -1.8380295038223267, + "logps/chosen": -481.0726623535156, + "logps/rejected": -1682.0443115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.107535362243652, + "rewards/margins": 12.267046928405762, + "rewards/rejected": -16.374584197998047, + "step": 23100 + }, + { + "epoch": 1.38, + "learning_rate": 3.2779184578646795e-06, + "logits/chosen": -2.518826961517334, + "logits/rejected": -1.936408281326294, + "logps/chosen": -460.0380859375, + "logps/rejected": -1745.766357421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.863534450531006, + "rewards/margins": 13.154142379760742, + "rewards/rejected": -17.01767921447754, + "step": 23110 + }, + { + "epoch": 1.38, + "learning_rate": 3.276269808621363e-06, + "logits/chosen": -2.44789457321167, + "logits/rejected": -1.7464263439178467, + "logps/chosen": -501.29937744140625, + "logps/rejected": -1710.264404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.308804512023926, + "rewards/margins": 12.334776878356934, + "rewards/rejected": -16.64358139038086, + "step": 23120 + }, + { + "epoch": 1.38, + "learning_rate": 3.2746207856817695e-06, + "logits/chosen": -2.5275051593780518, + "logits/rejected": -1.9004859924316406, + "logps/chosen": -468.0972595214844, + "logps/rejected": -1703.9339599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9766955375671387, + "rewards/margins": 12.623342514038086, + "rewards/rejected": -16.600038528442383, + "step": 23130 + }, + { + "epoch": 1.38, + "learning_rate": 3.272971389839741e-06, + "logits/chosen": -2.5724873542785645, + "logits/rejected": -1.9101293087005615, + "logps/chosen": -460.3026428222656, + "logps/rejected": -1718.4007568359375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8793842792510986, + "rewards/margins": 12.859883308410645, + "rewards/rejected": -16.739269256591797, + "step": 23140 + }, + { + "epoch": 1.38, + "learning_rate": 3.271321621889295e-06, + "logits/chosen": -2.543950319290161, + "logits/rejected": -1.9241876602172852, + "logps/chosen": -456.4091796875, + "logps/rejected": -1736.569091796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.795410633087158, + "rewards/margins": 13.120524406433105, + "rewards/rejected": -16.915935516357422, + "step": 23150 + }, + { + "epoch": 1.38, + "learning_rate": 3.2696714826246295e-06, + "logits/chosen": -2.500357151031494, + "logits/rejected": -1.8222770690917969, + "logps/chosen": -459.5062561035156, + "logps/rejected": -1709.5931396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9107346534729004, + "rewards/margins": 12.733328819274902, + "rewards/rejected": -16.644062042236328, + "step": 23160 + }, + { + "epoch": 1.38, + "learning_rate": 3.2680209728401226e-06, + "logits/chosen": -2.512936592102051, + "logits/rejected": -1.7947914600372314, + "logps/chosen": -465.7005310058594, + "logps/rejected": -1714.854248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9335713386535645, + "rewards/margins": 12.780099868774414, + "rewards/rejected": -16.713672637939453, + "step": 23170 + }, + { + "epoch": 1.38, + "learning_rate": 3.266370093330329e-06, + "logits/chosen": -2.525031805038452, + "logits/rejected": -1.8172515630722046, + "logps/chosen": -464.253173828125, + "logps/rejected": -1732.2388916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9380555152893066, + "rewards/margins": 12.929468154907227, + "rewards/rejected": -16.867525100708008, + "step": 23180 + }, + { + "epoch": 1.38, + "learning_rate": 3.2647188448899813e-06, + "logits/chosen": -2.516066551208496, + "logits/rejected": -1.8543329238891602, + "logps/chosen": -465.99200439453125, + "logps/rejected": -1697.3837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.97973895072937, + "rewards/margins": 12.552478790283203, + "rewards/rejected": -16.53221893310547, + "step": 23190 + }, + { + "epoch": 1.38, + "learning_rate": 3.2630672283139924e-06, + "logits/chosen": -2.5615198612213135, + "logits/rejected": -1.8992207050323486, + "logps/chosen": -464.142333984375, + "logps/rejected": -1679.4075927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9621777534484863, + "rewards/margins": 12.395570755004883, + "rewards/rejected": -16.357746124267578, + "step": 23200 + }, + { + "epoch": 1.38, + "learning_rate": 3.2614152443974495e-06, + "logits/chosen": -2.5101513862609863, + "logits/rejected": -1.8235204219818115, + "logps/chosen": -500.96826171875, + "logps/rejected": -1707.0667724609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.272391319274902, + "rewards/margins": 12.35162353515625, + "rewards/rejected": -16.624013900756836, + "step": 23210 + }, + { + "epoch": 1.38, + "learning_rate": 3.2597628939356174e-06, + "logits/chosen": -2.534212589263916, + "logits/rejected": -1.922376036643982, + "logps/chosen": -456.0462951660156, + "logps/rejected": -1677.5045166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.810433864593506, + "rewards/margins": 12.514303207397461, + "rewards/rejected": -16.324737548828125, + "step": 23220 + }, + { + "epoch": 1.39, + "learning_rate": 3.258110177723938e-06, + "logits/chosen": -2.51676344871521, + "logits/rejected": -1.824745774269104, + "logps/chosen": -449.4241638183594, + "logps/rejected": -1783.263427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.706780195236206, + "rewards/margins": 13.676986694335938, + "rewards/rejected": -17.38376808166504, + "step": 23230 + }, + { + "epoch": 1.39, + "learning_rate": 3.256457096558029e-06, + "logits/chosen": -2.464231014251709, + "logits/rejected": -1.808868408203125, + "logps/chosen": -457.89508056640625, + "logps/rejected": -1812.715576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7796790599823, + "rewards/margins": 13.891744613647461, + "rewards/rejected": -17.67142677307129, + "step": 23240 + }, + { + "epoch": 1.39, + "learning_rate": 3.254803651233683e-06, + "logits/chosen": -2.5603811740875244, + "logits/rejected": -1.8625314235687256, + "logps/chosen": -447.220458984375, + "logps/rejected": -1773.698974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7487831115722656, + "rewards/margins": 13.543134689331055, + "rewards/rejected": -17.29191780090332, + "step": 23250 + }, + { + "epoch": 1.39, + "learning_rate": 3.253149842546869e-06, + "logits/chosen": -2.5669682025909424, + "logits/rejected": -1.8774818181991577, + "logps/chosen": -444.2223205566406, + "logps/rejected": -1629.09033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7361743450164795, + "rewards/margins": 12.115045547485352, + "rewards/rejected": -15.851221084594727, + "step": 23260 + }, + { + "epoch": 1.39, + "learning_rate": 3.25149567129373e-06, + "logits/chosen": -2.5550739765167236, + "logits/rejected": -1.9303572177886963, + "logps/chosen": -451.7452697753906, + "logps/rejected": -1735.2806396484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.774890422821045, + "rewards/margins": 13.133809089660645, + "rewards/rejected": -16.908700942993164, + "step": 23270 + }, + { + "epoch": 1.39, + "learning_rate": 3.249841138270585e-06, + "logits/chosen": -2.4358572959899902, + "logits/rejected": -1.8584048748016357, + "logps/chosen": -461.76763916015625, + "logps/rejected": -1636.978759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.868471622467041, + "rewards/margins": 12.06761360168457, + "rewards/rejected": -15.936083793640137, + "step": 23280 + }, + { + "epoch": 1.39, + "learning_rate": 3.2481862442739255e-06, + "logits/chosen": -2.576411008834839, + "logits/rejected": -1.9185537099838257, + "logps/chosen": -441.0987854003906, + "logps/rejected": -1752.3736572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.701798677444458, + "rewards/margins": 13.366543769836426, + "rewards/rejected": -17.068342208862305, + "step": 23290 + }, + { + "epoch": 1.39, + "learning_rate": 3.246530990100417e-06, + "logits/chosen": -2.528543710708618, + "logits/rejected": -1.9272606372833252, + "logps/chosen": -446.56500244140625, + "logps/rejected": -1734.473876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7390377521514893, + "rewards/margins": 13.15492057800293, + "rewards/rejected": -16.893957138061523, + "step": 23300 + }, + { + "epoch": 1.39, + "learning_rate": 3.2448753765469e-06, + "logits/chosen": -2.5634167194366455, + "logits/rejected": -1.9849439859390259, + "logps/chosen": -442.753662109375, + "logps/rejected": -1727.4703369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7399818897247314, + "rewards/margins": 13.101529121398926, + "rewards/rejected": -16.841510772705078, + "step": 23310 + }, + { + "epoch": 1.39, + "learning_rate": 3.243219404410386e-06, + "logits/chosen": -2.5651135444641113, + "logits/rejected": -1.944016695022583, + "logps/chosen": -453.02313232421875, + "logps/rejected": -1720.0400390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7706668376922607, + "rewards/margins": 12.973451614379883, + "rewards/rejected": -16.74411964416504, + "step": 23320 + }, + { + "epoch": 1.39, + "learning_rate": 3.2415630744880605e-06, + "logits/chosen": -2.5508904457092285, + "logits/rejected": -1.9758306741714478, + "logps/chosen": -452.2828674316406, + "logps/rejected": -1724.885009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.829493999481201, + "rewards/margins": 12.974166870117188, + "rewards/rejected": -16.803661346435547, + "step": 23330 + }, + { + "epoch": 1.39, + "learning_rate": 3.23990638757728e-06, + "logits/chosen": -2.5344009399414062, + "logits/rejected": -1.8488948345184326, + "logps/chosen": -453.18548583984375, + "logps/rejected": -1768.7359619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7531704902648926, + "rewards/margins": 13.496519088745117, + "rewards/rejected": -17.24968910217285, + "step": 23340 + }, + { + "epoch": 1.39, + "learning_rate": 3.2382493444755743e-06, + "logits/chosen": -2.52325439453125, + "logits/rejected": -1.9623000621795654, + "logps/chosen": -448.97216796875, + "logps/rejected": -1768.272705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8164429664611816, + "rewards/margins": 13.417378425598145, + "rewards/rejected": -17.233821868896484, + "step": 23350 + }, + { + "epoch": 1.39, + "learning_rate": 3.2365919459806435e-06, + "logits/chosen": -2.558109760284424, + "logits/rejected": -1.9492309093475342, + "logps/chosen": -467.9266662597656, + "logps/rejected": -1708.0, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9292709827423096, + "rewards/margins": 12.717656135559082, + "rewards/rejected": -16.646926879882812, + "step": 23360 + }, + { + "epoch": 1.39, + "learning_rate": 3.2349341928903588e-06, + "logits/chosen": -2.5748226642608643, + "logits/rejected": -2.0269908905029297, + "logps/chosen": -469.49884033203125, + "logps/rejected": -1799.576171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9696991443634033, + "rewards/margins": 13.588541984558105, + "rewards/rejected": -17.558242797851562, + "step": 23370 + }, + { + "epoch": 1.39, + "learning_rate": 3.2332760860027633e-06, + "logits/chosen": -2.539881706237793, + "logits/rejected": -1.8643966913223267, + "logps/chosen": -443.578369140625, + "logps/rejected": -1717.153076171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7760391235351562, + "rewards/margins": 12.953226089477539, + "rewards/rejected": -16.729267120361328, + "step": 23380 + }, + { + "epoch": 1.39, + "learning_rate": 3.2316176261160692e-06, + "logits/chosen": -2.4824271202087402, + "logits/rejected": -1.885606050491333, + "logps/chosen": -456.43609619140625, + "logps/rejected": -1697.5286865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8093135356903076, + "rewards/margins": 12.738791465759277, + "rewards/rejected": -16.548105239868164, + "step": 23390 + }, + { + "epoch": 1.4, + "learning_rate": 3.2299588140286597e-06, + "logits/chosen": -2.5156378746032715, + "logits/rejected": -1.8621689081192017, + "logps/chosen": -474.07861328125, + "logps/rejected": -1711.5540771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.033383369445801, + "rewards/margins": 12.639749526977539, + "rewards/rejected": -16.673133850097656, + "step": 23400 + }, + { + "epoch": 1.4, + "learning_rate": 3.2282996505390864e-06, + "logits/chosen": -2.5968151092529297, + "logits/rejected": -1.9651577472686768, + "logps/chosen": -446.3209533691406, + "logps/rejected": -1706.2838134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7553699016571045, + "rewards/margins": 12.86375904083252, + "rewards/rejected": -16.619129180908203, + "step": 23410 + }, + { + "epoch": 1.4, + "learning_rate": 3.226640136446071e-06, + "logits/chosen": -2.552607774734497, + "logits/rejected": -1.942180871963501, + "logps/chosen": -455.20330810546875, + "logps/rejected": -1758.2279052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.814815044403076, + "rewards/margins": 13.321032524108887, + "rewards/rejected": -17.135848999023438, + "step": 23420 + }, + { + "epoch": 1.4, + "learning_rate": 3.2249802725485026e-06, + "logits/chosen": -2.567371129989624, + "logits/rejected": -1.9167039394378662, + "logps/chosen": -445.487060546875, + "logps/rejected": -1776.3607177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7516746520996094, + "rewards/margins": 13.570945739746094, + "rewards/rejected": -17.322620391845703, + "step": 23430 + }, + { + "epoch": 1.4, + "learning_rate": 3.22332005964544e-06, + "logits/chosen": -2.5149731636047363, + "logits/rejected": -1.972051978111267, + "logps/chosen": -464.2388610839844, + "logps/rejected": -1741.2886962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9335548877716064, + "rewards/margins": 13.043569564819336, + "rewards/rejected": -16.977123260498047, + "step": 23440 + }, + { + "epoch": 1.4, + "learning_rate": 3.22165949853611e-06, + "logits/chosen": -2.537611961364746, + "logits/rejected": -1.9649957418441772, + "logps/chosen": -446.90350341796875, + "logps/rejected": -1694.176513671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7367217540740967, + "rewards/margins": 12.752273559570312, + "rewards/rejected": -16.488994598388672, + "step": 23450 + }, + { + "epoch": 1.4, + "learning_rate": 3.2199985900199064e-06, + "logits/chosen": -2.5346755981445312, + "logits/rejected": -1.9120298624038696, + "logps/chosen": -446.85443115234375, + "logps/rejected": -1775.4212646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.814452648162842, + "rewards/margins": 13.489007949829102, + "rewards/rejected": -17.3034610748291, + "step": 23460 + }, + { + "epoch": 1.4, + "learning_rate": 3.2183373348963915e-06, + "logits/chosen": -2.512908935546875, + "logits/rejected": -1.8670200109481812, + "logps/chosen": -474.27984619140625, + "logps/rejected": -1637.0772705078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.036097049713135, + "rewards/margins": 11.898773193359375, + "rewards/rejected": -15.934870719909668, + "step": 23470 + }, + { + "epoch": 1.4, + "learning_rate": 3.216675733965292e-06, + "logits/chosen": -2.4617209434509277, + "logits/rejected": -1.9526726007461548, + "logps/chosen": -449.64886474609375, + "logps/rejected": -1792.3994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8073318004608154, + "rewards/margins": 13.67243766784668, + "rewards/rejected": -17.47977066040039, + "step": 23480 + }, + { + "epoch": 1.4, + "learning_rate": 3.215013788026504e-06, + "logits/chosen": -2.524714231491089, + "logits/rejected": -1.899031400680542, + "logps/chosen": -445.48480224609375, + "logps/rejected": -1762.943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7665812969207764, + "rewards/margins": 13.4115571975708, + "rewards/rejected": -17.178138732910156, + "step": 23490 + }, + { + "epoch": 1.4, + "learning_rate": 3.213351497880087e-06, + "logits/chosen": -2.5228805541992188, + "logits/rejected": -1.8350721597671509, + "logps/chosen": -444.483154296875, + "logps/rejected": -1788.2642822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7387924194335938, + "rewards/margins": 13.701681137084961, + "rewards/rejected": -17.440473556518555, + "step": 23500 + }, + { + "epoch": 1.4, + "learning_rate": 3.2116888643262677e-06, + "logits/chosen": -2.50816011428833, + "logits/rejected": -1.8120313882827759, + "logps/chosen": -451.9710998535156, + "logps/rejected": -1714.218505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8058738708496094, + "rewards/margins": 12.888311386108398, + "rewards/rejected": -16.69418716430664, + "step": 23510 + }, + { + "epoch": 1.4, + "learning_rate": 3.2100258881654387e-06, + "logits/chosen": -2.482562780380249, + "logits/rejected": -1.8218348026275635, + "logps/chosen": -456.4662170410156, + "logps/rejected": -1786.022705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.862114667892456, + "rewards/margins": 13.547773361206055, + "rewards/rejected": -17.409887313842773, + "step": 23520 + }, + { + "epoch": 1.4, + "learning_rate": 3.208362570198156e-06, + "logits/chosen": -2.538726329803467, + "logits/rejected": -1.9263079166412354, + "logps/chosen": -460.52691650390625, + "logps/rejected": -1791.751708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8472511768341064, + "rewards/margins": 13.607295036315918, + "rewards/rejected": -17.454545974731445, + "step": 23530 + }, + { + "epoch": 1.4, + "learning_rate": 3.206698911225142e-06, + "logits/chosen": -2.5006558895111084, + "logits/rejected": -1.8646612167358398, + "logps/chosen": -466.55474853515625, + "logps/rejected": -1686.659423828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9901340007781982, + "rewards/margins": 12.433496475219727, + "rewards/rejected": -16.42363166809082, + "step": 23540 + }, + { + "epoch": 1.4, + "learning_rate": 3.20503491204728e-06, + "logits/chosen": -2.4935405254364014, + "logits/rejected": -1.8912235498428345, + "logps/chosen": -449.7535705566406, + "logps/rejected": -1681.267822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.88936185836792, + "rewards/margins": 12.47186279296875, + "rewards/rejected": -16.361225128173828, + "step": 23550 + }, + { + "epoch": 1.4, + "learning_rate": 3.2033705734656208e-06, + "logits/chosen": -2.4909369945526123, + "logits/rejected": -1.9619948863983154, + "logps/chosen": -467.86285400390625, + "logps/rejected": -1738.736083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9586567878723145, + "rewards/margins": 12.98620891571045, + "rewards/rejected": -16.944866180419922, + "step": 23560 + }, + { + "epoch": 1.41, + "learning_rate": 3.2017058962813763e-06, + "logits/chosen": -2.589371919631958, + "logits/rejected": -1.870009422302246, + "logps/chosen": -461.33837890625, + "logps/rejected": -1770.2825927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.914729595184326, + "rewards/margins": 13.350397109985352, + "rewards/rejected": -17.265125274658203, + "step": 23570 + }, + { + "epoch": 1.41, + "learning_rate": 3.200040881295922e-06, + "logits/chosen": -2.515446424484253, + "logits/rejected": -1.8979063034057617, + "logps/chosen": -444.4019470214844, + "logps/rejected": -1809.469482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.7970428466796875, + "rewards/margins": 13.847661972045898, + "rewards/rejected": -17.644704818725586, + "step": 23580 + }, + { + "epoch": 1.41, + "learning_rate": 3.198375529310797e-06, + "logits/chosen": -2.5779740810394287, + "logits/rejected": -1.9136337041854858, + "logps/chosen": -459.03289794921875, + "logps/rejected": -1728.9730224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9137282371520996, + "rewards/margins": 12.941553115844727, + "rewards/rejected": -16.855281829833984, + "step": 23590 + }, + { + "epoch": 1.41, + "learning_rate": 3.1967098411277015e-06, + "logits/chosen": -2.543996572494507, + "logits/rejected": -2.0481667518615723, + "logps/chosen": -458.52099609375, + "logps/rejected": -1710.8209228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9506239891052246, + "rewards/margins": 12.720965385437012, + "rewards/rejected": -16.671588897705078, + "step": 23600 + }, + { + "epoch": 1.41, + "learning_rate": 3.1950438175484965e-06, + "logits/chosen": -2.5002388954162598, + "logits/rejected": -1.9602086544036865, + "logps/chosen": -444.47747802734375, + "logps/rejected": -1766.5218505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.753687620162964, + "rewards/margins": 13.479162216186523, + "rewards/rejected": -17.23284912109375, + "step": 23610 + }, + { + "epoch": 1.41, + "learning_rate": 3.1933774593752067e-06, + "logits/chosen": -2.554352283477783, + "logits/rejected": -2.0184874534606934, + "logps/chosen": -450.598876953125, + "logps/rejected": -1765.881103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.836674451828003, + "rewards/margins": 13.37395191192627, + "rewards/rejected": -17.210628509521484, + "step": 23620 + }, + { + "epoch": 1.41, + "learning_rate": 3.1917107674100165e-06, + "logits/chosen": -2.5589027404785156, + "logits/rejected": -1.9641177654266357, + "logps/chosen": -451.06622314453125, + "logps/rejected": -1760.4127197265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.816059112548828, + "rewards/margins": 13.348546981811523, + "rewards/rejected": -17.16460609436035, + "step": 23630 + }, + { + "epoch": 1.41, + "learning_rate": 3.1900437424552726e-06, + "logits/chosen": -2.513188123703003, + "logits/rejected": -1.9236904382705688, + "logps/chosen": -467.1893005371094, + "logps/rejected": -1777.442626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.997586488723755, + "rewards/margins": 13.34099006652832, + "rewards/rejected": -17.338573455810547, + "step": 23640 + }, + { + "epoch": 1.41, + "learning_rate": 3.1883763853134793e-06, + "logits/chosen": -2.5634002685546875, + "logits/rejected": -1.9593349695205688, + "logps/chosen": -454.061279296875, + "logps/rejected": -1728.6461181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8725650310516357, + "rewards/margins": 12.974995613098145, + "rewards/rejected": -16.84756088256836, + "step": 23650 + }, + { + "epoch": 1.41, + "learning_rate": 3.186708696787303e-06, + "logits/chosen": -2.53391695022583, + "logits/rejected": -1.8797305822372437, + "logps/chosen": -462.17169189453125, + "logps/rejected": -1720.138916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9060490131378174, + "rewards/margins": 12.843978881835938, + "rewards/rejected": -16.75002670288086, + "step": 23660 + }, + { + "epoch": 1.41, + "learning_rate": 3.1850406776795682e-06, + "logits/chosen": -2.518758773803711, + "logits/rejected": -1.861128568649292, + "logps/chosen": -446.01898193359375, + "logps/rejected": -1755.8121337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.748539686203003, + "rewards/margins": 13.372495651245117, + "rewards/rejected": -17.121036529541016, + "step": 23670 + }, + { + "epoch": 1.41, + "learning_rate": 3.183372328793261e-06, + "logits/chosen": -2.496669292449951, + "logits/rejected": -1.9053184986114502, + "logps/chosen": -471.698486328125, + "logps/rejected": -1689.447998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.082655906677246, + "rewards/margins": 12.3712797164917, + "rewards/rejected": -16.453935623168945, + "step": 23680 + }, + { + "epoch": 1.41, + "learning_rate": 3.1817036509315235e-06, + "logits/chosen": -2.5918068885803223, + "logits/rejected": -1.8315922021865845, + "logps/chosen": -437.6092834472656, + "logps/rejected": -1727.048583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.699368715286255, + "rewards/margins": 13.139117240905762, + "rewards/rejected": -16.838485717773438, + "step": 23690 + }, + { + "epoch": 1.41, + "learning_rate": 3.1800346448976567e-06, + "logits/chosen": -2.5125420093536377, + "logits/rejected": -1.8287742137908936, + "logps/chosen": -453.6888122558594, + "logps/rejected": -1786.0706787109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8661227226257324, + "rewards/margins": 13.552736282348633, + "rewards/rejected": -17.41885757446289, + "step": 23700 + }, + { + "epoch": 1.41, + "learning_rate": 3.1783653114951204e-06, + "logits/chosen": -2.5577216148376465, + "logits/rejected": -1.8863589763641357, + "logps/chosen": -452.11126708984375, + "logps/rejected": -1794.0455322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8528473377227783, + "rewards/margins": 13.649365425109863, + "rewards/rejected": -17.50221061706543, + "step": 23710 + }, + { + "epoch": 1.41, + "learning_rate": 3.1766956515275317e-06, + "logits/chosen": -2.4867939949035645, + "logits/rejected": -1.9035335779190063, + "logps/chosen": -459.18780517578125, + "logps/rejected": -1754.931640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.879460573196411, + "rewards/margins": 13.222926139831543, + "rewards/rejected": -17.102388381958008, + "step": 23720 + }, + { + "epoch": 1.42, + "learning_rate": 3.1750256657986643e-06, + "logits/chosen": -2.538179397583008, + "logits/rejected": -1.790460228919983, + "logps/chosen": -463.9297790527344, + "logps/rejected": -1697.516845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.948493242263794, + "rewards/margins": 12.592262268066406, + "rewards/rejected": -16.540754318237305, + "step": 23730 + }, + { + "epoch": 1.42, + "learning_rate": 3.1733553551124507e-06, + "logits/chosen": -2.5336709022521973, + "logits/rejected": -1.9038877487182617, + "logps/chosen": -462.63995361328125, + "logps/rejected": -1776.260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.877779006958008, + "rewards/margins": 13.437074661254883, + "rewards/rejected": -17.314851760864258, + "step": 23740 + }, + { + "epoch": 1.42, + "learning_rate": 3.1716847202729785e-06, + "logits/chosen": -2.504006862640381, + "logits/rejected": -1.8596302270889282, + "logps/chosen": -477.69293212890625, + "logps/rejected": -1775.3902587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.031951427459717, + "rewards/margins": 13.27055835723877, + "rewards/rejected": -17.302509307861328, + "step": 23750 + }, + { + "epoch": 1.42, + "learning_rate": 3.1700137620844897e-06, + "logits/chosen": -2.59401798248291, + "logits/rejected": -1.998146653175354, + "logps/chosen": -465.6825256347656, + "logps/rejected": -1738.0341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9903526306152344, + "rewards/margins": 12.95518970489502, + "rewards/rejected": -16.945541381835938, + "step": 23760 + }, + { + "epoch": 1.42, + "learning_rate": 3.168342481351384e-06, + "logits/chosen": -2.518256902694702, + "logits/rejected": -1.8810533285140991, + "logps/chosen": -454.43865966796875, + "logps/rejected": -1758.151123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8254940509796143, + "rewards/margins": 13.328187942504883, + "rewards/rejected": -17.1536808013916, + "step": 23770 + }, + { + "epoch": 1.42, + "learning_rate": 3.166670878878217e-06, + "logits/chosen": -2.482184648513794, + "logits/rejected": -1.9547497034072876, + "logps/chosen": -446.2154235839844, + "logps/rejected": -1693.9761962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.794710636138916, + "rewards/margins": 12.694648742675781, + "rewards/rejected": -16.48935890197754, + "step": 23780 + }, + { + "epoch": 1.42, + "learning_rate": 3.164998955469697e-06, + "logits/chosen": -2.490281581878662, + "logits/rejected": -1.894174337387085, + "logps/chosen": -459.9903869628906, + "logps/rejected": -1750.65625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8908333778381348, + "rewards/margins": 13.173144340515137, + "rewards/rejected": -17.063980102539062, + "step": 23790 + }, + { + "epoch": 1.42, + "learning_rate": 3.1633267119306888e-06, + "logits/chosen": -2.546914577484131, + "logits/rejected": -1.971827745437622, + "logps/chosen": -451.8561096191406, + "logps/rejected": -1734.6451416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8223624229431152, + "rewards/margins": 13.07978630065918, + "rewards/rejected": -16.902149200439453, + "step": 23800 + }, + { + "epoch": 1.42, + "learning_rate": 3.1616541490662093e-06, + "logits/chosen": -2.5222315788269043, + "logits/rejected": -1.9012664556503296, + "logps/chosen": -455.36260986328125, + "logps/rejected": -1706.699462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9017245769500732, + "rewards/margins": 12.723159790039062, + "rewards/rejected": -16.624887466430664, + "step": 23810 + }, + { + "epoch": 1.42, + "learning_rate": 3.1599812676814314e-06, + "logits/chosen": -2.5497019290924072, + "logits/rejected": -1.887427568435669, + "logps/chosen": -467.9232482910156, + "logps/rejected": -1742.5989990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9798920154571533, + "rewards/margins": 13.006860733032227, + "rewards/rejected": -16.986751556396484, + "step": 23820 + }, + { + "epoch": 1.42, + "learning_rate": 3.158308068581679e-06, + "logits/chosen": -2.5213794708251953, + "logits/rejected": -1.9746901988983154, + "logps/chosen": -455.80853271484375, + "logps/rejected": -1747.021240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.862424850463867, + "rewards/margins": 13.167132377624512, + "rewards/rejected": -17.029560089111328, + "step": 23830 + }, + { + "epoch": 1.42, + "learning_rate": 3.1566345525724316e-06, + "logits/chosen": -2.526240825653076, + "logits/rejected": -1.9286739826202393, + "logps/chosen": -461.39892578125, + "logps/rejected": -1722.44140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9318299293518066, + "rewards/margins": 12.8384370803833, + "rewards/rejected": -16.770267486572266, + "step": 23840 + }, + { + "epoch": 1.42, + "learning_rate": 3.1549607204593185e-06, + "logits/chosen": -2.497985363006592, + "logits/rejected": -1.7830257415771484, + "logps/chosen": -468.56585693359375, + "logps/rejected": -1714.3294677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.004703998565674, + "rewards/margins": 12.699288368225098, + "rewards/rejected": -16.703990936279297, + "step": 23850 + }, + { + "epoch": 1.42, + "learning_rate": 3.153286573048123e-06, + "logits/chosen": -2.528900623321533, + "logits/rejected": -1.9433482885360718, + "logps/chosen": -458.93658447265625, + "logps/rejected": -1747.134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.8726096153259277, + "rewards/margins": 13.161112785339355, + "rewards/rejected": -17.033723831176758, + "step": 23860 + }, + { + "epoch": 1.42, + "learning_rate": 3.15161211114478e-06, + "logits/chosen": -2.4984869956970215, + "logits/rejected": -1.869342565536499, + "logps/chosen": -469.685791015625, + "logps/rejected": -1760.785888671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9673545360565186, + "rewards/margins": 13.191032409667969, + "rewards/rejected": -17.158388137817383, + "step": 23870 + }, + { + "epoch": 1.42, + "learning_rate": 3.1499373355553746e-06, + "logits/chosen": -2.566141128540039, + "logits/rejected": -1.9030561447143555, + "logps/chosen": -453.9566955566406, + "logps/rejected": -1772.563720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.848339796066284, + "rewards/margins": 13.436650276184082, + "rewards/rejected": -17.284988403320312, + "step": 23880 + }, + { + "epoch": 1.42, + "learning_rate": 3.148262247086144e-06, + "logits/chosen": -2.5076441764831543, + "logits/rejected": -1.8514295816421509, + "logps/chosen": -470.78338623046875, + "logps/rejected": -1776.7437744140625, + "loss": 0.0121, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -4.0107197761535645, + "rewards/margins": 13.313380241394043, + "rewards/rejected": -17.324100494384766, + "step": 23890 + }, + { + "epoch": 1.43, + "learning_rate": 3.1465868465434763e-06, + "logits/chosen": -2.563997983932495, + "logits/rejected": -1.9933452606201172, + "logps/chosen": -462.45587158203125, + "logps/rejected": -1700.2861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.9175631999969482, + "rewards/margins": 12.634532928466797, + "rewards/rejected": -16.552095413208008, + "step": 23900 + }, + { + "epoch": 1.43, + "learning_rate": 3.1449111347339084e-06, + "logits/chosen": -2.5493030548095703, + "logits/rejected": -2.002668857574463, + "logps/chosen": -481.26788330078125, + "logps/rejected": -1593.90380859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.113992214202881, + "rewards/margins": 11.384958267211914, + "rewards/rejected": -15.498950004577637, + "step": 23910 + }, + { + "epoch": 1.43, + "learning_rate": 3.1432351124641293e-06, + "logits/chosen": -2.5080149173736572, + "logits/rejected": -1.9644756317138672, + "logps/chosen": -514.6719360351562, + "logps/rejected": -1673.709716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.402683258056641, + "rewards/margins": 11.8866605758667, + "rewards/rejected": -16.28934097290039, + "step": 23920 + }, + { + "epoch": 1.43, + "learning_rate": 3.141558780540974e-06, + "logits/chosen": -2.5797481536865234, + "logits/rejected": -1.9364945888519287, + "logps/chosen": -483.98443603515625, + "logps/rejected": -1720.884521484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.096108436584473, + "rewards/margins": 12.670186996459961, + "rewards/rejected": -16.766298294067383, + "step": 23930 + }, + { + "epoch": 1.43, + "learning_rate": 3.139882139771431e-06, + "logits/chosen": -2.486712694168091, + "logits/rejected": -1.8932561874389648, + "logps/chosen": -474.6849670410156, + "logps/rejected": -1695.655517578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0691375732421875, + "rewards/margins": 12.445955276489258, + "rewards/rejected": -16.515092849731445, + "step": 23940 + }, + { + "epoch": 1.43, + "learning_rate": 3.138205190962634e-06, + "logits/chosen": -2.5165793895721436, + "logits/rejected": -1.9561328887939453, + "logps/chosen": -483.0594177246094, + "logps/rejected": -1737.975341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.134366989135742, + "rewards/margins": 12.819524765014648, + "rewards/rejected": -16.95389175415039, + "step": 23950 + }, + { + "epoch": 1.43, + "learning_rate": 3.1365279349218656e-06, + "logits/chosen": -2.529567003250122, + "logits/rejected": -1.9100357294082642, + "logps/chosen": -496.3538513183594, + "logps/rejected": -1771.83984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.208673477172852, + "rewards/margins": 13.070467948913574, + "rewards/rejected": -17.27914047241211, + "step": 23960 + }, + { + "epoch": 1.43, + "learning_rate": 3.134850372456558e-06, + "logits/chosen": -2.5764942169189453, + "logits/rejected": -1.8983339071273804, + "logps/chosen": -488.9043884277344, + "logps/rejected": -1700.37109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.195429801940918, + "rewards/margins": 12.366350173950195, + "rewards/rejected": -16.561779022216797, + "step": 23970 + }, + { + "epoch": 1.43, + "learning_rate": 3.1331725043742887e-06, + "logits/chosen": -2.498382329940796, + "logits/rejected": -1.902661681175232, + "logps/chosen": -491.27789306640625, + "logps/rejected": -1694.8031005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.218688011169434, + "rewards/margins": 12.294944763183594, + "rewards/rejected": -16.51363182067871, + "step": 23980 + }, + { + "epoch": 1.43, + "learning_rate": 3.1314943314827845e-06, + "logits/chosen": -2.5582525730133057, + "logits/rejected": -1.9565610885620117, + "logps/chosen": -483.5174255371094, + "logps/rejected": -1739.5654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.057422637939453, + "rewards/margins": 12.88971996307373, + "rewards/rejected": -16.947141647338867, + "step": 23990 + }, + { + "epoch": 1.43, + "learning_rate": 3.1298158545899167e-06, + "logits/chosen": -2.525223970413208, + "logits/rejected": -1.89467453956604, + "logps/chosen": -502.0806579589844, + "logps/rejected": -1741.759521484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.288689613342285, + "rewards/margins": 12.669095993041992, + "rewards/rejected": -16.957786560058594, + "step": 24000 + }, + { + "epoch": 1.43, + "learning_rate": 3.1281370745037044e-06, + "logits/chosen": -2.518725872039795, + "logits/rejected": -1.911375641822815, + "logps/chosen": -499.98956298828125, + "logps/rejected": -1788.4359130859375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322179317474365, + "rewards/margins": 13.11341381072998, + "rewards/rejected": -17.43559455871582, + "step": 24010 + }, + { + "epoch": 1.43, + "learning_rate": 3.1264579920323123e-06, + "logits/chosen": -2.4855103492736816, + "logits/rejected": -1.8640092611312866, + "logps/chosen": -537.7661743164062, + "logps/rejected": -1785.660400390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.602899551391602, + "rewards/margins": 12.802253723144531, + "rewards/rejected": -17.405155181884766, + "step": 24020 + }, + { + "epoch": 1.43, + "learning_rate": 3.1247786079840513e-06, + "logits/chosen": -2.491881847381592, + "logits/rejected": -1.8462343215942383, + "logps/chosen": -520.7423706054688, + "logps/rejected": -1787.9833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5175933837890625, + "rewards/margins": 12.925863265991211, + "rewards/rejected": -17.443456649780273, + "step": 24030 + }, + { + "epoch": 1.43, + "learning_rate": 3.1230989231673765e-06, + "logits/chosen": -2.4911422729492188, + "logits/rejected": -1.8807827234268188, + "logps/chosen": -522.6182861328125, + "logps/rejected": -1854.4202880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.515149116516113, + "rewards/margins": 13.588793754577637, + "rewards/rejected": -18.10394287109375, + "step": 24040 + }, + { + "epoch": 1.43, + "learning_rate": 3.1214189383908877e-06, + "logits/chosen": -2.5041611194610596, + "logits/rejected": -2.030428647994995, + "logps/chosen": -503.1253967285156, + "logps/rejected": -1809.878173828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.360642433166504, + "rewards/margins": 13.298619270324707, + "rewards/rejected": -17.65926170349121, + "step": 24050 + }, + { + "epoch": 1.43, + "learning_rate": 3.11973865446333e-06, + "logits/chosen": -2.4954657554626465, + "logits/rejected": -1.8566300868988037, + "logps/chosen": -517.2653198242188, + "logps/rejected": -1859.7626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.477973937988281, + "rewards/margins": 13.676523208618164, + "rewards/rejected": -18.154499053955078, + "step": 24060 + }, + { + "epoch": 1.44, + "learning_rate": 3.118058072193592e-06, + "logits/chosen": -2.492169141769409, + "logits/rejected": -1.8338905572891235, + "logps/chosen": -517.349365234375, + "logps/rejected": -1765.27734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.439965724945068, + "rewards/margins": 12.765384674072266, + "rewards/rejected": -17.20534896850586, + "step": 24070 + }, + { + "epoch": 1.44, + "learning_rate": 3.116377192390706e-06, + "logits/chosen": -2.495713710784912, + "logits/rejected": -1.8045564889907837, + "logps/chosen": -529.39599609375, + "logps/rejected": -1754.2554931640625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.57501745223999, + "rewards/margins": 12.517862319946289, + "rewards/rejected": -17.092880249023438, + "step": 24080 + }, + { + "epoch": 1.44, + "learning_rate": 3.1146960158638475e-06, + "logits/chosen": -2.489287853240967, + "logits/rejected": -1.7957191467285156, + "logps/chosen": -545.3616943359375, + "logps/rejected": -1880.685791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.738951683044434, + "rewards/margins": 13.608131408691406, + "rewards/rejected": -18.347082138061523, + "step": 24090 + }, + { + "epoch": 1.44, + "learning_rate": 3.113014543422334e-06, + "logits/chosen": -2.481339693069458, + "logits/rejected": -1.8394544124603271, + "logps/chosen": -534.1414184570312, + "logps/rejected": -1810.8583984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.604584693908691, + "rewards/margins": 13.05077838897705, + "rewards/rejected": -17.655364990234375, + "step": 24100 + }, + { + "epoch": 1.44, + "learning_rate": 3.1113327758756277e-06, + "logits/chosen": -2.5259013175964355, + "logits/rejected": -1.8679399490356445, + "logps/chosen": -544.7511596679688, + "logps/rejected": -1805.774658203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.741352558135986, + "rewards/margins": 12.873875617980957, + "rewards/rejected": -17.6152286529541, + "step": 24110 + }, + { + "epoch": 1.44, + "learning_rate": 3.109650714033331e-06, + "logits/chosen": -2.4870338439941406, + "logits/rejected": -1.7905693054199219, + "logps/chosen": -566.1954345703125, + "logps/rejected": -1878.576171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.908766746520996, + "rewards/margins": 13.437028884887695, + "rewards/rejected": -18.345794677734375, + "step": 24120 + }, + { + "epoch": 1.44, + "learning_rate": 3.107968358705189e-06, + "logits/chosen": -2.4614200592041016, + "logits/rejected": -1.7395700216293335, + "logps/chosen": -540.0886840820312, + "logps/rejected": -1876.1802978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.655145168304443, + "rewards/margins": 13.653726577758789, + "rewards/rejected": -18.30887222290039, + "step": 24130 + }, + { + "epoch": 1.44, + "learning_rate": 3.1062857107010856e-06, + "logits/chosen": -2.5089545249938965, + "logits/rejected": -1.8873481750488281, + "logps/chosen": -561.6438598632812, + "logps/rejected": -1988.1953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.896478176116943, + "rewards/margins": 14.530024528503418, + "rewards/rejected": -19.426504135131836, + "step": 24140 + }, + { + "epoch": 1.44, + "learning_rate": 3.10460277083105e-06, + "logits/chosen": -2.4904563426971436, + "logits/rejected": -1.7516788244247437, + "logps/chosen": -558.6611328125, + "logps/rejected": -1956.0146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.83712911605835, + "rewards/margins": 14.291186332702637, + "rewards/rejected": -19.128314971923828, + "step": 24150 + }, + { + "epoch": 1.44, + "learning_rate": 3.1029195399052468e-06, + "logits/chosen": -2.474637508392334, + "logits/rejected": -1.7869831323623657, + "logps/chosen": -543.3914184570312, + "logps/rejected": -1946.5244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.732945442199707, + "rewards/margins": 14.297735214233398, + "rewards/rejected": -19.030681610107422, + "step": 24160 + }, + { + "epoch": 1.44, + "learning_rate": 3.1012360187339863e-06, + "logits/chosen": -2.471844434738159, + "logits/rejected": -1.7532323598861694, + "logps/chosen": -549.2154541015625, + "logps/rejected": -1916.7689208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.787574768066406, + "rewards/margins": 13.929420471191406, + "rewards/rejected": -18.71699333190918, + "step": 24170 + }, + { + "epoch": 1.44, + "learning_rate": 3.099552208127713e-06, + "logits/chosen": -2.4991626739501953, + "logits/rejected": -1.7998358011245728, + "logps/chosen": -551.6047973632812, + "logps/rejected": -1915.03125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.778668403625488, + "rewards/margins": 13.921699523925781, + "rewards/rejected": -18.700368881225586, + "step": 24180 + }, + { + "epoch": 1.44, + "learning_rate": 3.0978681088970146e-06, + "logits/chosen": -2.571807384490967, + "logits/rejected": -1.8681665658950806, + "logps/chosen": -565.1124267578125, + "logps/rejected": -1853.4173583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.908452033996582, + "rewards/margins": 13.189752578735352, + "rewards/rejected": -18.098201751708984, + "step": 24190 + }, + { + "epoch": 1.44, + "learning_rate": 3.096183721852616e-06, + "logits/chosen": -2.483914852142334, + "logits/rejected": -1.7386096715927124, + "logps/chosen": -543.9157104492188, + "logps/rejected": -1934.690673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.729595184326172, + "rewards/margins": 14.163572311401367, + "rewards/rejected": -18.893169403076172, + "step": 24200 + }, + { + "epoch": 1.44, + "learning_rate": 3.0944990478053816e-06, + "logits/chosen": -2.5178418159484863, + "logits/rejected": -1.8352620601654053, + "logps/chosen": -539.2071533203125, + "logps/rejected": -1900.546142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.64587926864624, + "rewards/margins": 13.921908378601074, + "rewards/rejected": -18.567787170410156, + "step": 24210 + }, + { + "epoch": 1.44, + "learning_rate": 3.0928140875663138e-06, + "logits/chosen": -2.5182507038116455, + "logits/rejected": -1.87898850440979, + "logps/chosen": -529.9676513671875, + "logps/rejected": -1963.3251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.565457344055176, + "rewards/margins": 14.631451606750488, + "rewards/rejected": -19.196908950805664, + "step": 24220 + }, + { + "epoch": 1.44, + "learning_rate": 3.0911288419465507e-06, + "logits/chosen": -2.5341403484344482, + "logits/rejected": -1.8977348804473877, + "logps/chosen": -546.8150634765625, + "logps/rejected": -1910.1898193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.765666484832764, + "rewards/margins": 13.8817777633667, + "rewards/rejected": -18.647443771362305, + "step": 24230 + }, + { + "epoch": 1.45, + "learning_rate": 3.089443311757371e-06, + "logits/chosen": -2.4902749061584473, + "logits/rejected": -1.8665615320205688, + "logps/chosen": -565.0280151367188, + "logps/rejected": -1780.3255615234375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.923811435699463, + "rewards/margins": 12.438108444213867, + "rewards/rejected": -17.361919403076172, + "step": 24240 + }, + { + "epoch": 1.45, + "learning_rate": 3.087757497810188e-06, + "logits/chosen": -2.502257823944092, + "logits/rejected": -1.7512845993041992, + "logps/chosen": -597.3643798828125, + "logps/rejected": -1968.640380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.179213047027588, + "rewards/margins": 14.063051223754883, + "rewards/rejected": -19.242263793945312, + "step": 24250 + }, + { + "epoch": 1.45, + "learning_rate": 3.0860714009165536e-06, + "logits/chosen": -2.498239040374756, + "logits/rejected": -1.8254592418670654, + "logps/chosen": -596.6505126953125, + "logps/rejected": -1952.63671875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.218903541564941, + "rewards/margins": 13.866277694702148, + "rewards/rejected": -19.085180282592773, + "step": 24260 + }, + { + "epoch": 1.45, + "learning_rate": 3.0843850218881545e-06, + "logits/chosen": -2.4840705394744873, + "logits/rejected": -1.6420040130615234, + "logps/chosen": -602.2725219726562, + "logps/rejected": -1941.358642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3236188888549805, + "rewards/margins": 13.655957221984863, + "rewards/rejected": -18.979576110839844, + "step": 24270 + }, + { + "epoch": 1.45, + "learning_rate": 3.0826983615368135e-06, + "logits/chosen": -2.4427897930145264, + "logits/rejected": -1.6933482885360718, + "logps/chosen": -605.003662109375, + "logps/rejected": -1972.8402099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.375432014465332, + "rewards/margins": 13.91395378112793, + "rewards/rejected": -19.289386749267578, + "step": 24280 + }, + { + "epoch": 1.45, + "learning_rate": 3.081011420674488e-06, + "logits/chosen": -2.459822177886963, + "logits/rejected": -1.5756657123565674, + "logps/chosen": -587.5833740234375, + "logps/rejected": -1953.0198974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.16888427734375, + "rewards/margins": 13.919901847839355, + "rewards/rejected": -19.088787078857422, + "step": 24290 + }, + { + "epoch": 1.45, + "learning_rate": 3.0793242001132725e-06, + "logits/chosen": -2.466434955596924, + "logits/rejected": -1.7967363595962524, + "logps/chosen": -607.7078857421875, + "logps/rejected": -1970.370849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3025736808776855, + "rewards/margins": 13.96031665802002, + "rewards/rejected": -19.262889862060547, + "step": 24300 + }, + { + "epoch": 1.45, + "learning_rate": 3.0776367006653945e-06, + "logits/chosen": -2.4797163009643555, + "logits/rejected": -1.7488294839859009, + "logps/chosen": -603.3485717773438, + "logps/rejected": -1893.4619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.332121849060059, + "rewards/margins": 13.155741691589355, + "rewards/rejected": -18.487865447998047, + "step": 24310 + }, + { + "epoch": 1.45, + "learning_rate": 3.0759489231432157e-06, + "logits/chosen": -2.4687435626983643, + "logits/rejected": -1.7834465503692627, + "logps/chosen": -593.8497924804688, + "logps/rejected": -2006.2562255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.257931709289551, + "rewards/margins": 14.375391960144043, + "rewards/rejected": -19.633325576782227, + "step": 24320 + }, + { + "epoch": 1.45, + "learning_rate": 3.074260868359233e-06, + "logits/chosen": -2.5032436847686768, + "logits/rejected": -1.7517030239105225, + "logps/chosen": -631.0421142578125, + "logps/rejected": -2027.3375244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.540457248687744, + "rewards/margins": 14.288457870483398, + "rewards/rejected": -19.828914642333984, + "step": 24330 + }, + { + "epoch": 1.45, + "learning_rate": 3.072572537126075e-06, + "logits/chosen": -2.4521450996398926, + "logits/rejected": -1.7466113567352295, + "logps/chosen": -594.7490844726562, + "logps/rejected": -1929.8671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.214987277984619, + "rewards/margins": 13.658535957336426, + "rewards/rejected": -18.873523712158203, + "step": 24340 + }, + { + "epoch": 1.45, + "learning_rate": 3.0708839302565058e-06, + "logits/chosen": -2.458073377609253, + "logits/rejected": -1.7602074146270752, + "logps/chosen": -612.5962524414062, + "logps/rejected": -2009.353759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.441810607910156, + "rewards/margins": 14.21467399597168, + "rewards/rejected": -19.656482696533203, + "step": 24350 + }, + { + "epoch": 1.45, + "learning_rate": 3.0691950485634192e-06, + "logits/chosen": -2.4753847122192383, + "logits/rejected": -1.755014419555664, + "logps/chosen": -607.099365234375, + "logps/rejected": -1900.0550537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417043685913086, + "rewards/margins": 13.1441011428833, + "rewards/rejected": -18.56114387512207, + "step": 24360 + }, + { + "epoch": 1.45, + "learning_rate": 3.0675058928598435e-06, + "logits/chosen": -2.494844436645508, + "logits/rejected": -1.7345912456512451, + "logps/chosen": -601.6976318359375, + "logps/rejected": -1964.5667724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.301575183868408, + "rewards/margins": 13.894453048706055, + "rewards/rejected": -19.196029663085938, + "step": 24370 + }, + { + "epoch": 1.45, + "learning_rate": 3.0658164639589383e-06, + "logits/chosen": -2.4574954509735107, + "logits/rejected": -1.9329935312271118, + "logps/chosen": -617.87646484375, + "logps/rejected": -1898.637451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5389909744262695, + "rewards/margins": 13.006922721862793, + "rewards/rejected": -18.545913696289062, + "step": 24380 + }, + { + "epoch": 1.45, + "learning_rate": 3.0641267626739946e-06, + "logits/chosen": -2.475858211517334, + "logits/rejected": -1.7028687000274658, + "logps/chosen": -612.4434814453125, + "logps/rejected": -2059.601806640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.368228912353516, + "rewards/margins": 14.78381061553955, + "rewards/rejected": -20.152040481567383, + "step": 24390 + }, + { + "epoch": 1.45, + "learning_rate": 3.0624367898184355e-06, + "logits/chosen": -2.485988140106201, + "logits/rejected": -1.6848993301391602, + "logps/chosen": -607.367431640625, + "logps/rejected": -1980.911376953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.354449272155762, + "rewards/margins": 14.021273612976074, + "rewards/rejected": -19.375720977783203, + "step": 24400 + }, + { + "epoch": 1.46, + "learning_rate": 3.0607465462058115e-06, + "logits/chosen": -2.463693141937256, + "logits/rejected": -1.8416227102279663, + "logps/chosen": -603.5363159179688, + "logps/rejected": -1909.8551025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.337491035461426, + "rewards/margins": 13.31285285949707, + "rewards/rejected": -18.65034294128418, + "step": 24410 + }, + { + "epoch": 1.46, + "learning_rate": 3.059056032649808e-06, + "logits/chosen": -2.472679853439331, + "logits/rejected": -1.7807295322418213, + "logps/chosen": -587.8018798828125, + "logps/rejected": -1976.8072509765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.175008773803711, + "rewards/margins": 14.147453308105469, + "rewards/rejected": -19.32246208190918, + "step": 24420 + }, + { + "epoch": 1.46, + "learning_rate": 3.0573652499642375e-06, + "logits/chosen": -2.412935972213745, + "logits/rejected": -1.7078607082366943, + "logps/chosen": -608.2554321289062, + "logps/rejected": -1990.6751708984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.367774963378906, + "rewards/margins": 14.092475891113281, + "rewards/rejected": -19.460250854492188, + "step": 24430 + }, + { + "epoch": 1.46, + "learning_rate": 3.055674198963043e-06, + "logits/chosen": -2.488710880279541, + "logits/rejected": -1.73443603515625, + "logps/chosen": -587.2740478515625, + "logps/rejected": -2014.8118896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.147235870361328, + "rewards/margins": 14.566001892089844, + "rewards/rejected": -19.713237762451172, + "step": 24440 + }, + { + "epoch": 1.46, + "learning_rate": 3.0539828804602955e-06, + "logits/chosen": -2.48884916305542, + "logits/rejected": -1.7190879583358765, + "logps/chosen": -571.92431640625, + "logps/rejected": -1979.6011962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038017749786377, + "rewards/margins": 14.308329582214355, + "rewards/rejected": -19.34634780883789, + "step": 24450 + }, + { + "epoch": 1.46, + "learning_rate": 3.0522912952701966e-06, + "logits/chosen": -2.5189452171325684, + "logits/rejected": -1.6418536901474, + "logps/chosen": -586.8314208984375, + "logps/rejected": -2063.307861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.202699184417725, + "rewards/margins": 14.990087509155273, + "rewards/rejected": -20.192790985107422, + "step": 24460 + }, + { + "epoch": 1.46, + "learning_rate": 3.0505994442070756e-06, + "logits/chosen": -2.511765956878662, + "logits/rejected": -1.7538912296295166, + "logps/chosen": -602.0714111328125, + "logps/rejected": -1975.626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.334647178649902, + "rewards/margins": 13.981943130493164, + "rewards/rejected": -19.31658935546875, + "step": 24470 + }, + { + "epoch": 1.46, + "learning_rate": 3.0489073280853886e-06, + "logits/chosen": -2.4906997680664062, + "logits/rejected": -1.7915284633636475, + "logps/chosen": -557.958251953125, + "logps/rejected": -2001.733642578125, + "loss": 0.0135, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.892163276672363, + "rewards/margins": 14.688501358032227, + "rewards/rejected": -19.580663681030273, + "step": 24480 + }, + { + "epoch": 1.46, + "learning_rate": 3.0472149477197217e-06, + "logits/chosen": -2.58473801612854, + "logits/rejected": -1.9293848276138306, + "logps/chosen": -514.0826416015625, + "logps/rejected": -1877.607666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.413661003112793, + "rewards/margins": 13.91314697265625, + "rewards/rejected": -18.32680892944336, + "step": 24490 + }, + { + "epoch": 1.46, + "learning_rate": 3.045522303924785e-06, + "logits/chosen": -2.593608856201172, + "logits/rejected": -1.9803253412246704, + "logps/chosen": -504.0347595214844, + "logps/rejected": -1847.1005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.336209774017334, + "rewards/margins": 13.690391540527344, + "rewards/rejected": -18.026601791381836, + "step": 24500 + }, + { + "epoch": 1.46, + "learning_rate": 3.043829397515419e-06, + "logits/chosen": -2.6110005378723145, + "logits/rejected": -1.976454496383667, + "logps/chosen": -513.5895385742188, + "logps/rejected": -1871.525634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397180557250977, + "rewards/margins": 13.868818283081055, + "rewards/rejected": -18.266000747680664, + "step": 24510 + }, + { + "epoch": 1.46, + "learning_rate": 3.0421362293065875e-06, + "logits/chosen": -2.5643787384033203, + "logits/rejected": -1.8267923593521118, + "logps/chosen": -509.30242919921875, + "logps/rejected": -1780.3297119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.414221286773682, + "rewards/margins": 12.950523376464844, + "rewards/rejected": -17.364744186401367, + "step": 24520 + }, + { + "epoch": 1.46, + "learning_rate": 3.0404428001133825e-06, + "logits/chosen": -2.5822830200195312, + "logits/rejected": -2.0095858573913574, + "logps/chosen": -498.1888122558594, + "logps/rejected": -1830.5302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.264558792114258, + "rewards/margins": 13.602838516235352, + "rewards/rejected": -17.86739730834961, + "step": 24530 + }, + { + "epoch": 1.46, + "learning_rate": 3.03874911075102e-06, + "logits/chosen": -2.6096644401550293, + "logits/rejected": -2.0187525749206543, + "logps/chosen": -510.85760498046875, + "logps/rejected": -1784.8785400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.407971382141113, + "rewards/margins": 12.995002746582031, + "rewards/rejected": -17.40297508239746, + "step": 24540 + }, + { + "epoch": 1.46, + "learning_rate": 3.037055162034842e-06, + "logits/chosen": -2.5749363899230957, + "logits/rejected": -1.8873634338378906, + "logps/chosen": -503.33709716796875, + "logps/rejected": -1760.748291015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.300703048706055, + "rewards/margins": 12.864784240722656, + "rewards/rejected": -17.165485382080078, + "step": 24550 + }, + { + "epoch": 1.46, + "learning_rate": 3.0353609547803166e-06, + "logits/chosen": -2.6120238304138184, + "logits/rejected": -2.019428253173828, + "logps/chosen": -520.010498046875, + "logps/rejected": -1830.5609130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.451628684997559, + "rewards/margins": 13.403810501098633, + "rewards/rejected": -17.855436325073242, + "step": 24560 + }, + { + "epoch": 1.47, + "learning_rate": 3.0336664898030344e-06, + "logits/chosen": -2.602670192718506, + "logits/rejected": -1.9888889789581299, + "logps/chosen": -521.9708251953125, + "logps/rejected": -1896.067138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4437360763549805, + "rewards/margins": 14.05174446105957, + "rewards/rejected": -18.4954833984375, + "step": 24570 + }, + { + "epoch": 1.47, + "learning_rate": 3.031971767918711e-06, + "logits/chosen": -2.604419231414795, + "logits/rejected": -1.9763152599334717, + "logps/chosen": -516.198974609375, + "logps/rejected": -1827.915771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.369152545928955, + "rewards/margins": 13.461918830871582, + "rewards/rejected": -17.831071853637695, + "step": 24580 + }, + { + "epoch": 1.47, + "learning_rate": 3.030276789943185e-06, + "logits/chosen": -2.585637331008911, + "logits/rejected": -2.0544471740722656, + "logps/chosen": -531.2344970703125, + "logps/rejected": -1838.145263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.642889976501465, + "rewards/margins": 13.299046516418457, + "rewards/rejected": -17.941936492919922, + "step": 24590 + }, + { + "epoch": 1.47, + "learning_rate": 3.0285815566924186e-06, + "logits/chosen": -2.6089112758636475, + "logits/rejected": -2.023508310317993, + "logps/chosen": -507.10546875, + "logps/rejected": -1876.339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.322040557861328, + "rewards/margins": 13.983105659484863, + "rewards/rejected": -18.305147171020508, + "step": 24600 + }, + { + "epoch": 1.47, + "learning_rate": 3.026886068982498e-06, + "logits/chosen": -2.6127848625183105, + "logits/rejected": -1.9699945449829102, + "logps/chosen": -492.21319580078125, + "logps/rejected": -1891.749755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.191805839538574, + "rewards/margins": 14.276728630065918, + "rewards/rejected": -18.468534469604492, + "step": 24610 + }, + { + "epoch": 1.47, + "learning_rate": 3.0251903276296305e-06, + "logits/chosen": -2.5790438652038574, + "logits/rejected": -2.0213475227355957, + "logps/chosen": -503.53253173828125, + "logps/rejected": -1790.4736328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.42635440826416, + "rewards/margins": 13.052770614624023, + "rewards/rejected": -17.479124069213867, + "step": 24620 + }, + { + "epoch": 1.47, + "learning_rate": 3.023494333450146e-06, + "logits/chosen": -2.5937037467956543, + "logits/rejected": -2.0258827209472656, + "logps/chosen": -504.05389404296875, + "logps/rejected": -1825.5390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.348501205444336, + "rewards/margins": 13.463136672973633, + "rewards/rejected": -17.8116397857666, + "step": 24630 + }, + { + "epoch": 1.47, + "learning_rate": 3.0217980872604956e-06, + "logits/chosen": -2.6104648113250732, + "logits/rejected": -1.9239991903305054, + "logps/chosen": -519.5091552734375, + "logps/rejected": -1808.3021240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.484814643859863, + "rewards/margins": 13.146321296691895, + "rewards/rejected": -17.631134033203125, + "step": 24640 + }, + { + "epoch": 1.47, + "learning_rate": 3.0201015898772536e-06, + "logits/chosen": -2.5598855018615723, + "logits/rejected": -1.9118146896362305, + "logps/chosen": -518.4773559570312, + "logps/rejected": -1790.685791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.499848365783691, + "rewards/margins": 12.960836410522461, + "rewards/rejected": -17.460683822631836, + "step": 24650 + }, + { + "epoch": 1.47, + "learning_rate": 3.018404842117112e-06, + "logits/chosen": -2.575645923614502, + "logits/rejected": -1.9319385290145874, + "logps/chosen": -513.9312744140625, + "logps/rejected": -1806.023681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.375608444213867, + "rewards/margins": 13.243273735046387, + "rewards/rejected": -17.618881225585938, + "step": 24660 + }, + { + "epoch": 1.47, + "learning_rate": 3.016707844796887e-06, + "logits/chosen": -2.622048854827881, + "logits/rejected": -2.0697197914123535, + "logps/chosen": -501.9599609375, + "logps/rejected": -1842.6187744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.289297580718994, + "rewards/margins": 13.701242446899414, + "rewards/rejected": -17.99053955078125, + "step": 24670 + }, + { + "epoch": 1.47, + "learning_rate": 3.0150105987335115e-06, + "logits/chosen": -2.5876152515411377, + "logits/rejected": -1.979387879371643, + "logps/chosen": -517.1676025390625, + "logps/rejected": -1786.3359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.510659694671631, + "rewards/margins": 12.905946731567383, + "rewards/rejected": -17.416606903076172, + "step": 24680 + }, + { + "epoch": 1.47, + "learning_rate": 3.01331310474404e-06, + "logits/chosen": -2.639159679412842, + "logits/rejected": -1.9994995594024658, + "logps/chosen": -503.8443298339844, + "logps/rejected": -1921.7008056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.327841281890869, + "rewards/margins": 14.453384399414062, + "rewards/rejected": -18.781225204467773, + "step": 24690 + }, + { + "epoch": 1.47, + "learning_rate": 3.011615363645647e-06, + "logits/chosen": -2.6252455711364746, + "logits/rejected": -2.0412025451660156, + "logps/chosen": -504.63848876953125, + "logps/rejected": -1847.7822265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.404056549072266, + "rewards/margins": 13.636711120605469, + "rewards/rejected": -18.040767669677734, + "step": 24700 + }, + { + "epoch": 1.47, + "learning_rate": 3.009917376255624e-06, + "logits/chosen": -2.548442840576172, + "logits/rejected": -1.9387998580932617, + "logps/chosen": -528.8177490234375, + "logps/rejected": -1793.691162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.582554817199707, + "rewards/margins": 12.91557502746582, + "rewards/rejected": -17.498130798339844, + "step": 24710 + }, + { + "epoch": 1.47, + "learning_rate": 3.0082191433913825e-06, + "logits/chosen": -2.5592551231384277, + "logits/rejected": -1.8877537250518799, + "logps/chosen": -535.0689086914062, + "logps/rejected": -1855.482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6259260177612305, + "rewards/margins": 13.484888076782227, + "rewards/rejected": -18.110815048217773, + "step": 24720 + }, + { + "epoch": 1.47, + "learning_rate": 3.006520665870452e-06, + "logits/chosen": -2.598306894302368, + "logits/rejected": -2.008117198944092, + "logps/chosen": -506.4111328125, + "logps/rejected": -1939.4447021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.349876880645752, + "rewards/margins": 14.595659255981445, + "rewards/rejected": -18.945537567138672, + "step": 24730 + }, + { + "epoch": 1.48, + "learning_rate": 3.0048219445104796e-06, + "logits/chosen": -2.556095838546753, + "logits/rejected": -1.9518150091171265, + "logps/chosen": -522.7710571289062, + "logps/rejected": -1791.5228271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.572066307067871, + "rewards/margins": 12.909971237182617, + "rewards/rejected": -17.482036590576172, + "step": 24740 + }, + { + "epoch": 1.48, + "learning_rate": 3.0031229801292293e-06, + "logits/chosen": -2.57838773727417, + "logits/rejected": -1.9194223880767822, + "logps/chosen": -515.6143798828125, + "logps/rejected": -1861.7083740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.458006858825684, + "rewards/margins": 13.709065437316895, + "rewards/rejected": -18.167070388793945, + "step": 24750 + }, + { + "epoch": 1.48, + "learning_rate": 3.001423773544583e-06, + "logits/chosen": -2.552030086517334, + "logits/rejected": -1.9791889190673828, + "logps/chosen": -537.775634765625, + "logps/rejected": -1891.8046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.638034343719482, + "rewards/margins": 13.83881950378418, + "rewards/rejected": -18.47685432434082, + "step": 24760 + }, + { + "epoch": 1.48, + "learning_rate": 2.9997243255745385e-06, + "logits/chosen": -2.6009621620178223, + "logits/rejected": -1.9517189264297485, + "logps/chosen": -509.10528564453125, + "logps/rejected": -1740.733154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.374263763427734, + "rewards/margins": 12.600736618041992, + "rewards/rejected": -16.974998474121094, + "step": 24770 + }, + { + "epoch": 1.48, + "learning_rate": 2.99802463703721e-06, + "logits/chosen": -2.6103410720825195, + "logits/rejected": -2.0109734535217285, + "logps/chosen": -524.3043212890625, + "logps/rejected": -1829.947021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.527031898498535, + "rewards/margins": 13.328407287597656, + "rewards/rejected": -17.85544204711914, + "step": 24780 + }, + { + "epoch": 1.48, + "learning_rate": 2.9963247087508272e-06, + "logits/chosen": -2.605431079864502, + "logits/rejected": -2.0195937156677246, + "logps/chosen": -531.60595703125, + "logps/rejected": -1848.653076171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.60469388961792, + "rewards/margins": 13.4276704788208, + "rewards/rejected": -18.032363891601562, + "step": 24790 + }, + { + "epoch": 1.48, + "learning_rate": 2.9946245415337367e-06, + "logits/chosen": -2.6312596797943115, + "logits/rejected": -1.9821017980575562, + "logps/chosen": -506.24761962890625, + "logps/rejected": -1875.465576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.377021312713623, + "rewards/margins": 13.931005477905273, + "rewards/rejected": -18.308025360107422, + "step": 24800 + }, + { + "epoch": 1.48, + "learning_rate": 2.9929241362043976e-06, + "logits/chosen": -2.585299491882324, + "logits/rejected": -1.9277629852294922, + "logps/chosen": -524.9064331054688, + "logps/rejected": -1872.0595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.578917503356934, + "rewards/margins": 13.694753646850586, + "rewards/rejected": -18.273670196533203, + "step": 24810 + }, + { + "epoch": 1.48, + "learning_rate": 2.9912234935813855e-06, + "logits/chosen": -2.6663641929626465, + "logits/rejected": -2.023648738861084, + "logps/chosen": -519.0140991210938, + "logps/rejected": -1896.992919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5033650398254395, + "rewards/margins": 14.031644821166992, + "rewards/rejected": -18.535011291503906, + "step": 24820 + }, + { + "epoch": 1.48, + "learning_rate": 2.9895226144833907e-06, + "logits/chosen": -2.595224142074585, + "logits/rejected": -1.9935401678085327, + "logps/chosen": -514.1012573242188, + "logps/rejected": -1837.923095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.418954372406006, + "rewards/margins": 13.516650199890137, + "rewards/rejected": -17.935604095458984, + "step": 24830 + }, + { + "epoch": 1.48, + "learning_rate": 2.9878214997292155e-06, + "logits/chosen": -2.5934948921203613, + "logits/rejected": -1.9753806591033936, + "logps/chosen": -502.50860595703125, + "logps/rejected": -1803.447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.353327751159668, + "rewards/margins": 13.244410514831543, + "rewards/rejected": -17.59773826599121, + "step": 24840 + }, + { + "epoch": 1.48, + "learning_rate": 2.9861201501377755e-06, + "logits/chosen": -2.5953469276428223, + "logits/rejected": -1.9482675790786743, + "logps/chosen": -520.4183349609375, + "logps/rejected": -1836.280029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.458642482757568, + "rewards/margins": 13.459246635437012, + "rewards/rejected": -17.917888641357422, + "step": 24850 + }, + { + "epoch": 1.48, + "learning_rate": 2.984418566528102e-06, + "logits/chosen": -2.555109739303589, + "logits/rejected": -1.7785968780517578, + "logps/chosen": -525.494873046875, + "logps/rejected": -1911.770263671875, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.548425674438477, + "rewards/margins": 14.118586540222168, + "rewards/rejected": -18.66701316833496, + "step": 24860 + }, + { + "epoch": 1.48, + "learning_rate": 2.9827167497193367e-06, + "logits/chosen": -2.512787342071533, + "logits/rejected": -1.7226041555404663, + "logps/chosen": -596.2574462890625, + "logps/rejected": -2057.72802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.292929649353027, + "rewards/margins": 14.84161376953125, + "rewards/rejected": -20.13454246520996, + "step": 24870 + }, + { + "epoch": 1.48, + "learning_rate": 2.981014700530734e-06, + "logits/chosen": -2.524829387664795, + "logits/rejected": -1.6102463006973267, + "logps/chosen": -608.7459716796875, + "logps/rejected": -1987.596435546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4455108642578125, + "rewards/margins": 13.978734970092773, + "rewards/rejected": -19.424243927001953, + "step": 24880 + }, + { + "epoch": 1.48, + "learning_rate": 2.9793124197816613e-06, + "logits/chosen": -2.4920148849487305, + "logits/rejected": -1.727016806602478, + "logps/chosen": -662.7921752929688, + "logps/rejected": -2091.51171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980954170227051, + "rewards/margins": 14.495803833007812, + "rewards/rejected": -20.476757049560547, + "step": 24890 + }, + { + "epoch": 1.48, + "learning_rate": 2.9776099082915954e-06, + "logits/chosen": -2.5117907524108887, + "logits/rejected": -1.709674596786499, + "logps/chosen": -663.1458740234375, + "logps/rejected": -1943.6236572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923468112945557, + "rewards/margins": 13.086601257324219, + "rewards/rejected": -19.01007080078125, + "step": 24900 + }, + { + "epoch": 1.49, + "learning_rate": 2.9759071668801254e-06, + "logits/chosen": -2.5013465881347656, + "logits/rejected": -1.6517565250396729, + "logps/chosen": -655.090087890625, + "logps/rejected": -2145.578857421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.821039199829102, + "rewards/margins": 15.187210083007812, + "rewards/rejected": -21.008249282836914, + "step": 24910 + }, + { + "epoch": 1.49, + "learning_rate": 2.9742041963669514e-06, + "logits/chosen": -2.458097457885742, + "logits/rejected": -1.6963294744491577, + "logps/chosen": -663.551513671875, + "logps/rejected": -1993.7398681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.890705585479736, + "rewards/margins": 13.59844970703125, + "rewards/rejected": -19.489154815673828, + "step": 24920 + }, + { + "epoch": 1.49, + "learning_rate": 2.9725009975718845e-06, + "logits/chosen": -2.5489306449890137, + "logits/rejected": -1.7866023778915405, + "logps/chosen": -663.6116943359375, + "logps/rejected": -2060.62841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.883182048797607, + "rewards/margins": 14.282092094421387, + "rewards/rejected": -20.16527557373047, + "step": 24930 + }, + { + "epoch": 1.49, + "learning_rate": 2.9707975713148423e-06, + "logits/chosen": -2.5590462684631348, + "logits/rejected": -1.773455023765564, + "logps/chosen": -639.1353759765625, + "logps/rejected": -2164.556884765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.676341533660889, + "rewards/margins": 15.527229309082031, + "rewards/rejected": -21.203571319580078, + "step": 24940 + }, + { + "epoch": 1.49, + "learning_rate": 2.9690939184158557e-06, + "logits/chosen": -2.5299735069274902, + "logits/rejected": -1.8447353839874268, + "logps/chosen": -638.658203125, + "logps/rejected": -1986.123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.626654624938965, + "rewards/margins": 13.80284309387207, + "rewards/rejected": -19.42949676513672, + "step": 24950 + }, + { + "epoch": 1.49, + "learning_rate": 2.9673900396950622e-06, + "logits/chosen": -2.4459352493286133, + "logits/rejected": -1.6037871837615967, + "logps/chosen": -672.4859619140625, + "logps/rejected": -2063.2109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965609550476074, + "rewards/margins": 14.224347114562988, + "rewards/rejected": -20.18995475769043, + "step": 24960 + }, + { + "epoch": 1.49, + "learning_rate": 2.9656859359727095e-06, + "logits/chosen": -2.5657296180725098, + "logits/rejected": -1.8316208124160767, + "logps/chosen": -639.4462890625, + "logps/rejected": -2007.3746337890625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6648454666137695, + "rewards/margins": 13.988225936889648, + "rewards/rejected": -19.653072357177734, + "step": 24970 + }, + { + "epoch": 1.49, + "learning_rate": 2.963981608069154e-06, + "logits/chosen": -2.5118980407714844, + "logits/rejected": -1.6952180862426758, + "logps/chosen": -633.1537475585938, + "logps/rejected": -2054.529541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.631985664367676, + "rewards/margins": 14.468576431274414, + "rewards/rejected": -20.100561141967773, + "step": 24980 + }, + { + "epoch": 1.49, + "learning_rate": 2.9622770568048577e-06, + "logits/chosen": -2.571885347366333, + "logits/rejected": -1.9012653827667236, + "logps/chosen": -613.1573486328125, + "logps/rejected": -2039.2464599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.404243469238281, + "rewards/margins": 14.534723281860352, + "rewards/rejected": -19.938966751098633, + "step": 24990 + }, + { + "epoch": 1.49, + "learning_rate": 2.9605722830003926e-06, + "logits/chosen": -2.4965591430664062, + "logits/rejected": -1.7751762866973877, + "logps/chosen": -605.2136840820312, + "logps/rejected": -1968.169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3588738441467285, + "rewards/margins": 13.864160537719727, + "rewards/rejected": -19.223033905029297, + "step": 25000 + }, + { + "epoch": 1.49, + "learning_rate": 2.958867287476436e-06, + "logits/chosen": -2.5125374794006348, + "logits/rejected": -1.8407185077667236, + "logps/chosen": -616.8413696289062, + "logps/rejected": -2097.51953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4311699867248535, + "rewards/margins": 15.094369888305664, + "rewards/rejected": -20.52553939819336, + "step": 25010 + }, + { + "epoch": 1.49, + "learning_rate": 2.9571620710537726e-06, + "logits/chosen": -2.5205092430114746, + "logits/rejected": -1.7434555292129517, + "logps/chosen": -626.37353515625, + "logps/rejected": -2022.7359619140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.60182523727417, + "rewards/margins": 14.174827575683594, + "rewards/rejected": -19.776653289794922, + "step": 25020 + }, + { + "epoch": 1.49, + "learning_rate": 2.955456634553294e-06, + "logits/chosen": -2.5138115882873535, + "logits/rejected": -1.7598835229873657, + "logps/chosen": -650.6315307617188, + "logps/rejected": -2089.1494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8724565505981445, + "rewards/margins": 14.561935424804688, + "rewards/rejected": -20.434391021728516, + "step": 25030 + }, + { + "epoch": 1.49, + "learning_rate": 2.953750978795998e-06, + "logits/chosen": -2.5260589122772217, + "logits/rejected": -1.768599510192871, + "logps/chosen": -590.8729858398438, + "logps/rejected": -2060.40283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.20516300201416, + "rewards/margins": 14.956751823425293, + "rewards/rejected": -20.161914825439453, + "step": 25040 + }, + { + "epoch": 1.49, + "learning_rate": 2.9520451046029862e-06, + "logits/chosen": -2.514847755432129, + "logits/rejected": -1.8034393787384033, + "logps/chosen": -615.9581909179688, + "logps/rejected": -2127.664306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.485663414001465, + "rewards/margins": 15.340304374694824, + "rewards/rejected": -20.82596778869629, + "step": 25050 + }, + { + "epoch": 1.49, + "learning_rate": 2.9503390127954673e-06, + "logits/chosen": -2.503606081008911, + "logits/rejected": -1.713356614112854, + "logps/chosen": -640.1488037109375, + "logps/rejected": -2087.08447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697963237762451, + "rewards/margins": 14.725191116333008, + "rewards/rejected": -20.423154830932617, + "step": 25060 + }, + { + "epoch": 1.49, + "learning_rate": 2.9486327041947533e-06, + "logits/chosen": -2.5625505447387695, + "logits/rejected": -1.8540875911712646, + "logps/chosen": -607.5926513671875, + "logps/rejected": -2061.997314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.383309364318848, + "rewards/margins": 14.796979904174805, + "rewards/rejected": -20.18029022216797, + "step": 25070 + }, + { + "epoch": 1.5, + "learning_rate": 2.9469261796222608e-06, + "logits/chosen": -2.5634257793426514, + "logits/rejected": -1.7426786422729492, + "logps/chosen": -613.7000122070312, + "logps/rejected": -2039.441650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.426351547241211, + "rewards/margins": 14.524019241333008, + "rewards/rejected": -19.950368881225586, + "step": 25080 + }, + { + "epoch": 1.5, + "learning_rate": 2.9452194398995114e-06, + "logits/chosen": -2.5219197273254395, + "logits/rejected": -1.8199241161346436, + "logps/chosen": -650.2040405273438, + "logps/rejected": -1999.1207275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.770579814910889, + "rewards/margins": 13.78471851348877, + "rewards/rejected": -19.555299758911133, + "step": 25090 + }, + { + "epoch": 1.5, + "learning_rate": 2.943512485848129e-06, + "logits/chosen": -2.5114715099334717, + "logits/rejected": -1.7366832494735718, + "logps/chosen": -618.5730590820312, + "logps/rejected": -1929.787109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4859619140625, + "rewards/margins": 13.372830390930176, + "rewards/rejected": -18.85879135131836, + "step": 25100 + }, + { + "epoch": 1.5, + "learning_rate": 2.9418053182898428e-06, + "logits/chosen": -2.5386364459991455, + "logits/rejected": -1.788083791732788, + "logps/chosen": -620.1588134765625, + "logps/rejected": -1973.461669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.491566181182861, + "rewards/margins": 13.818794250488281, + "rewards/rejected": -19.310359954833984, + "step": 25110 + }, + { + "epoch": 1.5, + "learning_rate": 2.9400979380464805e-06, + "logits/chosen": -2.572131872177124, + "logits/rejected": -1.87667977809906, + "logps/chosen": -609.852294921875, + "logps/rejected": -2052.76220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.370213508605957, + "rewards/margins": 14.714741706848145, + "rewards/rejected": -20.084957122802734, + "step": 25120 + }, + { + "epoch": 1.5, + "learning_rate": 2.938390345939977e-06, + "logits/chosen": -2.5688259601593018, + "logits/rejected": -1.8509410619735718, + "logps/chosen": -621.3579711914062, + "logps/rejected": -2028.0111083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.508975982666016, + "rewards/margins": 14.324533462524414, + "rewards/rejected": -19.833507537841797, + "step": 25130 + }, + { + "epoch": 1.5, + "learning_rate": 2.936682542792367e-06, + "logits/chosen": -2.541842222213745, + "logits/rejected": -1.8848384618759155, + "logps/chosen": -610.576171875, + "logps/rejected": -2068.7919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392984867095947, + "rewards/margins": 14.852457046508789, + "rewards/rejected": -20.245441436767578, + "step": 25140 + }, + { + "epoch": 1.5, + "learning_rate": 2.9349745294257854e-06, + "logits/chosen": -2.573237895965576, + "logits/rejected": -1.906643271446228, + "logps/chosen": -594.937744140625, + "logps/rejected": -1988.3277587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.291370868682861, + "rewards/margins": 14.155177116394043, + "rewards/rejected": -19.44654655456543, + "step": 25150 + }, + { + "epoch": 1.5, + "learning_rate": 2.9332663066624716e-06, + "logits/chosen": -2.485691547393799, + "logits/rejected": -1.7707312107086182, + "logps/chosen": -611.3036499023438, + "logps/rejected": -2037.106201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.452343940734863, + "rewards/margins": 14.47212028503418, + "rewards/rejected": -19.924463272094727, + "step": 25160 + }, + { + "epoch": 1.5, + "learning_rate": 2.9315578753247632e-06, + "logits/chosen": -2.587263822555542, + "logits/rejected": -1.9333938360214233, + "logps/chosen": -595.7578125, + "logps/rejected": -2028.9771728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.25488805770874, + "rewards/margins": 14.584773063659668, + "rewards/rejected": -19.83966064453125, + "step": 25170 + }, + { + "epoch": 1.5, + "learning_rate": 2.929849236235099e-06, + "logits/chosen": -2.5527663230895996, + "logits/rejected": -1.8734394311904907, + "logps/chosen": -614.6629028320312, + "logps/rejected": -2145.42626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.412468910217285, + "rewards/margins": 15.592294692993164, + "rewards/rejected": -21.004762649536133, + "step": 25180 + }, + { + "epoch": 1.5, + "learning_rate": 2.9281403902160177e-06, + "logits/chosen": -2.535842180252075, + "logits/rejected": -1.8295456171035767, + "logps/chosen": -599.9041748046875, + "logps/rejected": -2089.1669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.361631393432617, + "rewards/margins": 15.087259292602539, + "rewards/rejected": -20.448890686035156, + "step": 25190 + }, + { + "epoch": 1.5, + "learning_rate": 2.9264313380901586e-06, + "logits/chosen": -2.578202486038208, + "logits/rejected": -1.8551743030548096, + "logps/chosen": -611.7566528320312, + "logps/rejected": -1976.3978271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.438323020935059, + "rewards/margins": 13.893075942993164, + "rewards/rejected": -19.331396102905273, + "step": 25200 + }, + { + "epoch": 1.5, + "learning_rate": 2.924722080680259e-06, + "logits/chosen": -2.5287628173828125, + "logits/rejected": -1.7482516765594482, + "logps/chosen": -604.0877075195312, + "logps/rejected": -1968.0478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.313597679138184, + "rewards/margins": 13.936752319335938, + "rewards/rejected": -19.250350952148438, + "step": 25210 + }, + { + "epoch": 1.5, + "learning_rate": 2.9230126188091545e-06, + "logits/chosen": -2.5596938133239746, + "logits/rejected": -1.957973837852478, + "logps/chosen": -613.7003173828125, + "logps/rejected": -1986.5240478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.443167686462402, + "rewards/margins": 13.982511520385742, + "rewards/rejected": -19.42568016052246, + "step": 25220 + }, + { + "epoch": 1.5, + "learning_rate": 2.921302953299781e-06, + "logits/chosen": -2.529261827468872, + "logits/rejected": -1.908822774887085, + "logps/chosen": -609.4354858398438, + "logps/rejected": -2045.3687744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369729995727539, + "rewards/margins": 14.647926330566406, + "rewards/rejected": -20.017656326293945, + "step": 25230 + }, + { + "epoch": 1.51, + "learning_rate": 2.9195930849751707e-06, + "logits/chosen": -2.511260509490967, + "logits/rejected": -1.7863152027130127, + "logps/chosen": -628.9655151367188, + "logps/rejected": -2110.699951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.623652458190918, + "rewards/margins": 15.0370454788208, + "rewards/rejected": -20.660696029663086, + "step": 25240 + }, + { + "epoch": 1.51, + "learning_rate": 2.9178830146584547e-06, + "logits/chosen": -2.54658842086792, + "logits/rejected": -1.8329026699066162, + "logps/chosen": -611.87548828125, + "logps/rejected": -2033.055419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.433197975158691, + "rewards/margins": 14.456100463867188, + "rewards/rejected": -19.889299392700195, + "step": 25250 + }, + { + "epoch": 1.51, + "learning_rate": 2.916172743172861e-06, + "logits/chosen": -2.5317282676696777, + "logits/rejected": -1.8099462985992432, + "logps/chosen": -602.1143798828125, + "logps/rejected": -2041.9840087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.227929592132568, + "rewards/margins": 14.737678527832031, + "rewards/rejected": -19.965608596801758, + "step": 25260 + }, + { + "epoch": 1.51, + "learning_rate": 2.914462271341714e-06, + "logits/chosen": -2.577422618865967, + "logits/rejected": -1.9040515422821045, + "logps/chosen": -627.4253540039062, + "logps/rejected": -2080.717529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.555980682373047, + "rewards/margins": 14.813909530639648, + "rewards/rejected": -20.369890213012695, + "step": 25270 + }, + { + "epoch": 1.51, + "learning_rate": 2.9127515999884355e-06, + "logits/chosen": -2.5717031955718994, + "logits/rejected": -1.9397083520889282, + "logps/chosen": -598.8350219726562, + "logps/rejected": -2056.70947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.308040142059326, + "rewards/margins": 14.818936347961426, + "rewards/rejected": -20.126977920532227, + "step": 25280 + }, + { + "epoch": 1.51, + "learning_rate": 2.911040729936542e-06, + "logits/chosen": -2.5474092960357666, + "logits/rejected": -1.8359298706054688, + "logps/chosen": -611.943603515625, + "logps/rejected": -2067.45751953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.401292324066162, + "rewards/margins": 14.805047988891602, + "rewards/rejected": -20.20633888244629, + "step": 25290 + }, + { + "epoch": 1.51, + "learning_rate": 2.9093296620096457e-06, + "logits/chosen": -2.5675175189971924, + "logits/rejected": -1.8801603317260742, + "logps/chosen": -639.47216796875, + "logps/rejected": -2137.376220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.671220302581787, + "rewards/margins": 15.261106491088867, + "rewards/rejected": -20.932327270507812, + "step": 25300 + }, + { + "epoch": 1.51, + "learning_rate": 2.9076183970314555e-06, + "logits/chosen": -2.582846164703369, + "logits/rejected": -1.8302900791168213, + "logps/chosen": -624.48046875, + "logps/rejected": -2057.7451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.512694358825684, + "rewards/margins": 14.628923416137695, + "rewards/rejected": -20.141616821289062, + "step": 25310 + }, + { + "epoch": 1.51, + "learning_rate": 2.905906935825774e-06, + "logits/chosen": -2.542541980743408, + "logits/rejected": -1.7804327011108398, + "logps/chosen": -619.4090576171875, + "logps/rejected": -2035.403564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4714508056640625, + "rewards/margins": 14.440610885620117, + "rewards/rejected": -19.912063598632812, + "step": 25320 + }, + { + "epoch": 1.51, + "learning_rate": 2.9041952792164987e-06, + "logits/chosen": -2.5267481803894043, + "logits/rejected": -1.7685855627059937, + "logps/chosen": -616.9445190429688, + "logps/rejected": -2145.16064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.448470592498779, + "rewards/margins": 15.55842399597168, + "rewards/rejected": -21.006893157958984, + "step": 25330 + }, + { + "epoch": 1.51, + "learning_rate": 2.9024834280276215e-06, + "logits/chosen": -2.5696568489074707, + "logits/rejected": -1.8874595165252686, + "logps/chosen": -613.7722778320312, + "logps/rejected": -1982.310791015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.437476634979248, + "rewards/margins": 13.93730354309082, + "rewards/rejected": -19.374778747558594, + "step": 25340 + }, + { + "epoch": 1.51, + "learning_rate": 2.900771383083227e-06, + "logits/chosen": -2.548893451690674, + "logits/rejected": -1.881034255027771, + "logps/chosen": -636.7547607421875, + "logps/rejected": -2083.9921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.638357639312744, + "rewards/margins": 14.76335620880127, + "rewards/rejected": -20.401714324951172, + "step": 25350 + }, + { + "epoch": 1.51, + "learning_rate": 2.8990591452074933e-06, + "logits/chosen": -2.5035207271575928, + "logits/rejected": -1.8632566928863525, + "logps/chosen": -633.8713989257812, + "logps/rejected": -2017.4761962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5947675704956055, + "rewards/margins": 14.146194458007812, + "rewards/rejected": -19.740962982177734, + "step": 25360 + }, + { + "epoch": 1.51, + "learning_rate": 2.897346715224693e-06, + "logits/chosen": -2.542178153991699, + "logits/rejected": -1.861159324645996, + "logps/chosen": -636.0904541015625, + "logps/rejected": -2165.153564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.668332576751709, + "rewards/margins": 15.54945182800293, + "rewards/rejected": -21.217784881591797, + "step": 25370 + }, + { + "epoch": 1.51, + "learning_rate": 2.895634093959189e-06, + "logits/chosen": -2.4894673824310303, + "logits/rejected": -1.8241851329803467, + "logps/chosen": -631.77587890625, + "logps/rejected": -2025.723876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.629080772399902, + "rewards/margins": 14.175517082214355, + "rewards/rejected": -19.80459976196289, + "step": 25380 + }, + { + "epoch": 1.51, + "learning_rate": 2.8939212822354373e-06, + "logits/chosen": -2.5632965564727783, + "logits/rejected": -1.8925085067749023, + "logps/chosen": -640.4028930664062, + "logps/rejected": -2056.3046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.705738544464111, + "rewards/margins": 14.408662796020508, + "rewards/rejected": -20.11440086364746, + "step": 25390 + }, + { + "epoch": 1.51, + "learning_rate": 2.892208280877985e-06, + "logits/chosen": -2.523160219192505, + "logits/rejected": -1.826586365699768, + "logps/chosen": -633.426025390625, + "logps/rejected": -2110.822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6421990394592285, + "rewards/margins": 15.027727127075195, + "rewards/rejected": -20.669925689697266, + "step": 25400 + }, + { + "epoch": 1.52, + "learning_rate": 2.8904950907114715e-06, + "logits/chosen": -2.5445752143859863, + "logits/rejected": -1.8941891193389893, + "logps/chosen": -613.9300537109375, + "logps/rejected": -2103.64453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.469842433929443, + "rewards/margins": 15.133813858032227, + "rewards/rejected": -20.603656768798828, + "step": 25410 + }, + { + "epoch": 1.52, + "learning_rate": 2.888781712560626e-06, + "logits/chosen": -2.542649507522583, + "logits/rejected": -1.8219749927520752, + "logps/chosen": -622.7211303710938, + "logps/rejected": -2080.79296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.540044784545898, + "rewards/margins": 14.81628704071045, + "rewards/rejected": -20.356334686279297, + "step": 25420 + }, + { + "epoch": 1.52, + "learning_rate": 2.8870681472502695e-06, + "logits/chosen": -2.5906565189361572, + "logits/rejected": -1.8707077503204346, + "logps/chosen": -625.3233642578125, + "logps/rejected": -2124.31494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.533006191253662, + "rewards/margins": 15.263418197631836, + "rewards/rejected": -20.796422958374023, + "step": 25430 + }, + { + "epoch": 1.52, + "learning_rate": 2.885354395605311e-06, + "logits/chosen": -2.584428548812866, + "logits/rejected": -1.8349673748016357, + "logps/chosen": -624.9132690429688, + "logps/rejected": -2039.001708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.495282173156738, + "rewards/margins": 14.459269523620605, + "rewards/rejected": -19.954553604125977, + "step": 25440 + }, + { + "epoch": 1.52, + "learning_rate": 2.883640458450752e-06, + "logits/chosen": -2.5446085929870605, + "logits/rejected": -1.7908395528793335, + "logps/chosen": -601.1107177734375, + "logps/rejected": -2156.39599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.354315757751465, + "rewards/margins": 15.762982368469238, + "rewards/rejected": -21.117298126220703, + "step": 25450 + }, + { + "epoch": 1.52, + "learning_rate": 2.881926336611681e-06, + "logits/chosen": -2.573852300643921, + "logits/rejected": -1.8203222751617432, + "logps/chosen": -613.9166259765625, + "logps/rejected": -2146.062744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.444628715515137, + "rewards/margins": 15.57036018371582, + "rewards/rejected": -21.01498794555664, + "step": 25460 + }, + { + "epoch": 1.52, + "learning_rate": 2.880212030913276e-06, + "logits/chosen": -2.5310170650482178, + "logits/rejected": -1.860640287399292, + "logps/chosen": -630.9690551757812, + "logps/rejected": -2061.87353515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.633906364440918, + "rewards/margins": 14.520530700683594, + "rewards/rejected": -20.154438018798828, + "step": 25470 + }, + { + "epoch": 1.52, + "learning_rate": 2.8784975421808054e-06, + "logits/chosen": -2.5655250549316406, + "logits/rejected": -1.9222224950790405, + "logps/chosen": -613.7086181640625, + "logps/rejected": -2094.152587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.482189655303955, + "rewards/margins": 15.016828536987305, + "rewards/rejected": -20.499019622802734, + "step": 25480 + }, + { + "epoch": 1.52, + "learning_rate": 2.8767828712396218e-06, + "logits/chosen": -2.553748846054077, + "logits/rejected": -1.7745596170425415, + "logps/chosen": -628.0318603515625, + "logps/rejected": -2006.029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.58158016204834, + "rewards/margins": 14.03338623046875, + "rewards/rejected": -19.614965438842773, + "step": 25490 + }, + { + "epoch": 1.52, + "learning_rate": 2.875068018915169e-06, + "logits/chosen": -2.5507164001464844, + "logits/rejected": -1.8776553869247437, + "logps/chosen": -611.5848999023438, + "logps/rejected": -1965.0950927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.438872337341309, + "rewards/margins": 13.775497436523438, + "rewards/rejected": -19.214370727539062, + "step": 25500 + }, + { + "epoch": 1.52, + "learning_rate": 2.873352986032977e-06, + "logits/chosen": -2.532740592956543, + "logits/rejected": -1.9023301601409912, + "logps/chosen": -629.1527099609375, + "logps/rejected": -2008.825439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.628189563751221, + "rewards/margins": 14.022283554077148, + "rewards/rejected": -19.65047264099121, + "step": 25510 + }, + { + "epoch": 1.52, + "learning_rate": 2.871637773418662e-06, + "logits/chosen": -2.5584607124328613, + "logits/rejected": -1.9304683208465576, + "logps/chosen": -634.9510498046875, + "logps/rejected": -2093.315185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.639827728271484, + "rewards/margins": 14.846652030944824, + "rewards/rejected": -20.486480712890625, + "step": 25520 + }, + { + "epoch": 1.52, + "learning_rate": 2.8699223818979274e-06, + "logits/chosen": -2.585991382598877, + "logits/rejected": -1.984379529953003, + "logps/chosen": -622.35498046875, + "logps/rejected": -2088.276123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.494224548339844, + "rewards/margins": 14.936655044555664, + "rewards/rejected": -20.43088150024414, + "step": 25530 + }, + { + "epoch": 1.52, + "learning_rate": 2.8682068122965632e-06, + "logits/chosen": -2.530574321746826, + "logits/rejected": -1.8678476810455322, + "logps/chosen": -616.0535888671875, + "logps/rejected": -2042.203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.4520955085754395, + "rewards/margins": 14.527628898620605, + "rewards/rejected": -19.979724884033203, + "step": 25540 + }, + { + "epoch": 1.52, + "learning_rate": 2.8664910654404445e-06, + "logits/chosen": -2.5938351154327393, + "logits/rejected": -1.9094161987304688, + "logps/chosen": -625.3798828125, + "logps/rejected": -2100.962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.548039436340332, + "rewards/margins": 15.019401550292969, + "rewards/rejected": -20.567440032958984, + "step": 25550 + }, + { + "epoch": 1.52, + "learning_rate": 2.8647751421555313e-06, + "logits/chosen": -2.5603113174438477, + "logits/rejected": -1.9062700271606445, + "logps/chosen": -620.5293579101562, + "logps/rejected": -2073.094970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.462475776672363, + "rewards/margins": 14.828906059265137, + "rewards/rejected": -20.291378021240234, + "step": 25560 + }, + { + "epoch": 1.52, + "learning_rate": 2.8630590432678694e-06, + "logits/chosen": -2.5986456871032715, + "logits/rejected": -1.9594370126724243, + "logps/chosen": -621.4139404296875, + "logps/rejected": -2103.12060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.508350372314453, + "rewards/margins": 15.058026313781738, + "rewards/rejected": -20.566375732421875, + "step": 25570 + }, + { + "epoch": 1.53, + "learning_rate": 2.8613427696035885e-06, + "logits/chosen": -2.4863121509552, + "logits/rejected": -1.7374919652938843, + "logps/chosen": -606.546630859375, + "logps/rejected": -2070.800537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.417147636413574, + "rewards/margins": 14.846399307250977, + "rewards/rejected": -20.2635440826416, + "step": 25580 + }, + { + "epoch": 1.53, + "learning_rate": 2.859626321988903e-06, + "logits/chosen": -2.534411907196045, + "logits/rejected": -1.8232452869415283, + "logps/chosen": -640.2349853515625, + "logps/rejected": -2210.59326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.708590030670166, + "rewards/margins": 15.953729629516602, + "rewards/rejected": -21.662317276000977, + "step": 25590 + }, + { + "epoch": 1.53, + "learning_rate": 2.8579097012501108e-06, + "logits/chosen": -2.5099234580993652, + "logits/rejected": -1.8327281475067139, + "logps/chosen": -644.3458251953125, + "logps/rejected": -2111.13720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.774910926818848, + "rewards/margins": 14.897418022155762, + "rewards/rejected": -20.67232894897461, + "step": 25600 + }, + { + "epoch": 1.53, + "learning_rate": 2.8561929082135925e-06, + "logits/chosen": -2.5488498210906982, + "logits/rejected": -1.837891936302185, + "logps/chosen": -620.7623291015625, + "logps/rejected": -2159.56494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.46594762802124, + "rewards/margins": 15.674827575683594, + "rewards/rejected": -21.14077377319336, + "step": 25610 + }, + { + "epoch": 1.53, + "learning_rate": 2.8544759437058135e-06, + "logits/chosen": -2.506117582321167, + "logits/rejected": -1.8524996042251587, + "logps/chosen": -622.0333251953125, + "logps/rejected": -2053.569091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.511847496032715, + "rewards/margins": 14.57774543762207, + "rewards/rejected": -20.089590072631836, + "step": 25620 + }, + { + "epoch": 1.53, + "learning_rate": 2.8527588085533184e-06, + "logits/chosen": -2.5583596229553223, + "logits/rejected": -1.8844659328460693, + "logps/chosen": -628.187744140625, + "logps/rejected": -1980.3199462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.548740386962891, + "rewards/margins": 13.80088996887207, + "rewards/rejected": -19.34963035583496, + "step": 25630 + }, + { + "epoch": 1.53, + "learning_rate": 2.8510415035827394e-06, + "logits/chosen": -2.546630382537842, + "logits/rejected": -1.871336579322815, + "logps/chosen": -621.2403564453125, + "logps/rejected": -2169.4365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.500617980957031, + "rewards/margins": 15.7475004196167, + "rewards/rejected": -21.248119354248047, + "step": 25640 + }, + { + "epoch": 1.53, + "learning_rate": 2.8493240296207835e-06, + "logits/chosen": -2.5540928840637207, + "logits/rejected": -1.8088347911834717, + "logps/chosen": -617.7047729492188, + "logps/rejected": -2177.17626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.473719596862793, + "rewards/margins": 15.848295211791992, + "rewards/rejected": -21.32201385498047, + "step": 25650 + }, + { + "epoch": 1.53, + "learning_rate": 2.847606387494245e-06, + "logits/chosen": -2.515106678009033, + "logits/rejected": -1.8609449863433838, + "logps/chosen": -631.3516845703125, + "logps/rejected": -2085.3310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597093105316162, + "rewards/margins": 14.825777053833008, + "rewards/rejected": -20.422870635986328, + "step": 25660 + }, + { + "epoch": 1.53, + "learning_rate": 2.8458885780299956e-06, + "logits/chosen": -2.5692501068115234, + "logits/rejected": -1.8366388082504272, + "logps/chosen": -619.34423828125, + "logps/rejected": -2022.525634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5083513259887695, + "rewards/margins": 14.286643981933594, + "rewards/rejected": -19.79499626159668, + "step": 25670 + }, + { + "epoch": 1.53, + "learning_rate": 2.844170602054989e-06, + "logits/chosen": -2.556790590286255, + "logits/rejected": -1.715447187423706, + "logps/chosen": -607.0555419921875, + "logps/rejected": -2061.267333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.369542121887207, + "rewards/margins": 14.803723335266113, + "rewards/rejected": -20.173267364501953, + "step": 25680 + }, + { + "epoch": 1.53, + "learning_rate": 2.8424524603962588e-06, + "logits/chosen": -2.551392078399658, + "logits/rejected": -1.8959795236587524, + "logps/chosen": -643.2562866210938, + "logps/rejected": -2026.369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.728809356689453, + "rewards/margins": 14.103769302368164, + "rewards/rejected": -19.832578659057617, + "step": 25690 + }, + { + "epoch": 1.53, + "learning_rate": 2.8407341538809192e-06, + "logits/chosen": -2.5531764030456543, + "logits/rejected": -1.7580238580703735, + "logps/chosen": -647.478515625, + "logps/rejected": -2066.85693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.757789134979248, + "rewards/margins": 14.466547966003418, + "rewards/rejected": -20.22433853149414, + "step": 25700 + }, + { + "epoch": 1.53, + "learning_rate": 2.8390156833361616e-06, + "logits/chosen": -2.5619797706604004, + "logits/rejected": -1.7875845432281494, + "logps/chosen": -632.98291015625, + "logps/rejected": -2091.0458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.622527122497559, + "rewards/margins": 14.838888168334961, + "rewards/rejected": -20.461416244506836, + "step": 25710 + }, + { + "epoch": 1.53, + "learning_rate": 2.837297049589259e-06, + "logits/chosen": -2.5720181465148926, + "logits/rejected": -1.9108387231826782, + "logps/chosen": -627.5384521484375, + "logps/rejected": -2045.274169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.591106414794922, + "rewards/margins": 14.416943550109863, + "rewards/rejected": -20.0080509185791, + "step": 25720 + }, + { + "epoch": 1.53, + "learning_rate": 2.8355782534675603e-06, + "logits/chosen": -2.5933384895324707, + "logits/rejected": -1.817470908164978, + "logps/chosen": -616.2100219726562, + "logps/rejected": -2022.9566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.50900411605835, + "rewards/margins": 14.27903938293457, + "rewards/rejected": -19.788043975830078, + "step": 25730 + }, + { + "epoch": 1.53, + "learning_rate": 2.833859295798495e-06, + "logits/chosen": -2.5331544876098633, + "logits/rejected": -1.7288792133331299, + "logps/chosen": -627.0507202148438, + "logps/rejected": -2051.639892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575649261474609, + "rewards/margins": 14.505337715148926, + "rewards/rejected": -20.08098793029785, + "step": 25740 + }, + { + "epoch": 1.54, + "learning_rate": 2.832140177409569e-06, + "logits/chosen": -2.5623419284820557, + "logits/rejected": -1.7832568883895874, + "logps/chosen": -636.9949951171875, + "logps/rejected": -2086.361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.629823684692383, + "rewards/margins": 14.791979789733887, + "rewards/rejected": -20.421802520751953, + "step": 25750 + }, + { + "epoch": 1.54, + "learning_rate": 2.830420899128366e-06, + "logits/chosen": -2.5241475105285645, + "logits/rejected": -1.7779327630996704, + "logps/chosen": -625.2091674804688, + "logps/rejected": -2121.17919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.543976783752441, + "rewards/margins": 15.227758407592773, + "rewards/rejected": -20.7717342376709, + "step": 25760 + }, + { + "epoch": 1.54, + "learning_rate": 2.828701461782546e-06, + "logits/chosen": -2.54502010345459, + "logits/rejected": -1.9407259225845337, + "logps/chosen": -642.059814453125, + "logps/rejected": -2044.358642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.75085973739624, + "rewards/margins": 14.2528076171875, + "rewards/rejected": -20.0036678314209, + "step": 25770 + }, + { + "epoch": 1.54, + "learning_rate": 2.826981866199847e-06, + "logits/chosen": -2.536264181137085, + "logits/rejected": -1.7927982807159424, + "logps/chosen": -645.201171875, + "logps/rejected": -2131.1123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.76552152633667, + "rewards/margins": 15.082501411437988, + "rewards/rejected": -20.848024368286133, + "step": 25780 + }, + { + "epoch": 1.54, + "learning_rate": 2.8252621132080817e-06, + "logits/chosen": -2.541018486022949, + "logits/rejected": -1.7952463626861572, + "logps/chosen": -619.9064331054688, + "logps/rejected": -2042.356689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.493107795715332, + "rewards/margins": 14.492063522338867, + "rewards/rejected": -19.985172271728516, + "step": 25790 + }, + { + "epoch": 1.54, + "learning_rate": 2.8235422036351384e-06, + "logits/chosen": -2.556593179702759, + "logits/rejected": -1.8971706628799438, + "logps/chosen": -656.1596069335938, + "logps/rejected": -2125.09130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.853053092956543, + "rewards/margins": 14.957852363586426, + "rewards/rejected": -20.8109073638916, + "step": 25800 + }, + { + "epoch": 1.54, + "learning_rate": 2.8218221383089835e-06, + "logits/chosen": -2.5651564598083496, + "logits/rejected": -1.9042762517929077, + "logps/chosen": -632.527587890625, + "logps/rejected": -2066.86572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6170525550842285, + "rewards/margins": 14.593034744262695, + "rewards/rejected": -20.210086822509766, + "step": 25810 + }, + { + "epoch": 1.54, + "learning_rate": 2.820101918057655e-06, + "logits/chosen": -2.511505603790283, + "logits/rejected": -1.8818743228912354, + "logps/chosen": -629.9519653320312, + "logps/rejected": -2079.99267578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.582190036773682, + "rewards/margins": 14.775205612182617, + "rewards/rejected": -20.357397079467773, + "step": 25820 + }, + { + "epoch": 1.54, + "learning_rate": 2.818381543709267e-06, + "logits/chosen": -2.5468554496765137, + "logits/rejected": -1.845607042312622, + "logps/chosen": -613.4219360351562, + "logps/rejected": -2106.518798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.453192710876465, + "rewards/margins": 15.17332649230957, + "rewards/rejected": -20.626516342163086, + "step": 25830 + }, + { + "epoch": 1.54, + "learning_rate": 2.8166610160920082e-06, + "logits/chosen": -2.5291457176208496, + "logits/rejected": -1.8620325326919556, + "logps/chosen": -626.9110717773438, + "logps/rejected": -2067.251220703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575301170349121, + "rewards/margins": 14.6643705368042, + "rewards/rejected": -20.23967170715332, + "step": 25840 + }, + { + "epoch": 1.54, + "learning_rate": 2.81494033603414e-06, + "logits/chosen": -2.5294909477233887, + "logits/rejected": -1.8585846424102783, + "logps/chosen": -631.4614868164062, + "logps/rejected": -2188.0078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.547123432159424, + "rewards/margins": 15.893094062805176, + "rewards/rejected": -21.440217971801758, + "step": 25850 + }, + { + "epoch": 1.54, + "learning_rate": 2.813219504363998e-06, + "logits/chosen": -2.5448691844940186, + "logits/rejected": -1.9046125411987305, + "logps/chosen": -630.0167236328125, + "logps/rejected": -2122.49462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.610383987426758, + "rewards/margins": 15.171361923217773, + "rewards/rejected": -20.7817440032959, + "step": 25860 + }, + { + "epoch": 1.54, + "learning_rate": 2.811498521909991e-06, + "logits/chosen": -2.5402541160583496, + "logits/rejected": -1.8354276418685913, + "logps/chosen": -639.9490966796875, + "logps/rejected": -2161.70751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.616918087005615, + "rewards/margins": 15.560409545898438, + "rewards/rejected": -21.177326202392578, + "step": 25870 + }, + { + "epoch": 1.54, + "learning_rate": 2.8097773895005992e-06, + "logits/chosen": -2.5622098445892334, + "logits/rejected": -1.9094823598861694, + "logps/chosen": -628.8680419921875, + "logps/rejected": -2098.02099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.545065879821777, + "rewards/margins": 14.976776123046875, + "rewards/rejected": -20.52184295654297, + "step": 25880 + }, + { + "epoch": 1.54, + "learning_rate": 2.8080561079643758e-06, + "logits/chosen": -2.542921543121338, + "logits/rejected": -1.8828290700912476, + "logps/chosen": -671.5943603515625, + "logps/rejected": -2066.925537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004670143127441, + "rewards/margins": 14.221609115600586, + "rewards/rejected": -20.22627830505371, + "step": 25890 + }, + { + "epoch": 1.54, + "learning_rate": 2.8063346781299466e-06, + "logits/chosen": -2.5028040409088135, + "logits/rejected": -1.7871729135513306, + "logps/chosen": -630.842529296875, + "logps/rejected": -2151.52294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.590022087097168, + "rewards/margins": 15.491212844848633, + "rewards/rejected": -21.081233978271484, + "step": 25900 + }, + { + "epoch": 1.55, + "learning_rate": 2.8046131008260074e-06, + "logits/chosen": -2.498596668243408, + "logits/rejected": -1.7849948406219482, + "logps/chosen": -641.7122802734375, + "logps/rejected": -2160.756591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.71860408782959, + "rewards/margins": 15.451353073120117, + "rewards/rejected": -21.169958114624023, + "step": 25910 + }, + { + "epoch": 1.55, + "learning_rate": 2.802891376881325e-06, + "logits/chosen": -2.6018431186676025, + "logits/rejected": -1.8464558124542236, + "logps/chosen": -606.263427734375, + "logps/rejected": -2185.218017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.377284049987793, + "rewards/margins": 16.027935028076172, + "rewards/rejected": -21.40521812438965, + "step": 25920 + }, + { + "epoch": 1.55, + "learning_rate": 2.801169507124737e-06, + "logits/chosen": -2.555922031402588, + "logits/rejected": -1.8828067779541016, + "logps/chosen": -610.9158325195312, + "logps/rejected": -2088.039794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.379183292388916, + "rewards/margins": 15.058794021606445, + "rewards/rejected": -20.437978744506836, + "step": 25930 + }, + { + "epoch": 1.55, + "learning_rate": 2.799447492385153e-06, + "logits/chosen": -2.5473246574401855, + "logits/rejected": -1.948357343673706, + "logps/chosen": -646.8180541992188, + "logps/rejected": -2065.20751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.725636959075928, + "rewards/margins": 14.494192123413086, + "rewards/rejected": -20.219829559326172, + "step": 25940 + }, + { + "epoch": 1.55, + "learning_rate": 2.7977253334915495e-06, + "logits/chosen": -2.5260424613952637, + "logits/rejected": -1.8404743671417236, + "logps/chosen": -622.3569946289062, + "logps/rejected": -2056.40869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.575369834899902, + "rewards/margins": 14.556757926940918, + "rewards/rejected": -20.132129669189453, + "step": 25950 + }, + { + "epoch": 1.55, + "learning_rate": 2.7960030312729742e-06, + "logits/chosen": -2.591135263442993, + "logits/rejected": -1.985479712486267, + "logps/chosen": -602.1378173828125, + "logps/rejected": -2166.608642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.346865653991699, + "rewards/margins": 15.870892524719238, + "rewards/rejected": -21.217754364013672, + "step": 25960 + }, + { + "epoch": 1.55, + "learning_rate": 2.794280586558543e-06, + "logits/chosen": -2.51173734664917, + "logits/rejected": -1.7757914066314697, + "logps/chosen": -633.3289794921875, + "logps/rejected": -2022.4814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.598567962646484, + "rewards/margins": 14.196199417114258, + "rewards/rejected": -19.794769287109375, + "step": 25970 + }, + { + "epoch": 1.55, + "learning_rate": 2.7925580001774422e-06, + "logits/chosen": -2.558990240097046, + "logits/rejected": -1.8675193786621094, + "logps/chosen": -644.4606323242188, + "logps/rejected": -2105.911865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.749826908111572, + "rewards/margins": 14.863920211791992, + "rewards/rejected": -20.61374855041504, + "step": 25980 + }, + { + "epoch": 1.55, + "learning_rate": 2.790835272958923e-06, + "logits/chosen": -2.5293939113616943, + "logits/rejected": -1.7752635478973389, + "logps/chosen": -636.233642578125, + "logps/rejected": -2119.442626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6862311363220215, + "rewards/margins": 15.069506645202637, + "rewards/rejected": -20.7557373046875, + "step": 25990 + }, + { + "epoch": 1.55, + "learning_rate": 2.7891124057323075e-06, + "logits/chosen": -2.5963971614837646, + "logits/rejected": -1.847054123878479, + "logps/chosen": -609.0415649414062, + "logps/rejected": -2084.333740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.418876647949219, + "rewards/margins": 14.984899520874023, + "rewards/rejected": -20.403778076171875, + "step": 26000 + }, + { + "epoch": 1.55, + "learning_rate": 2.787389399326984e-06, + "logits/chosen": -2.5022287368774414, + "logits/rejected": -1.7984157800674438, + "logps/chosen": -701.4876708984375, + "logps/rejected": -2160.06787109375, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.298511981964111, + "rewards/margins": 14.840924263000488, + "rewards/rejected": -21.139436721801758, + "step": 26010 + }, + { + "epoch": 1.55, + "learning_rate": 2.7856662545724067e-06, + "logits/chosen": -2.4512112140655518, + "logits/rejected": -1.6188831329345703, + "logps/chosen": -813.9778442382812, + "logps/rejected": -2180.08251953125, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.431632995605469, + "rewards/margins": 13.930317878723145, + "rewards/rejected": -21.361948013305664, + "step": 26020 + }, + { + "epoch": 1.55, + "learning_rate": 2.783942972298098e-06, + "logits/chosen": -2.450787305831909, + "logits/rejected": -1.598054051399231, + "logps/chosen": -769.6063232421875, + "logps/rejected": -2216.920654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.929068565368652, + "rewards/margins": 14.796772956848145, + "rewards/rejected": -21.725841522216797, + "step": 26030 + }, + { + "epoch": 1.55, + "learning_rate": 2.7822195533336466e-06, + "logits/chosen": -2.4876911640167236, + "logits/rejected": -1.7374372482299805, + "logps/chosen": -796.423828125, + "logps/rejected": -2130.822021484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.231493949890137, + "rewards/margins": 13.623806953430176, + "rewards/rejected": -20.855300903320312, + "step": 26040 + }, + { + "epoch": 1.55, + "learning_rate": 2.7804959985087055e-06, + "logits/chosen": -2.466561794281006, + "logits/rejected": -1.6196342706680298, + "logps/chosen": -750.8328857421875, + "logps/rejected": -2189.64453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.797375679016113, + "rewards/margins": 14.650274276733398, + "rewards/rejected": -21.447650909423828, + "step": 26050 + }, + { + "epoch": 1.55, + "learning_rate": 2.7787723086529945e-06, + "logits/chosen": -2.5158793926239014, + "logits/rejected": -1.721046805381775, + "logps/chosen": -736.6946411132812, + "logps/rejected": -2141.91064453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.687957763671875, + "rewards/margins": 14.281817436218262, + "rewards/rejected": -20.969776153564453, + "step": 26060 + }, + { + "epoch": 1.55, + "learning_rate": 2.7770484845962976e-06, + "logits/chosen": -2.568408250808716, + "logits/rejected": -1.7620378732681274, + "logps/chosen": -745.3790893554688, + "logps/rejected": -2212.04248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7178053855896, + "rewards/margins": 14.946415901184082, + "rewards/rejected": -21.66421890258789, + "step": 26070 + }, + { + "epoch": 1.56, + "learning_rate": 2.775324527168463e-06, + "logits/chosen": -2.5153415203094482, + "logits/rejected": -1.7101634740829468, + "logps/chosen": -703.1721801757812, + "logps/rejected": -2108.941162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.322958469390869, + "rewards/margins": 14.314905166625977, + "rewards/rejected": -20.637861251831055, + "step": 26080 + }, + { + "epoch": 1.56, + "learning_rate": 2.773600437199406e-06, + "logits/chosen": -2.4652202129364014, + "logits/rejected": -1.7040637731552124, + "logps/chosen": -728.9530029296875, + "logps/rejected": -2108.11669921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622197151184082, + "rewards/margins": 14.015172004699707, + "rewards/rejected": -20.63736915588379, + "step": 26090 + }, + { + "epoch": 1.56, + "learning_rate": 2.7718762155191015e-06, + "logits/chosen": -2.5247750282287598, + "logits/rejected": -1.7620251178741455, + "logps/chosen": -703.6787109375, + "logps/rejected": -2179.7578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.376276016235352, + "rewards/margins": 14.979168891906738, + "rewards/rejected": -21.355443954467773, + "step": 26100 + }, + { + "epoch": 1.56, + "learning_rate": 2.7701518629575896e-06, + "logits/chosen": -2.481851100921631, + "logits/rejected": -1.7610971927642822, + "logps/chosen": -764.0119018554688, + "logps/rejected": -2195.173583984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.887962341308594, + "rewards/margins": 14.61901569366455, + "rewards/rejected": -21.506977081298828, + "step": 26110 + }, + { + "epoch": 1.56, + "learning_rate": 2.768427380344975e-06, + "logits/chosen": -2.504711627960205, + "logits/rejected": -1.7090572118759155, + "logps/chosen": -714.5076904296875, + "logps/rejected": -2066.216796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.461683750152588, + "rewards/margins": 13.765459060668945, + "rewards/rejected": -20.227142333984375, + "step": 26120 + }, + { + "epoch": 1.56, + "learning_rate": 2.766702768511423e-06, + "logits/chosen": -2.4780185222625732, + "logits/rejected": -1.7054221630096436, + "logps/chosen": -732.8038330078125, + "logps/rejected": -2194.9560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6310224533081055, + "rewards/margins": 14.86157512664795, + "rewards/rejected": -21.492597579956055, + "step": 26130 + }, + { + "epoch": 1.56, + "learning_rate": 2.764978028287161e-06, + "logits/chosen": -2.4678521156311035, + "logits/rejected": -1.7463696002960205, + "logps/chosen": -714.8494262695312, + "logps/rejected": -2116.805908203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.423226356506348, + "rewards/margins": 14.3021879196167, + "rewards/rejected": -20.725412368774414, + "step": 26140 + }, + { + "epoch": 1.56, + "learning_rate": 2.7632531605024796e-06, + "logits/chosen": -2.486067771911621, + "logits/rejected": -1.6775791645050049, + "logps/chosen": -734.749267578125, + "logps/rejected": -2078.15380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6087141036987305, + "rewards/margins": 13.714075088500977, + "rewards/rejected": -20.32278823852539, + "step": 26150 + }, + { + "epoch": 1.56, + "learning_rate": 2.7615281659877304e-06, + "logits/chosen": -2.444579601287842, + "logits/rejected": -1.49117112159729, + "logps/chosen": -723.8541259765625, + "logps/rejected": -2092.323486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.48062801361084, + "rewards/margins": 13.997472763061523, + "rewards/rejected": -20.478099822998047, + "step": 26160 + }, + { + "epoch": 1.56, + "learning_rate": 2.7598030455733254e-06, + "logits/chosen": -2.48372483253479, + "logits/rejected": -1.789494276046753, + "logps/chosen": -720.4764404296875, + "logps/rejected": -2056.17041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.538102626800537, + "rewards/margins": 13.580950736999512, + "rewards/rejected": -20.11905288696289, + "step": 26170 + }, + { + "epoch": 1.56, + "learning_rate": 2.758077800089738e-06, + "logits/chosen": -2.5023934841156006, + "logits/rejected": -1.6538660526275635, + "logps/chosen": -740.0347900390625, + "logps/rejected": -2151.642578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.656530857086182, + "rewards/margins": 14.409380912780762, + "rewards/rejected": -21.0659122467041, + "step": 26180 + }, + { + "epoch": 1.56, + "learning_rate": 2.7563524303675005e-06, + "logits/chosen": -2.4790353775024414, + "logits/rejected": -1.740210771560669, + "logps/chosen": -704.5325927734375, + "logps/rejected": -2107.009521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.334046840667725, + "rewards/margins": 14.296506881713867, + "rewards/rejected": -20.630552291870117, + "step": 26190 + }, + { + "epoch": 1.56, + "learning_rate": 2.7546269372372065e-06, + "logits/chosen": -2.5092058181762695, + "logits/rejected": -1.745919942855835, + "logps/chosen": -728.7036743164062, + "logps/rejected": -2111.9501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.553582191467285, + "rewards/margins": 14.130642890930176, + "rewards/rejected": -20.684223175048828, + "step": 26200 + }, + { + "epoch": 1.56, + "learning_rate": 2.752901321529508e-06, + "logits/chosen": -2.4970779418945312, + "logits/rejected": -1.7387828826904297, + "logps/chosen": -708.947265625, + "logps/rejected": -2101.89599609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.448302268981934, + "rewards/margins": 14.122535705566406, + "rewards/rejected": -20.57084083557129, + "step": 26210 + }, + { + "epoch": 1.56, + "learning_rate": 2.7511755840751165e-06, + "logits/chosen": -2.5524942874908447, + "logits/rejected": -1.791619896888733, + "logps/chosen": -704.868408203125, + "logps/rejected": -2145.496337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.339493274688721, + "rewards/margins": 14.661796569824219, + "rewards/rejected": -21.00128746032715, + "step": 26220 + }, + { + "epoch": 1.56, + "learning_rate": 2.749449725704802e-06, + "logits/chosen": -2.4769959449768066, + "logits/rejected": -1.76957106590271, + "logps/chosen": -724.6694946289062, + "logps/rejected": -2106.695068359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515591621398926, + "rewards/margins": 14.098838806152344, + "rewards/rejected": -20.614431381225586, + "step": 26230 + }, + { + "epoch": 1.56, + "learning_rate": 2.7477237472493917e-06, + "logits/chosen": -2.5281310081481934, + "logits/rejected": -1.691719651222229, + "logps/chosen": -695.1613159179688, + "logps/rejected": -2102.653076171875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.23598051071167, + "rewards/margins": 14.345962524414062, + "rewards/rejected": -20.58194351196289, + "step": 26240 + }, + { + "epoch": 1.57, + "learning_rate": 2.7459976495397738e-06, + "logits/chosen": -2.498015880584717, + "logits/rejected": -1.748234748840332, + "logps/chosen": -695.8943481445312, + "logps/rejected": -2140.6982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229950904846191, + "rewards/margins": 14.724270820617676, + "rewards/rejected": -20.954219818115234, + "step": 26250 + }, + { + "epoch": 1.57, + "learning_rate": 2.7442714334068902e-06, + "logits/chosen": -2.5165772438049316, + "logits/rejected": -1.7533279657363892, + "logps/chosen": -687.2047119140625, + "logps/rejected": -2133.55712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173638820648193, + "rewards/margins": 14.709573745727539, + "rewards/rejected": -20.88321304321289, + "step": 26260 + }, + { + "epoch": 1.57, + "learning_rate": 2.7425450996817427e-06, + "logits/chosen": -2.4756546020507812, + "logits/rejected": -1.7182992696762085, + "logps/chosen": -686.7769165039062, + "logps/rejected": -2137.739501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.170814514160156, + "rewards/margins": 14.754717826843262, + "rewards/rejected": -20.925533294677734, + "step": 26270 + }, + { + "epoch": 1.57, + "learning_rate": 2.7408186491953862e-06, + "logits/chosen": -2.496995449066162, + "logits/rejected": -1.6749197244644165, + "logps/chosen": -664.4503173828125, + "logps/rejected": -2051.4501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9423723220825195, + "rewards/margins": 14.127278327941895, + "rewards/rejected": -20.069650650024414, + "step": 26280 + }, + { + "epoch": 1.57, + "learning_rate": 2.7390920827789358e-06, + "logits/chosen": -2.506004810333252, + "logits/rejected": -1.7931712865829468, + "logps/chosen": -701.8387451171875, + "logps/rejected": -2100.45263671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3102827072143555, + "rewards/margins": 14.229528427124023, + "rewards/rejected": -20.539812088012695, + "step": 26290 + }, + { + "epoch": 1.57, + "learning_rate": 2.7373654012635594e-06, + "logits/chosen": -2.5016961097717285, + "logits/rejected": -1.6965618133544922, + "logps/chosen": -675.9497680664062, + "logps/rejected": -2047.9879150390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043055057525635, + "rewards/margins": 13.997800827026367, + "rewards/rejected": -20.040857315063477, + "step": 26300 + }, + { + "epoch": 1.57, + "learning_rate": 2.735638605480482e-06, + "logits/chosen": -2.48819899559021, + "logits/rejected": -1.7767791748046875, + "logps/chosen": -675.5988159179688, + "logps/rejected": -2050.140869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.057031631469727, + "rewards/margins": 13.997854232788086, + "rewards/rejected": -20.054887771606445, + "step": 26310 + }, + { + "epoch": 1.57, + "learning_rate": 2.7339116962609826e-06, + "logits/chosen": -2.496124744415283, + "logits/rejected": -1.7542927265167236, + "logps/chosen": -705.5115966796875, + "logps/rejected": -2046.1181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.404162406921387, + "rewards/margins": 13.601875305175781, + "rewards/rejected": -20.00603675842285, + "step": 26320 + }, + { + "epoch": 1.57, + "learning_rate": 2.7321846744363956e-06, + "logits/chosen": -2.558100938796997, + "logits/rejected": -1.7833553552627563, + "logps/chosen": -670.850341796875, + "logps/rejected": -2055.14599609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989642143249512, + "rewards/margins": 14.097253799438477, + "rewards/rejected": -20.086896896362305, + "step": 26330 + }, + { + "epoch": 1.57, + "learning_rate": 2.730457540838109e-06, + "logits/chosen": -2.566539764404297, + "logits/rejected": -1.7523292303085327, + "logps/chosen": -666.1284790039062, + "logps/rejected": -2064.4228515625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989565849304199, + "rewards/margins": 14.217799186706543, + "rewards/rejected": -20.207365036010742, + "step": 26340 + }, + { + "epoch": 1.57, + "learning_rate": 2.728730296297566e-06, + "logits/chosen": -2.5361487865448, + "logits/rejected": -1.8030836582183838, + "logps/chosen": -655.807373046875, + "logps/rejected": -2101.76416015625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891304969787598, + "rewards/margins": 14.677291870117188, + "rewards/rejected": -20.568593978881836, + "step": 26350 + }, + { + "epoch": 1.57, + "learning_rate": 2.727002941646261e-06, + "logits/chosen": -2.5374717712402344, + "logits/rejected": -1.7549575567245483, + "logps/chosen": -659.4302978515625, + "logps/rejected": -1992.8482666015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.828588008880615, + "rewards/margins": 13.659322738647461, + "rewards/rejected": -19.4879093170166, + "step": 26360 + }, + { + "epoch": 1.57, + "learning_rate": 2.725275477715743e-06, + "logits/chosen": -2.520993709564209, + "logits/rejected": -1.8006690740585327, + "logps/chosen": -649.8729248046875, + "logps/rejected": -2020.1929931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8153977394104, + "rewards/margins": 13.947954177856445, + "rewards/rejected": -19.763355255126953, + "step": 26370 + }, + { + "epoch": 1.57, + "learning_rate": 2.7235479053376124e-06, + "logits/chosen": -2.4653258323669434, + "logits/rejected": -1.8604612350463867, + "logps/chosen": -642.826416015625, + "logps/rejected": -2074.89599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.773848533630371, + "rewards/margins": 14.544229507446289, + "rewards/rejected": -20.318077087402344, + "step": 26380 + }, + { + "epoch": 1.57, + "learning_rate": 2.7218202253435234e-06, + "logits/chosen": -2.5423922538757324, + "logits/rejected": -1.7586396932601929, + "logps/chosen": -636.7875366210938, + "logps/rejected": -2179.5859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.592055320739746, + "rewards/margins": 15.74688720703125, + "rewards/rejected": -21.338939666748047, + "step": 26390 + }, + { + "epoch": 1.57, + "learning_rate": 2.7200924385651805e-06, + "logits/chosen": -2.5296971797943115, + "logits/rejected": -1.8857465982437134, + "logps/chosen": -637.0465698242188, + "logps/rejected": -2144.732666015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.605169296264648, + "rewards/margins": 15.386474609375, + "rewards/rejected": -20.99164390563965, + "step": 26400 + }, + { + "epoch": 1.57, + "learning_rate": 2.7183645458343407e-06, + "logits/chosen": -2.467458724975586, + "logits/rejected": -1.7834323644638062, + "logps/chosen": -659.732177734375, + "logps/rejected": -2104.2255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894711971282959, + "rewards/margins": 14.701440811157227, + "rewards/rejected": -20.59615135192871, + "step": 26410 + }, + { + "epoch": 1.58, + "learning_rate": 2.7166365479828107e-06, + "logits/chosen": -2.4939777851104736, + "logits/rejected": -1.805511236190796, + "logps/chosen": -650.0393676757812, + "logps/rejected": -2035.0804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.767185211181641, + "rewards/margins": 14.156784057617188, + "rewards/rejected": -19.923969268798828, + "step": 26420 + }, + { + "epoch": 1.58, + "learning_rate": 2.7149084458424497e-06, + "logits/chosen": -2.5371944904327393, + "logits/rejected": -1.8472362756729126, + "logps/chosen": -657.1799926757812, + "logps/rejected": -2096.8037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.880747318267822, + "rewards/margins": 14.648076057434082, + "rewards/rejected": -20.52882194519043, + "step": 26430 + }, + { + "epoch": 1.58, + "learning_rate": 2.713180240245166e-06, + "logits/chosen": -2.539539098739624, + "logits/rejected": -1.7568156719207764, + "logps/chosen": -649.8516845703125, + "logps/rejected": -2133.866943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.704499244689941, + "rewards/margins": 15.194859504699707, + "rewards/rejected": -20.899356842041016, + "step": 26440 + }, + { + "epoch": 1.58, + "learning_rate": 2.7114519320229172e-06, + "logits/chosen": -2.5515995025634766, + "logits/rejected": -1.7955467700958252, + "logps/chosen": -654.161376953125, + "logps/rejected": -2066.35009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.819704055786133, + "rewards/margins": 14.410616874694824, + "rewards/rejected": -20.23031997680664, + "step": 26450 + }, + { + "epoch": 1.58, + "learning_rate": 2.70972352200771e-06, + "logits/chosen": -2.5195529460906982, + "logits/rejected": -1.7508046627044678, + "logps/chosen": -645.3291625976562, + "logps/rejected": -2048.80224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7593889236450195, + "rewards/margins": 14.296534538269043, + "rewards/rejected": -20.055925369262695, + "step": 26460 + }, + { + "epoch": 1.58, + "learning_rate": 2.7079950110316028e-06, + "logits/chosen": -2.5697412490844727, + "logits/rejected": -1.83827805519104, + "logps/chosen": -648.7833251953125, + "logps/rejected": -2109.4580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781604766845703, + "rewards/margins": 14.868426322937012, + "rewards/rejected": -20.6500301361084, + "step": 26470 + }, + { + "epoch": 1.58, + "learning_rate": 2.7062663999267002e-06, + "logits/chosen": -2.561314344406128, + "logits/rejected": -1.8798269033432007, + "logps/chosen": -640.1805419921875, + "logps/rejected": -2046.804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.690484523773193, + "rewards/margins": 14.328378677368164, + "rewards/rejected": -20.018863677978516, + "step": 26480 + }, + { + "epoch": 1.58, + "learning_rate": 2.7045376895251544e-06, + "logits/chosen": -2.525716543197632, + "logits/rejected": -1.7821521759033203, + "logps/chosen": -646.1390991210938, + "logps/rejected": -2042.296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.779331207275391, + "rewards/margins": 14.187365531921387, + "rewards/rejected": -19.966699600219727, + "step": 26490 + }, + { + "epoch": 1.58, + "learning_rate": 2.7028088806591685e-06, + "logits/chosen": -2.5458719730377197, + "logits/rejected": -1.8613097667694092, + "logps/chosen": -643.9950561523438, + "logps/rejected": -2184.4501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7664899826049805, + "rewards/margins": 15.625335693359375, + "rewards/rejected": -21.39182472229004, + "step": 26500 + }, + { + "epoch": 1.58, + "learning_rate": 2.7010799741609895e-06, + "logits/chosen": -2.5648765563964844, + "logits/rejected": -1.8194774389266968, + "logps/chosen": -655.3461303710938, + "logps/rejected": -2084.316162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.777928829193115, + "rewards/margins": 14.605688095092773, + "rewards/rejected": -20.383617401123047, + "step": 26510 + }, + { + "epoch": 1.58, + "learning_rate": 2.6993509708629133e-06, + "logits/chosen": -2.540865421295166, + "logits/rejected": -1.845945119857788, + "logps/chosen": -641.9934692382812, + "logps/rejected": -2048.95166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.680923938751221, + "rewards/margins": 14.345568656921387, + "rewards/rejected": -20.0264949798584, + "step": 26520 + }, + { + "epoch": 1.58, + "learning_rate": 2.6976218715972836e-06, + "logits/chosen": -2.5436859130859375, + "logits/rejected": -2.0442473888397217, + "logps/chosen": -628.0389404296875, + "logps/rejected": -1953.616943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.59365701675415, + "rewards/margins": 13.49987506866455, + "rewards/rejected": -19.09353256225586, + "step": 26530 + }, + { + "epoch": 1.58, + "learning_rate": 2.6958926771964866e-06, + "logits/chosen": -2.520061492919922, + "logits/rejected": -1.7937800884246826, + "logps/chosen": -643.7100830078125, + "logps/rejected": -2095.683837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.74096155166626, + "rewards/margins": 14.768579483032227, + "rewards/rejected": -20.509540557861328, + "step": 26540 + }, + { + "epoch": 1.58, + "learning_rate": 2.694163388492957e-06, + "logits/chosen": -2.4808452129364014, + "logits/rejected": -1.8373744487762451, + "logps/chosen": -649.1008911132812, + "logps/rejected": -2030.1614990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.794039249420166, + "rewards/margins": 14.06877613067627, + "rewards/rejected": -19.862812042236328, + "step": 26550 + }, + { + "epoch": 1.58, + "learning_rate": 2.6924340063191745e-06, + "logits/chosen": -2.5097384452819824, + "logits/rejected": -1.8280150890350342, + "logps/chosen": -643.3300170898438, + "logps/rejected": -1968.762939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.725370407104492, + "rewards/margins": 13.517416000366211, + "rewards/rejected": -19.242786407470703, + "step": 26560 + }, + { + "epoch": 1.58, + "learning_rate": 2.690704531507664e-06, + "logits/chosen": -2.517413854598999, + "logits/rejected": -1.78667414188385, + "logps/chosen": -647.7485961914062, + "logps/rejected": -2007.0472412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.791871547698975, + "rewards/margins": 13.827093124389648, + "rewards/rejected": -19.61896324157715, + "step": 26570 + }, + { + "epoch": 1.58, + "learning_rate": 2.6889749648909946e-06, + "logits/chosen": -2.525158405303955, + "logits/rejected": -1.854827642440796, + "logps/chosen": -653.6370849609375, + "logps/rejected": -2035.42578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.866288185119629, + "rewards/margins": 14.046035766601562, + "rewards/rejected": -19.912322998046875, + "step": 26580 + }, + { + "epoch": 1.59, + "learning_rate": 2.6872453073017796e-06, + "logits/chosen": -2.541532039642334, + "logits/rejected": -1.783786416053772, + "logps/chosen": -642.9391479492188, + "logps/rejected": -1931.767822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.67186164855957, + "rewards/margins": 13.207110404968262, + "rewards/rejected": -18.878971099853516, + "step": 26590 + }, + { + "epoch": 1.59, + "learning_rate": 2.6855155595726758e-06, + "logits/chosen": -2.533134937286377, + "logits/rejected": -1.7976611852645874, + "logps/chosen": -628.8135986328125, + "logps/rejected": -2213.80419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.528882026672363, + "rewards/margins": 16.161190032958984, + "rewards/rejected": -21.690073013305664, + "step": 26600 + }, + { + "epoch": 1.59, + "learning_rate": 2.6837857225363837e-06, + "logits/chosen": -2.5133798122406006, + "logits/rejected": -1.7932164669036865, + "logps/chosen": -629.0156860351562, + "logps/rejected": -2096.399658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.622100353240967, + "rewards/margins": 14.895502090454102, + "rewards/rejected": -20.517601013183594, + "step": 26610 + }, + { + "epoch": 1.59, + "learning_rate": 2.6820557970256483e-06, + "logits/chosen": -2.546844005584717, + "logits/rejected": -1.8233697414398193, + "logps/chosen": -651.1301879882812, + "logps/rejected": -2054.79541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.807178497314453, + "rewards/margins": 14.280695915222168, + "rewards/rejected": -20.08787727355957, + "step": 26620 + }, + { + "epoch": 1.59, + "learning_rate": 2.6803257838732537e-06, + "logits/chosen": -2.567558765411377, + "logits/rejected": -1.9220447540283203, + "logps/chosen": -628.4864501953125, + "logps/rejected": -2059.760986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.562158107757568, + "rewards/margins": 14.590354919433594, + "rewards/rejected": -20.152509689331055, + "step": 26630 + }, + { + "epoch": 1.59, + "learning_rate": 2.6785956839120294e-06, + "logits/chosen": -2.5251965522766113, + "logits/rejected": -1.8262304067611694, + "logps/chosen": -626.1928100585938, + "logps/rejected": -2085.15087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.514039993286133, + "rewards/margins": 14.899385452270508, + "rewards/rejected": -20.41342544555664, + "step": 26640 + }, + { + "epoch": 1.59, + "learning_rate": 2.676865497974845e-06, + "logits/chosen": -2.5677144527435303, + "logits/rejected": -1.913987398147583, + "logps/chosen": -640.4356689453125, + "logps/rejected": -2066.70849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755256175994873, + "rewards/margins": 14.455592155456543, + "rewards/rejected": -20.21084976196289, + "step": 26650 + }, + { + "epoch": 1.59, + "learning_rate": 2.6751352268946117e-06, + "logits/chosen": -2.5091662406921387, + "logits/rejected": -1.751660704612732, + "logps/chosen": -639.7566528320312, + "logps/rejected": -2154.80126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.674595355987549, + "rewards/margins": 15.427578926086426, + "rewards/rejected": -21.102174758911133, + "step": 26660 + }, + { + "epoch": 1.59, + "learning_rate": 2.6734048715042824e-06, + "logits/chosen": -2.578368663787842, + "logits/rejected": -1.8745520114898682, + "logps/chosen": -645.7298583984375, + "logps/rejected": -2065.76123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.759889125823975, + "rewards/margins": 14.453539848327637, + "rewards/rejected": -20.213428497314453, + "step": 26670 + }, + { + "epoch": 1.59, + "learning_rate": 2.671674432636851e-06, + "logits/chosen": -2.555617094039917, + "logits/rejected": -1.8299480676651, + "logps/chosen": -632.2430419921875, + "logps/rejected": -2221.21923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.664945125579834, + "rewards/margins": 16.095027923583984, + "rewards/rejected": -21.759973526000977, + "step": 26680 + }, + { + "epoch": 1.59, + "learning_rate": 2.669943911125349e-06, + "logits/chosen": -2.4938857555389404, + "logits/rejected": -1.922369360923767, + "logps/chosen": -638.1082763671875, + "logps/rejected": -2016.5894775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.682628631591797, + "rewards/margins": 14.029500961303711, + "rewards/rejected": -19.71213150024414, + "step": 26690 + }, + { + "epoch": 1.59, + "learning_rate": 2.668213307802851e-06, + "logits/chosen": -2.536353588104248, + "logits/rejected": -1.8547699451446533, + "logps/chosen": -635.3897705078125, + "logps/rejected": -2075.063232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.694400787353516, + "rewards/margins": 14.605748176574707, + "rewards/rejected": -20.300148010253906, + "step": 26700 + }, + { + "epoch": 1.59, + "learning_rate": 2.6664826235024684e-06, + "logits/chosen": -2.53605318069458, + "logits/rejected": -1.8358453512191772, + "logps/chosen": -642.7594604492188, + "logps/rejected": -2107.14306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.675088882446289, + "rewards/margins": 14.949917793273926, + "rewards/rejected": -20.6250057220459, + "step": 26710 + }, + { + "epoch": 1.59, + "learning_rate": 2.664751859057353e-06, + "logits/chosen": -2.5563626289367676, + "logits/rejected": -1.9132694005966187, + "logps/chosen": -635.7589721679688, + "logps/rejected": -2106.067626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5882697105407715, + "rewards/margins": 15.030500411987305, + "rewards/rejected": -20.618770599365234, + "step": 26720 + }, + { + "epoch": 1.59, + "learning_rate": 2.663021015300695e-06, + "logits/chosen": -2.539952039718628, + "logits/rejected": -1.8434722423553467, + "logps/chosen": -634.4912109375, + "logps/rejected": -2077.943359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.653343677520752, + "rewards/margins": 14.679634094238281, + "rewards/rejected": -20.332977294921875, + "step": 26730 + }, + { + "epoch": 1.59, + "learning_rate": 2.6612900930657215e-06, + "logits/chosen": -2.560845136642456, + "logits/rejected": -1.9498207569122314, + "logps/chosen": -644.62353515625, + "logps/rejected": -2037.807861328125, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.729599952697754, + "rewards/margins": 14.198532104492188, + "rewards/rejected": -19.928131103515625, + "step": 26740 + }, + { + "epoch": 1.6, + "learning_rate": 2.6595590931857e-06, + "logits/chosen": -2.5427136421203613, + "logits/rejected": -1.9871629476547241, + "logps/chosen": -653.5927734375, + "logps/rejected": -2058.700927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865593433380127, + "rewards/margins": 14.284521102905273, + "rewards/rejected": -20.150115966796875, + "step": 26750 + }, + { + "epoch": 1.6, + "learning_rate": 2.657828016493933e-06, + "logits/chosen": -2.5365054607391357, + "logits/rejected": -1.924595832824707, + "logps/chosen": -672.3150634765625, + "logps/rejected": -2075.89501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995020389556885, + "rewards/margins": 14.340719223022461, + "rewards/rejected": -20.335739135742188, + "step": 26760 + }, + { + "epoch": 1.6, + "learning_rate": 2.656096863823761e-06, + "logits/chosen": -2.5203258991241455, + "logits/rejected": -1.8818038702011108, + "logps/chosen": -653.137451171875, + "logps/rejected": -2111.24560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.845475196838379, + "rewards/margins": 14.804402351379395, + "rewards/rejected": -20.64987564086914, + "step": 26770 + }, + { + "epoch": 1.6, + "learning_rate": 2.6543656360085608e-06, + "logits/chosen": -2.542907476425171, + "logits/rejected": -1.9003502130508423, + "logps/chosen": -681.0687255859375, + "logps/rejected": -2021.134033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106961727142334, + "rewards/margins": 13.656850814819336, + "rewards/rejected": -19.763813018798828, + "step": 26780 + }, + { + "epoch": 1.6, + "learning_rate": 2.6526343338817445e-06, + "logits/chosen": -2.536038637161255, + "logits/rejected": -1.8939288854599, + "logps/chosen": -654.9080200195312, + "logps/rejected": -2282.01904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.809891700744629, + "rewards/margins": 16.565982818603516, + "rewards/rejected": -22.37587547302246, + "step": 26790 + }, + { + "epoch": 1.6, + "learning_rate": 2.650902958276763e-06, + "logits/chosen": -2.557924509048462, + "logits/rejected": -1.789305329322815, + "logps/chosen": -665.0784912109375, + "logps/rejected": -2152.53564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.934021949768066, + "rewards/margins": 15.142862319946289, + "rewards/rejected": -21.076881408691406, + "step": 26800 + }, + { + "epoch": 1.6, + "learning_rate": 2.6491715100270993e-06, + "logits/chosen": -2.5423333644866943, + "logits/rejected": -2.0578532218933105, + "logps/chosen": -646.4002685546875, + "logps/rejected": -2050.161376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.753661155700684, + "rewards/margins": 14.30846118927002, + "rewards/rejected": -20.062122344970703, + "step": 26810 + }, + { + "epoch": 1.6, + "learning_rate": 2.647439989966272e-06, + "logits/chosen": -2.5065691471099854, + "logits/rejected": -1.8256423473358154, + "logps/chosen": -645.5091552734375, + "logps/rejected": -2127.30419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.749955177307129, + "rewards/margins": 15.071635246276855, + "rewards/rejected": -20.821590423583984, + "step": 26820 + }, + { + "epoch": 1.6, + "learning_rate": 2.6457083989278353e-06, + "logits/chosen": -2.577025890350342, + "logits/rejected": -2.030134677886963, + "logps/chosen": -666.71484375, + "logps/rejected": -2147.814697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.936078071594238, + "rewards/margins": 15.107617378234863, + "rewards/rejected": -21.04369354248047, + "step": 26830 + }, + { + "epoch": 1.6, + "learning_rate": 2.6439767377453774e-06, + "logits/chosen": -2.5793185234069824, + "logits/rejected": -1.9979051351547241, + "logps/chosen": -658.1707153320312, + "logps/rejected": -2159.5234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8468170166015625, + "rewards/margins": 15.304194450378418, + "rewards/rejected": -21.151012420654297, + "step": 26840 + }, + { + "epoch": 1.6, + "learning_rate": 2.6422450072525198e-06, + "logits/chosen": -2.513167142868042, + "logits/rejected": -1.8761107921600342, + "logps/chosen": -666.5557250976562, + "logps/rejected": -2163.30712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964515686035156, + "rewards/margins": 15.22291374206543, + "rewards/rejected": -21.18743133544922, + "step": 26850 + }, + { + "epoch": 1.6, + "learning_rate": 2.640513208282917e-06, + "logits/chosen": -2.5634329319000244, + "logits/rejected": -1.8702280521392822, + "logps/chosen": -662.5944213867188, + "logps/rejected": -2083.678955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.924008369445801, + "rewards/margins": 14.471527099609375, + "rewards/rejected": -20.39553451538086, + "step": 26860 + }, + { + "epoch": 1.6, + "learning_rate": 2.6387813416702585e-06, + "logits/chosen": -2.5621376037597656, + "logits/rejected": -1.9419400691986084, + "logps/chosen": -664.8590087890625, + "logps/rejected": -2193.26611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945333957672119, + "rewards/margins": 15.553631782531738, + "rewards/rejected": -21.498966217041016, + "step": 26870 + }, + { + "epoch": 1.6, + "learning_rate": 2.6370494082482632e-06, + "logits/chosen": -2.5742383003234863, + "logits/rejected": -1.9837528467178345, + "logps/chosen": -676.4782104492188, + "logps/rejected": -2138.16064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971518516540527, + "rewards/margins": 14.96491813659668, + "rewards/rejected": -20.93643569946289, + "step": 26880 + }, + { + "epoch": 1.6, + "learning_rate": 2.6353174088506854e-06, + "logits/chosen": -2.5341243743896484, + "logits/rejected": -1.8776359558105469, + "logps/chosen": -652.8712768554688, + "logps/rejected": -2102.06689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.830561637878418, + "rewards/margins": 14.7565336227417, + "rewards/rejected": -20.587095260620117, + "step": 26890 + }, + { + "epoch": 1.6, + "learning_rate": 2.633585344311308e-06, + "logits/chosen": -2.5469393730163574, + "logits/rejected": -1.8060531616210938, + "logps/chosen": -678.5304565429688, + "logps/rejected": -2070.632080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092718601226807, + "rewards/margins": 14.176241874694824, + "rewards/rejected": -20.268962860107422, + "step": 26900 + }, + { + "epoch": 1.6, + "learning_rate": 2.6318532154639474e-06, + "logits/chosen": -2.5518195629119873, + "logits/rejected": -1.8794803619384766, + "logps/chosen": -671.265869140625, + "logps/rejected": -2164.966064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9709882736206055, + "rewards/margins": 15.239143371582031, + "rewards/rejected": -21.210132598876953, + "step": 26910 + }, + { + "epoch": 1.61, + "learning_rate": 2.6301210231424508e-06, + "logits/chosen": -2.5687174797058105, + "logits/rejected": -1.9317796230316162, + "logps/chosen": -663.9234619140625, + "logps/rejected": -2149.28125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979250431060791, + "rewards/margins": 15.077920913696289, + "rewards/rejected": -21.057170867919922, + "step": 26920 + }, + { + "epoch": 1.61, + "learning_rate": 2.628388768180695e-06, + "logits/chosen": -2.56235671043396, + "logits/rejected": -1.9833641052246094, + "logps/chosen": -680.1510009765625, + "logps/rejected": -2233.150634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107985019683838, + "rewards/margins": 15.768694877624512, + "rewards/rejected": -21.87668228149414, + "step": 26930 + }, + { + "epoch": 1.61, + "learning_rate": 2.626656451412588e-06, + "logits/chosen": -2.5578696727752686, + "logits/rejected": -2.0092785358428955, + "logps/chosen": -660.5164794921875, + "logps/rejected": -2086.771240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894896507263184, + "rewards/margins": 14.536836624145508, + "rewards/rejected": -20.431734085083008, + "step": 26940 + }, + { + "epoch": 1.61, + "learning_rate": 2.6249240736720673e-06, + "logits/chosen": -2.526014804840088, + "logits/rejected": -1.8383735418319702, + "logps/chosen": -658.8933715820312, + "logps/rejected": -2173.356689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.889861583709717, + "rewards/margins": 15.387521743774414, + "rewards/rejected": -21.277379989624023, + "step": 26950 + }, + { + "epoch": 1.61, + "learning_rate": 2.6231916357930985e-06, + "logits/chosen": -2.4877066612243652, + "logits/rejected": -1.890924096107483, + "logps/chosen": -642.8902587890625, + "logps/rejected": -2175.86572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8048882484436035, + "rewards/margins": 15.507719039916992, + "rewards/rejected": -21.312604904174805, + "step": 26960 + }, + { + "epoch": 1.61, + "learning_rate": 2.6214591386096782e-06, + "logits/chosen": -2.547123432159424, + "logits/rejected": -1.955649733543396, + "logps/chosen": -671.9149169921875, + "logps/rejected": -2091.482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995157241821289, + "rewards/margins": 14.468940734863281, + "rewards/rejected": -20.464099884033203, + "step": 26970 + }, + { + "epoch": 1.61, + "learning_rate": 2.61972658295583e-06, + "logits/chosen": -2.5230212211608887, + "logits/rejected": -1.9369195699691772, + "logps/chosen": -686.711181640625, + "logps/rejected": -2035.654541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216495037078857, + "rewards/margins": 13.688989639282227, + "rewards/rejected": -19.90548324584961, + "step": 26980 + }, + { + "epoch": 1.61, + "learning_rate": 2.6179939696656063e-06, + "logits/chosen": -2.5323007106781006, + "logits/rejected": -1.786560297012329, + "logps/chosen": -670.177978515625, + "logps/rejected": -2183.84130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.983274936676025, + "rewards/margins": 15.407913208007812, + "rewards/rejected": -21.391191482543945, + "step": 26990 + }, + { + "epoch": 1.61, + "learning_rate": 2.6162612995730874e-06, + "logits/chosen": -2.576655626296997, + "logits/rejected": -1.959892988204956, + "logps/chosen": -677.5964965820312, + "logps/rejected": -2169.78564453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.083807945251465, + "rewards/margins": 15.158782958984375, + "rewards/rejected": -21.242589950561523, + "step": 27000 + }, + { + "epoch": 1.61, + "learning_rate": 2.61452857351238e-06, + "logits/chosen": -2.5089244842529297, + "logits/rejected": -1.6750257015228271, + "logps/chosen": -660.6555786132812, + "logps/rejected": -2217.74267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891000270843506, + "rewards/margins": 15.859458923339844, + "rewards/rejected": -21.750457763671875, + "step": 27010 + }, + { + "epoch": 1.61, + "learning_rate": 2.612795792317619e-06, + "logits/chosen": -2.5145163536071777, + "logits/rejected": -1.8771305084228516, + "logps/chosen": -675.1461181640625, + "logps/rejected": -2156.91943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012540817260742, + "rewards/margins": 15.103425979614258, + "rewards/rejected": -21.115966796875, + "step": 27020 + }, + { + "epoch": 1.61, + "learning_rate": 2.6110629568229647e-06, + "logits/chosen": -2.615041732788086, + "logits/rejected": -1.9409288167953491, + "logps/chosen": -672.6099243164062, + "logps/rejected": -2195.58056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988783359527588, + "rewards/margins": 15.512361526489258, + "rewards/rejected": -21.501142501831055, + "step": 27030 + }, + { + "epoch": 1.61, + "learning_rate": 2.609330067862605e-06, + "logits/chosen": -2.5349113941192627, + "logits/rejected": -2.0630507469177246, + "logps/chosen": -670.3439331054688, + "logps/rejected": -2086.544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019097328186035, + "rewards/margins": 14.405096054077148, + "rewards/rejected": -20.424192428588867, + "step": 27040 + }, + { + "epoch": 1.61, + "learning_rate": 2.6075971262707513e-06, + "logits/chosen": -2.5103185176849365, + "logits/rejected": -1.8639647960662842, + "logps/chosen": -684.6945190429688, + "logps/rejected": -2107.283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1429877281188965, + "rewards/margins": 14.488763809204102, + "rewards/rejected": -20.631750106811523, + "step": 27050 + }, + { + "epoch": 1.61, + "learning_rate": 2.6058641328816425e-06, + "logits/chosen": -2.5843491554260254, + "logits/rejected": -1.9295234680175781, + "logps/chosen": -714.5892333984375, + "logps/rejected": -2204.9111328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.420780181884766, + "rewards/margins": 15.182269096374512, + "rewards/rejected": -21.603052139282227, + "step": 27060 + }, + { + "epoch": 1.61, + "learning_rate": 2.6041310885295424e-06, + "logits/chosen": -2.58833646774292, + "logits/rejected": -1.9219586849212646, + "logps/chosen": -683.8314208984375, + "logps/rejected": -2123.494384765625, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.082146644592285, + "rewards/margins": 14.708831787109375, + "rewards/rejected": -20.790977478027344, + "step": 27070 + }, + { + "epoch": 1.61, + "learning_rate": 2.602397994048737e-06, + "logits/chosen": -2.6456897258758545, + "logits/rejected": -2.0425076484680176, + "logps/chosen": -546.4664306640625, + "logps/rejected": -1833.993408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7365851402282715, + "rewards/margins": 13.168106079101562, + "rewards/rejected": -17.904691696166992, + "step": 27080 + }, + { + "epoch": 1.62, + "learning_rate": 2.6006648502735384e-06, + "logits/chosen": -2.6057231426239014, + "logits/rejected": -2.0927066802978516, + "logps/chosen": -522.8834838867188, + "logps/rejected": -1810.1422119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5429534912109375, + "rewards/margins": 13.114581108093262, + "rewards/rejected": -17.657535552978516, + "step": 27090 + }, + { + "epoch": 1.62, + "learning_rate": 2.598931658038282e-06, + "logits/chosen": -2.618319272994995, + "logits/rejected": -2.0755720138549805, + "logps/chosen": -546.84130859375, + "logps/rejected": -1863.6383056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7776336669921875, + "rewards/margins": 13.421747207641602, + "rewards/rejected": -18.19937515258789, + "step": 27100 + }, + { + "epoch": 1.62, + "learning_rate": 2.597198418177327e-06, + "logits/chosen": -2.6501827239990234, + "logits/rejected": -2.0153591632843018, + "logps/chosen": -530.1707763671875, + "logps/rejected": -1858.1396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.580567359924316, + "rewards/margins": 13.560511589050293, + "rewards/rejected": -18.14107894897461, + "step": 27110 + }, + { + "epoch": 1.62, + "learning_rate": 2.5954651315250543e-06, + "logits/chosen": -2.5995945930480957, + "logits/rejected": -2.071059465408325, + "logps/chosen": -524.2247314453125, + "logps/rejected": -1889.6611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.504097938537598, + "rewards/margins": 13.959091186523438, + "rewards/rejected": -18.46319007873535, + "step": 27120 + }, + { + "epoch": 1.62, + "learning_rate": 2.5937317989158694e-06, + "logits/chosen": -2.6435256004333496, + "logits/rejected": -2.087625026702881, + "logps/chosen": -541.875244140625, + "logps/rejected": -1855.6292724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.648244380950928, + "rewards/margins": 13.46172046661377, + "rewards/rejected": -18.10996437072754, + "step": 27130 + }, + { + "epoch": 1.62, + "learning_rate": 2.591998421184197e-06, + "logits/chosen": -2.6117196083068848, + "logits/rejected": -2.0654006004333496, + "logps/chosen": -526.3292236328125, + "logps/rejected": -1804.183349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.614415168762207, + "rewards/margins": 12.978700637817383, + "rewards/rejected": -17.593114852905273, + "step": 27140 + }, + { + "epoch": 1.62, + "learning_rate": 2.5902649991644855e-06, + "logits/chosen": -2.626885414123535, + "logits/rejected": -2.1018669605255127, + "logps/chosen": -534.3504638671875, + "logps/rejected": -1854.3505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6338043212890625, + "rewards/margins": 13.4697265625, + "rewards/rejected": -18.10352897644043, + "step": 27150 + }, + { + "epoch": 1.62, + "learning_rate": 2.5885315336912058e-06, + "logits/chosen": -2.6313960552215576, + "logits/rejected": -2.0431883335113525, + "logps/chosen": -546.3695678710938, + "logps/rejected": -1882.414794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.745999336242676, + "rewards/margins": 13.628606796264648, + "rewards/rejected": -18.37460708618164, + "step": 27160 + }, + { + "epoch": 1.62, + "learning_rate": 2.5867980255988462e-06, + "logits/chosen": -2.6019606590270996, + "logits/rejected": -2.0707173347473145, + "logps/chosen": -540.4913940429688, + "logps/rejected": -1840.1898193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.624998569488525, + "rewards/margins": 13.31879711151123, + "rewards/rejected": -17.943798065185547, + "step": 27170 + }, + { + "epoch": 1.62, + "learning_rate": 2.5850644757219177e-06, + "logits/chosen": -2.596395254135132, + "logits/rejected": -1.9997549057006836, + "logps/chosen": -532.37353515625, + "logps/rejected": -1872.798583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.578314304351807, + "rewards/margins": 13.711585998535156, + "rewards/rejected": -18.289899826049805, + "step": 27180 + }, + { + "epoch": 1.62, + "learning_rate": 2.5833308848949523e-06, + "logits/chosen": -2.600571870803833, + "logits/rejected": -2.077988386154175, + "logps/chosen": -545.5903930664062, + "logps/rejected": -1881.5941162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.717337608337402, + "rewards/margins": 13.65234088897705, + "rewards/rejected": -18.369678497314453, + "step": 27190 + }, + { + "epoch": 1.62, + "learning_rate": 2.5815972539524996e-06, + "logits/chosen": -2.587747573852539, + "logits/rejected": -2.002727508544922, + "logps/chosen": -538.1321411132812, + "logps/rejected": -1852.1728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.659018039703369, + "rewards/margins": 13.40569019317627, + "rewards/rejected": -18.064708709716797, + "step": 27200 + }, + { + "epoch": 1.62, + "learning_rate": 2.5798635837291304e-06, + "logits/chosen": -2.6041152477264404, + "logits/rejected": -2.068291187286377, + "logps/chosen": -558.2596435546875, + "logps/rejected": -1833.876708984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8523945808410645, + "rewards/margins": 13.032666206359863, + "rewards/rejected": -17.885059356689453, + "step": 27210 + }, + { + "epoch": 1.62, + "learning_rate": 2.5781298750594325e-06, + "logits/chosen": -2.6451964378356934, + "logits/rejected": -2.0170693397521973, + "logps/chosen": -536.3121337890625, + "logps/rejected": -1845.804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.6570539474487305, + "rewards/margins": 13.367193222045898, + "rewards/rejected": -18.024248123168945, + "step": 27220 + }, + { + "epoch": 1.62, + "learning_rate": 2.5763961287780137e-06, + "logits/chosen": -2.602128505706787, + "logits/rejected": -2.0438246726989746, + "logps/chosen": -539.8510131835938, + "logps/rejected": -1883.927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.639753341674805, + "rewards/margins": 13.752555847167969, + "rewards/rejected": -18.392309188842773, + "step": 27230 + }, + { + "epoch": 1.62, + "learning_rate": 2.5746623457194996e-06, + "logits/chosen": -2.591639995574951, + "logits/rejected": -2.0909290313720703, + "logps/chosen": -522.9951782226562, + "logps/rejected": -1831.2720947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5403923988342285, + "rewards/margins": 13.318216323852539, + "rewards/rejected": -17.85860824584961, + "step": 27240 + }, + { + "epoch": 1.62, + "learning_rate": 2.5729285267185333e-06, + "logits/chosen": -2.6427597999572754, + "logits/rejected": -1.9635776281356812, + "logps/chosen": -542.7803955078125, + "logps/rejected": -1929.5230712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.66673469543457, + "rewards/margins": 14.168815612792969, + "rewards/rejected": -18.83555030822754, + "step": 27250 + }, + { + "epoch": 1.63, + "learning_rate": 2.5711946726097754e-06, + "logits/chosen": -2.6342613697052, + "logits/rejected": -2.078782081604004, + "logps/chosen": -531.3794555664062, + "logps/rejected": -1859.0025634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.622547149658203, + "rewards/margins": 13.513731002807617, + "rewards/rejected": -18.136280059814453, + "step": 27260 + }, + { + "epoch": 1.63, + "learning_rate": 2.569460784227903e-06, + "logits/chosen": -2.607177257537842, + "logits/rejected": -2.057940721511841, + "logps/chosen": -531.5582275390625, + "logps/rejected": -1773.665283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.629995822906494, + "rewards/margins": 12.670268058776855, + "rewards/rejected": -17.30026626586914, + "step": 27270 + }, + { + "epoch": 1.63, + "learning_rate": 2.5677268624076094e-06, + "logits/chosen": -2.626335382461548, + "logits/rejected": -2.0978100299835205, + "logps/chosen": -535.5631713867188, + "logps/rejected": -1933.9755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.719598293304443, + "rewards/margins": 14.183616638183594, + "rewards/rejected": -18.90321159362793, + "step": 27280 + }, + { + "epoch": 1.63, + "learning_rate": 2.5659929079836054e-06, + "logits/chosen": -2.6305556297302246, + "logits/rejected": -2.029299020767212, + "logps/chosen": -548.4736328125, + "logps/rejected": -1822.125244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.750632286071777, + "rewards/margins": 13.029268264770508, + "rewards/rejected": -17.779897689819336, + "step": 27290 + }, + { + "epoch": 1.63, + "learning_rate": 2.5642589217906164e-06, + "logits/chosen": -2.627129316329956, + "logits/rejected": -2.078174114227295, + "logps/chosen": -531.8477172851562, + "logps/rejected": -1814.657958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.613266468048096, + "rewards/margins": 13.0897855758667, + "rewards/rejected": -17.703052520751953, + "step": 27300 + }, + { + "epoch": 1.63, + "learning_rate": 2.5625249046633832e-06, + "logits/chosen": -2.5889346599578857, + "logits/rejected": -2.0950770378112793, + "logps/chosen": -525.3782348632812, + "logps/rejected": -1870.2523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.525247573852539, + "rewards/margins": 13.727409362792969, + "rewards/rejected": -18.25265884399414, + "step": 27310 + }, + { + "epoch": 1.63, + "learning_rate": 2.560790857436662e-06, + "logits/chosen": -2.606229305267334, + "logits/rejected": -2.1178927421569824, + "logps/chosen": -530.193603515625, + "logps/rejected": -1867.8343505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.624324798583984, + "rewards/margins": 13.611913681030273, + "rewards/rejected": -18.23624038696289, + "step": 27320 + }, + { + "epoch": 1.63, + "learning_rate": 2.559056780945223e-06, + "logits/chosen": -2.600442409515381, + "logits/rejected": -2.0702462196350098, + "logps/chosen": -533.6383666992188, + "logps/rejected": -1906.9036865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.639902114868164, + "rewards/margins": 13.980550765991211, + "rewards/rejected": -18.620454788208008, + "step": 27330 + }, + { + "epoch": 1.63, + "learning_rate": 2.5573226760238496e-06, + "logits/chosen": -2.5774085521698, + "logits/rejected": -1.9138076305389404, + "logps/chosen": -521.6212768554688, + "logps/rejected": -1871.0146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.493587017059326, + "rewards/margins": 13.770120620727539, + "rewards/rejected": -18.26370620727539, + "step": 27340 + }, + { + "epoch": 1.63, + "learning_rate": 2.555588543507341e-06, + "logits/chosen": -2.593085289001465, + "logits/rejected": -2.0675525665283203, + "logps/chosen": -531.1300659179688, + "logps/rejected": -1851.478515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.571700096130371, + "rewards/margins": 13.4932861328125, + "rewards/rejected": -18.064985275268555, + "step": 27350 + }, + { + "epoch": 1.63, + "learning_rate": 2.5538543842305085e-06, + "logits/chosen": -2.58685564994812, + "logits/rejected": -1.8568967580795288, + "logps/chosen": -546.7637939453125, + "logps/rejected": -1897.4127197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.753139972686768, + "rewards/margins": 13.775276184082031, + "rewards/rejected": -18.52841567993164, + "step": 27360 + }, + { + "epoch": 1.63, + "learning_rate": 2.552120199028176e-06, + "logits/chosen": -2.653106212615967, + "logits/rejected": -2.079511880874634, + "logps/chosen": -520.1790161132812, + "logps/rejected": -1966.653076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.478375434875488, + "rewards/margins": 14.746310234069824, + "rewards/rejected": -19.224687576293945, + "step": 27370 + }, + { + "epoch": 1.63, + "learning_rate": 2.55038598873518e-06, + "logits/chosen": -2.577693462371826, + "logits/rejected": -2.0528531074523926, + "logps/chosen": -532.7747802734375, + "logps/rejected": -1900.645263671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.629065036773682, + "rewards/margins": 13.930084228515625, + "rewards/rejected": -18.559146881103516, + "step": 27380 + }, + { + "epoch": 1.63, + "learning_rate": 2.5486517541863696e-06, + "logits/chosen": -2.6120147705078125, + "logits/rejected": -2.0402188301086426, + "logps/chosen": -542.4140014648438, + "logps/rejected": -1847.280029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7242841720581055, + "rewards/margins": 13.305709838867188, + "rewards/rejected": -18.029993057250977, + "step": 27390 + }, + { + "epoch": 1.63, + "learning_rate": 2.546917496216606e-06, + "logits/chosen": -2.595921754837036, + "logits/rejected": -1.9591760635375977, + "logps/chosen": -559.8668212890625, + "logps/rejected": -1844.013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.936262607574463, + "rewards/margins": 13.06232738494873, + "rewards/rejected": -17.99859046936035, + "step": 27400 + }, + { + "epoch": 1.63, + "learning_rate": 2.5451832156607602e-06, + "logits/chosen": -2.6040003299713135, + "logits/rejected": -2.088918924331665, + "logps/chosen": -552.52099609375, + "logps/rejected": -1885.240478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.823269844055176, + "rewards/margins": 13.59582805633545, + "rewards/rejected": -18.419097900390625, + "step": 27410 + }, + { + "epoch": 1.64, + "learning_rate": 2.5434489133537154e-06, + "logits/chosen": -2.59131121635437, + "logits/rejected": -1.9982225894927979, + "logps/chosen": -539.6328125, + "logps/rejected": -1872.7073974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.752663612365723, + "rewards/margins": 13.515820503234863, + "rewards/rejected": -18.268484115600586, + "step": 27420 + }, + { + "epoch": 1.64, + "learning_rate": 2.5417145901303634e-06, + "logits/chosen": -2.620851993560791, + "logits/rejected": -2.096788167953491, + "logps/chosen": -543.0482788085938, + "logps/rejected": -1868.1624755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.710433006286621, + "rewards/margins": 13.524052619934082, + "rewards/rejected": -18.234485626220703, + "step": 27430 + }, + { + "epoch": 1.64, + "learning_rate": 2.5399802468256085e-06, + "logits/chosen": -2.6699512004852295, + "logits/rejected": -2.0448286533355713, + "logps/chosen": -541.1151123046875, + "logps/rejected": -1962.072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7048749923706055, + "rewards/margins": 14.47742748260498, + "rewards/rejected": -19.182302474975586, + "step": 27440 + }, + { + "epoch": 1.64, + "learning_rate": 2.5382458842743634e-06, + "logits/chosen": -2.6164472103118896, + "logits/rejected": -2.073322057723999, + "logps/chosen": -528.2350463867188, + "logps/rejected": -1883.91015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.553404808044434, + "rewards/margins": 13.838232040405273, + "rewards/rejected": -18.39163589477539, + "step": 27450 + }, + { + "epoch": 1.64, + "learning_rate": 2.5365115033115494e-06, + "logits/chosen": -2.6371593475341797, + "logits/rejected": -2.0382134914398193, + "logps/chosen": -550.2452392578125, + "logps/rejected": -1815.460693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7377119064331055, + "rewards/margins": 12.985011100769043, + "rewards/rejected": -17.72272491455078, + "step": 27460 + }, + { + "epoch": 1.64, + "learning_rate": 2.5347771047720987e-06, + "logits/chosen": -2.6048145294189453, + "logits/rejected": -2.039114475250244, + "logps/chosen": -546.4927978515625, + "logps/rejected": -1848.616455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.7520575523376465, + "rewards/margins": 13.292208671569824, + "rewards/rejected": -18.044265747070312, + "step": 27470 + }, + { + "epoch": 1.64, + "learning_rate": 2.53304268949095e-06, + "logits/chosen": -2.579556703567505, + "logits/rejected": -2.0140509605407715, + "logps/chosen": -546.1578979492188, + "logps/rejected": -1809.952392578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.779983997344971, + "rewards/margins": 12.89222240447998, + "rewards/rejected": -17.67220687866211, + "step": 27480 + }, + { + "epoch": 1.64, + "learning_rate": 2.5313082583030513e-06, + "logits/chosen": -2.616673707962036, + "logits/rejected": -2.012376308441162, + "logps/chosen": -545.6365966796875, + "logps/rejected": -1874.3646240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.752992153167725, + "rewards/margins": 13.555148124694824, + "rewards/rejected": -18.30813980102539, + "step": 27490 + }, + { + "epoch": 1.64, + "learning_rate": 2.5295738120433573e-06, + "logits/chosen": -2.6426138877868652, + "logits/rejected": -2.00862979888916, + "logps/chosen": -552.69189453125, + "logps/rejected": -1881.8209228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.898518085479736, + "rewards/margins": 13.472763061523438, + "rewards/rejected": -18.371280670166016, + "step": 27500 + }, + { + "epoch": 1.64, + "learning_rate": 2.5278393515468312e-06, + "logits/chosen": -2.6024973392486572, + "logits/rejected": -2.0131776332855225, + "logps/chosen": -558.5849609375, + "logps/rejected": -1837.9156494140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.827402114868164, + "rewards/margins": 13.10912799835205, + "rewards/rejected": -17.936527252197266, + "step": 27510 + }, + { + "epoch": 1.64, + "learning_rate": 2.526104877648441e-06, + "logits/chosen": -2.603640079498291, + "logits/rejected": -2.0335137844085693, + "logps/chosen": -550.4426879882812, + "logps/rejected": -1874.0748291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.874143600463867, + "rewards/margins": 13.422533988952637, + "rewards/rejected": -18.296680450439453, + "step": 27520 + }, + { + "epoch": 1.64, + "learning_rate": 2.5243703911831634e-06, + "logits/chosen": -2.575383186340332, + "logits/rejected": -2.0558228492736816, + "logps/chosen": -562.0652465820312, + "logps/rejected": -1808.799072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.880160808563232, + "rewards/margins": 12.761380195617676, + "rewards/rejected": -17.64154052734375, + "step": 27530 + }, + { + "epoch": 1.64, + "learning_rate": 2.5226358929859793e-06, + "logits/chosen": -2.563906192779541, + "logits/rejected": -1.9899578094482422, + "logps/chosen": -573.6712036132812, + "logps/rejected": -1921.0435791015625, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.030488967895508, + "rewards/margins": 13.734402656555176, + "rewards/rejected": -18.764890670776367, + "step": 27540 + }, + { + "epoch": 1.64, + "learning_rate": 2.5209013838918765e-06, + "logits/chosen": -2.5860466957092285, + "logits/rejected": -1.949318289756775, + "logps/chosen": -606.6182250976562, + "logps/rejected": -1930.1549072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.352025508880615, + "rewards/margins": 13.499285697937012, + "rewards/rejected": -18.851308822631836, + "step": 27550 + }, + { + "epoch": 1.64, + "learning_rate": 2.5191668647358485e-06, + "logits/chosen": -2.5997419357299805, + "logits/rejected": -2.0317206382751465, + "logps/chosen": -591.868408203125, + "logps/rejected": -1964.6448974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1994829177856445, + "rewards/margins": 13.99591064453125, + "rewards/rejected": -19.19539451599121, + "step": 27560 + }, + { + "epoch": 1.64, + "learning_rate": 2.517432336352891e-06, + "logits/chosen": -2.5694127082824707, + "logits/rejected": -2.0088000297546387, + "logps/chosen": -594.1204833984375, + "logps/rejected": -1901.25390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.254246711730957, + "rewards/margins": 13.31432056427002, + "rewards/rejected": -18.568567276000977, + "step": 27570 + }, + { + "epoch": 1.64, + "learning_rate": 2.515697799578008e-06, + "logits/chosen": -2.5777580738067627, + "logits/rejected": -2.026108980178833, + "logps/chosen": -581.4094848632812, + "logps/rejected": -1951.818115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.155762672424316, + "rewards/margins": 13.9241943359375, + "rewards/rejected": -19.079957962036133, + "step": 27580 + }, + { + "epoch": 1.65, + "learning_rate": 2.513963255246204e-06, + "logits/chosen": -2.6110036373138428, + "logits/rejected": -1.9494266510009766, + "logps/chosen": -576.9820556640625, + "logps/rejected": -1934.252197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.076207160949707, + "rewards/margins": 13.837564468383789, + "rewards/rejected": -18.913768768310547, + "step": 27590 + }, + { + "epoch": 1.65, + "learning_rate": 2.5122287041924897e-06, + "logits/chosen": -2.589337110519409, + "logits/rejected": -1.9487950801849365, + "logps/chosen": -586.609375, + "logps/rejected": -1957.337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1056647300720215, + "rewards/margins": 14.025219917297363, + "rewards/rejected": -19.13088607788086, + "step": 27600 + }, + { + "epoch": 1.65, + "learning_rate": 2.5104941472518775e-06, + "logits/chosen": -2.616589069366455, + "logits/rejected": -1.957458734512329, + "logps/chosen": -608.3062744140625, + "logps/rejected": -1974.638427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3829450607299805, + "rewards/margins": 13.924997329711914, + "rewards/rejected": -19.30794334411621, + "step": 27610 + }, + { + "epoch": 1.65, + "learning_rate": 2.508759585259382e-06, + "logits/chosen": -2.586125373840332, + "logits/rejected": -2.004580497741699, + "logps/chosen": -584.281005859375, + "logps/rejected": -2009.958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.117034435272217, + "rewards/margins": 14.525688171386719, + "rewards/rejected": -19.642724990844727, + "step": 27620 + }, + { + "epoch": 1.65, + "learning_rate": 2.507025019050022e-06, + "logits/chosen": -2.5736563205718994, + "logits/rejected": -1.9332239627838135, + "logps/chosen": -577.6915283203125, + "logps/rejected": -2018.8941650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.038025856018066, + "rewards/margins": 14.6981782913208, + "rewards/rejected": -19.736204147338867, + "step": 27630 + }, + { + "epoch": 1.65, + "learning_rate": 2.5052904494588186e-06, + "logits/chosen": -2.5499141216278076, + "logits/rejected": -1.8574033975601196, + "logps/chosen": -593.3758544921875, + "logps/rejected": -1961.6265869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2234063148498535, + "rewards/margins": 13.951197624206543, + "rewards/rejected": -19.174602508544922, + "step": 27640 + }, + { + "epoch": 1.65, + "learning_rate": 2.503555877320793e-06, + "logits/chosen": -2.55401873588562, + "logits/rejected": -1.9419015645980835, + "logps/chosen": -598.54833984375, + "logps/rejected": -1881.6624755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.330493927001953, + "rewards/margins": 13.050862312316895, + "rewards/rejected": -18.381359100341797, + "step": 27650 + }, + { + "epoch": 1.65, + "learning_rate": 2.5018213034709683e-06, + "logits/chosen": -2.5498080253601074, + "logits/rejected": -1.9579986333847046, + "logps/chosen": -602.8717651367188, + "logps/rejected": -1926.663818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.351332664489746, + "rewards/margins": 13.474055290222168, + "rewards/rejected": -18.82538604736328, + "step": 27660 + }, + { + "epoch": 1.65, + "learning_rate": 2.5000867287443676e-06, + "logits/chosen": -2.5660533905029297, + "logits/rejected": -2.001296043395996, + "logps/chosen": -601.375244140625, + "logps/rejected": -1818.648681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.278928756713867, + "rewards/margins": 12.481436729431152, + "rewards/rejected": -17.760366439819336, + "step": 27670 + }, + { + "epoch": 1.65, + "learning_rate": 2.498352153976016e-06, + "logits/chosen": -2.5730974674224854, + "logits/rejected": -1.939121961593628, + "logps/chosen": -612.6927490234375, + "logps/rejected": -1881.3922119140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.438349723815918, + "rewards/margins": 12.941433906555176, + "rewards/rejected": -18.37978172302246, + "step": 27680 + }, + { + "epoch": 1.65, + "learning_rate": 2.496617580000937e-06, + "logits/chosen": -2.5654101371765137, + "logits/rejected": -1.848726511001587, + "logps/chosen": -616.60888671875, + "logps/rejected": -1958.4892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.499780178070068, + "rewards/margins": 13.644195556640625, + "rewards/rejected": -19.143978118896484, + "step": 27690 + }, + { + "epoch": 1.65, + "learning_rate": 2.4948830076541554e-06, + "logits/chosen": -2.5809326171875, + "logits/rejected": -1.9766706228256226, + "logps/chosen": -609.9451904296875, + "logps/rejected": -1956.8714599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.365776062011719, + "rewards/margins": 13.75123119354248, + "rewards/rejected": -19.117008209228516, + "step": 27700 + }, + { + "epoch": 1.65, + "learning_rate": 2.4931484377706934e-06, + "logits/chosen": -2.566723585128784, + "logits/rejected": -1.9684383869171143, + "logps/chosen": -636.1339721679688, + "logps/rejected": -1984.182861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.649887561798096, + "rewards/margins": 13.734970092773438, + "rewards/rejected": -19.384857177734375, + "step": 27710 + }, + { + "epoch": 1.65, + "learning_rate": 2.491413871185574e-06, + "logits/chosen": -2.6079468727111816, + "logits/rejected": -2.066385269165039, + "logps/chosen": -608.8988037109375, + "logps/rejected": -1929.003173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318490505218506, + "rewards/margins": 13.533491134643555, + "rewards/rejected": -18.85198402404785, + "step": 27720 + }, + { + "epoch": 1.65, + "learning_rate": 2.489679308733816e-06, + "logits/chosen": -2.570687770843506, + "logits/rejected": -1.9577038288116455, + "logps/chosen": -620.7601318359375, + "logps/rejected": -1959.8668212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.505156993865967, + "rewards/margins": 13.640103340148926, + "rewards/rejected": -19.145259857177734, + "step": 27730 + }, + { + "epoch": 1.65, + "learning_rate": 2.48794475125044e-06, + "logits/chosen": -2.595064640045166, + "logits/rejected": -1.9454326629638672, + "logps/chosen": -605.8153076171875, + "logps/rejected": -1949.511474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.371548175811768, + "rewards/margins": 13.689447402954102, + "rewards/rejected": -19.060993194580078, + "step": 27740 + }, + { + "epoch": 1.65, + "learning_rate": 2.486210199570459e-06, + "logits/chosen": -2.5661160945892334, + "logits/rejected": -1.9077180624008179, + "logps/chosen": -616.5209350585938, + "logps/rejected": -2009.475341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.442381858825684, + "rewards/margins": 14.199007987976074, + "rewards/rejected": -19.641389846801758, + "step": 27750 + }, + { + "epoch": 1.66, + "learning_rate": 2.484475654528889e-06, + "logits/chosen": -2.5278327465057373, + "logits/rejected": -1.9573981761932373, + "logps/chosen": -593.6532592773438, + "logps/rejected": -1941.9896240234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228815078735352, + "rewards/margins": 13.742376327514648, + "rewards/rejected": -18.97119140625, + "step": 27760 + }, + { + "epoch": 1.66, + "learning_rate": 2.482741116960738e-06, + "logits/chosen": -2.620556354522705, + "logits/rejected": -2.086730480194092, + "logps/chosen": -568.4049682617188, + "logps/rejected": -1968.0462646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.997622013092041, + "rewards/margins": 14.23011302947998, + "rewards/rejected": -19.22773551940918, + "step": 27770 + }, + { + "epoch": 1.66, + "learning_rate": 2.4810065877010137e-06, + "logits/chosen": -2.6128287315368652, + "logits/rejected": -1.9859243631362915, + "logps/chosen": -586.490478515625, + "logps/rejected": -1917.711669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1924943923950195, + "rewards/margins": 13.553500175476074, + "rewards/rejected": -18.74599266052246, + "step": 27780 + }, + { + "epoch": 1.66, + "learning_rate": 2.479272067584717e-06, + "logits/chosen": -2.5489230155944824, + "logits/rejected": -1.9276883602142334, + "logps/chosen": -596.4427490234375, + "logps/rejected": -2019.211669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.246638774871826, + "rewards/margins": 14.501165390014648, + "rewards/rejected": -19.747806549072266, + "step": 27790 + }, + { + "epoch": 1.66, + "learning_rate": 2.4775375574468478e-06, + "logits/chosen": -2.614104747772217, + "logits/rejected": -1.9769747257232666, + "logps/chosen": -576.1280517578125, + "logps/rejected": -1978.0465087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.021981716156006, + "rewards/margins": 14.305557250976562, + "rewards/rejected": -19.32754135131836, + "step": 27800 + }, + { + "epoch": 1.66, + "learning_rate": 2.475803058122397e-06, + "logits/chosen": -2.57179594039917, + "logits/rejected": -1.899413824081421, + "logps/chosen": -581.3380126953125, + "logps/rejected": -1976.8310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.07733154296875, + "rewards/margins": 14.253423690795898, + "rewards/rejected": -19.330759048461914, + "step": 27810 + }, + { + "epoch": 1.66, + "learning_rate": 2.4740685704463545e-06, + "logits/chosen": -2.583684206008911, + "logits/rejected": -1.9309101104736328, + "logps/chosen": -581.8092651367188, + "logps/rejected": -1959.279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.085458278656006, + "rewards/margins": 14.063448905944824, + "rewards/rejected": -19.148906707763672, + "step": 27820 + }, + { + "epoch": 1.66, + "learning_rate": 2.472334095253702e-06, + "logits/chosen": -2.5846664905548096, + "logits/rejected": -1.9868481159210205, + "logps/chosen": -587.164306640625, + "logps/rejected": -1996.331298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.180344581604004, + "rewards/margins": 14.345130920410156, + "rewards/rejected": -19.525476455688477, + "step": 27830 + }, + { + "epoch": 1.66, + "learning_rate": 2.470599633379415e-06, + "logits/chosen": -2.5996344089508057, + "logits/rejected": -2.0373454093933105, + "logps/chosen": -572.4346313476562, + "logps/rejected": -2006.2855224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.036869049072266, + "rewards/margins": 14.580671310424805, + "rewards/rejected": -19.61754035949707, + "step": 27840 + }, + { + "epoch": 1.66, + "learning_rate": 2.4688651856584648e-06, + "logits/chosen": -2.5460188388824463, + "logits/rejected": -1.9299061298370361, + "logps/chosen": -582.9605712890625, + "logps/rejected": -1953.26171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.092165946960449, + "rewards/margins": 13.996356010437012, + "rewards/rejected": -19.08852195739746, + "step": 27850 + }, + { + "epoch": 1.66, + "learning_rate": 2.4671307529258127e-06, + "logits/chosen": -2.6225883960723877, + "logits/rejected": -2.0587704181671143, + "logps/chosen": -599.8072509765625, + "logps/rejected": -1962.8343505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2629194259643555, + "rewards/margins": 13.928720474243164, + "rewards/rejected": -19.191638946533203, + "step": 27860 + }, + { + "epoch": 1.66, + "learning_rate": 2.465396336016417e-06, + "logits/chosen": -2.592460870742798, + "logits/rejected": -1.9921579360961914, + "logps/chosen": -597.5233154296875, + "logps/rejected": -1939.8388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.3485517501831055, + "rewards/margins": 13.609965324401855, + "rewards/rejected": -18.958515167236328, + "step": 27870 + }, + { + "epoch": 1.66, + "learning_rate": 2.4636619357652234e-06, + "logits/chosen": -2.5523123741149902, + "logits/rejected": -1.9464778900146484, + "logps/chosen": -586.8844604492188, + "logps/rejected": -1911.9127197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.120243549346924, + "rewards/margins": 13.563913345336914, + "rewards/rejected": -18.68415641784668, + "step": 27880 + }, + { + "epoch": 1.66, + "learning_rate": 2.461927553007175e-06, + "logits/chosen": -2.606196403503418, + "logits/rejected": -2.0472662448883057, + "logps/chosen": -584.3395385742188, + "logps/rejected": -1919.484619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.107532978057861, + "rewards/margins": 13.642484664916992, + "rewards/rejected": -18.750019073486328, + "step": 27890 + }, + { + "epoch": 1.66, + "learning_rate": 2.460193188577201e-06, + "logits/chosen": -2.554248094558716, + "logits/rejected": -1.946117639541626, + "logps/chosen": -580.05615234375, + "logps/rejected": -1897.114990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.117770195007324, + "rewards/margins": 13.413920402526855, + "rewards/rejected": -18.531692504882812, + "step": 27900 + }, + { + "epoch": 1.66, + "learning_rate": 2.458458843310226e-06, + "logits/chosen": -2.5796725749969482, + "logits/rejected": -1.9771931171417236, + "logps/chosen": -582.7297973632812, + "logps/rejected": -2015.0843505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1068620681762695, + "rewards/margins": 14.59619426727295, + "rewards/rejected": -19.70305824279785, + "step": 27910 + }, + { + "epoch": 1.66, + "learning_rate": 2.456724518041163e-06, + "logits/chosen": -2.6058881282806396, + "logits/rejected": -2.0061492919921875, + "logps/chosen": -587.470458984375, + "logps/rejected": -1923.1146240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.112555027008057, + "rewards/margins": 13.680293083190918, + "rewards/rejected": -18.792848587036133, + "step": 27920 + }, + { + "epoch": 1.67, + "learning_rate": 2.454990213604917e-06, + "logits/chosen": -2.589059829711914, + "logits/rejected": -1.8746755123138428, + "logps/chosen": -570.4216918945312, + "logps/rejected": -1999.8636474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.983267784118652, + "rewards/margins": 14.570274353027344, + "rewards/rejected": -19.553543090820312, + "step": 27930 + }, + { + "epoch": 1.67, + "learning_rate": 2.453255930836381e-06, + "logits/chosen": -2.581610918045044, + "logits/rejected": -1.9770952463150024, + "logps/chosen": -597.2303466796875, + "logps/rejected": -1962.108154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2919921875, + "rewards/margins": 13.89263916015625, + "rewards/rejected": -19.18463134765625, + "step": 27940 + }, + { + "epoch": 1.67, + "learning_rate": 2.4515216705704396e-06, + "logits/chosen": -2.591005802154541, + "logits/rejected": -2.0204861164093018, + "logps/chosen": -571.5285034179688, + "logps/rejected": -2012.566650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.047174453735352, + "rewards/margins": 14.632412910461426, + "rewards/rejected": -19.67958641052246, + "step": 27950 + }, + { + "epoch": 1.67, + "learning_rate": 2.449787433641965e-06, + "logits/chosen": -2.552297353744507, + "logits/rejected": -1.947570562362671, + "logps/chosen": -586.0657348632812, + "logps/rejected": -1977.3616943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.195405960083008, + "rewards/margins": 14.134709358215332, + "rewards/rejected": -19.330114364624023, + "step": 27960 + }, + { + "epoch": 1.67, + "learning_rate": 2.4480532208858195e-06, + "logits/chosen": -2.5898869037628174, + "logits/rejected": -2.016209602355957, + "logps/chosen": -582.2562255859375, + "logps/rejected": -1941.5892333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.135979652404785, + "rewards/margins": 13.8308744430542, + "rewards/rejected": -18.966854095458984, + "step": 27970 + }, + { + "epoch": 1.67, + "learning_rate": 2.4463190331368527e-06, + "logits/chosen": -2.580319404602051, + "logits/rejected": -1.9338077306747437, + "logps/chosen": -582.3869018554688, + "logps/rejected": -1910.917724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.104083061218262, + "rewards/margins": 13.562789916992188, + "rewards/rejected": -18.666873931884766, + "step": 27980 + }, + { + "epoch": 1.67, + "learning_rate": 2.4445848712299027e-06, + "logits/chosen": -2.585291624069214, + "logits/rejected": -1.9849998950958252, + "logps/chosen": -585.9993286132812, + "logps/rejected": -1981.8544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166003227233887, + "rewards/margins": 14.211435317993164, + "rewards/rejected": -19.377437591552734, + "step": 27990 + }, + { + "epoch": 1.67, + "learning_rate": 2.442850735999795e-06, + "logits/chosen": -2.589489459991455, + "logits/rejected": -2.0898995399475098, + "logps/chosen": -579.5090942382812, + "logps/rejected": -1860.059326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.102123260498047, + "rewards/margins": 13.071533203125, + "rewards/rejected": -18.173656463623047, + "step": 28000 + }, + { + "epoch": 1.67, + "eval_logits/chosen": -2.539252996444702, + "eval_logits/rejected": -2.179302215576172, + "eval_logps/chosen": -619.2333984375, + "eval_logps/rejected": -1821.8681640625, + "eval_loss": 6.643367669312283e-05, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -5.5215888023376465, + "eval_rewards/margins": 12.241609573364258, + "eval_rewards/rejected": -17.763198852539062, + "eval_runtime": 3.9012, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.256, + "step": 28000 + }, + { + "epoch": 1.67, + "learning_rate": 2.441116628281343e-06, + "logits/chosen": -2.5569396018981934, + "logits/rejected": -1.9802652597427368, + "logps/chosen": -611.7565307617188, + "logps/rejected": -2015.822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.415939807891846, + "rewards/margins": 14.3053617477417, + "rewards/rejected": -19.721302032470703, + "step": 28010 + }, + { + "epoch": 1.67, + "learning_rate": 2.4393825489093438e-06, + "logits/chosen": -2.5934290885925293, + "logits/rejected": -1.937894582748413, + "logps/chosen": -584.2136840820312, + "logps/rejected": -2004.219482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.15344762802124, + "rewards/margins": 14.437841415405273, + "rewards/rejected": -19.591289520263672, + "step": 28020 + }, + { + "epoch": 1.67, + "learning_rate": 2.437648498718586e-06, + "logits/chosen": -2.638516902923584, + "logits/rejected": -1.9159901142120361, + "logps/chosen": -576.5700073242188, + "logps/rejected": -2071.722900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.025417327880859, + "rewards/margins": 15.256631851196289, + "rewards/rejected": -20.282047271728516, + "step": 28030 + }, + { + "epoch": 1.67, + "learning_rate": 2.4359144785438392e-06, + "logits/chosen": -2.610450506210327, + "logits/rejected": -2.0038371086120605, + "logps/chosen": -593.6237182617188, + "logps/rejected": -1984.6177978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.255184650421143, + "rewards/margins": 14.149737358093262, + "rewards/rejected": -19.404926300048828, + "step": 28040 + }, + { + "epoch": 1.67, + "learning_rate": 2.434180489219863e-06, + "logits/chosen": -2.6145846843719482, + "logits/rejected": -1.9924100637435913, + "logps/chosen": -579.9957275390625, + "logps/rejected": -1971.835205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.071523666381836, + "rewards/margins": 14.198677062988281, + "rewards/rejected": -19.270198822021484, + "step": 28050 + }, + { + "epoch": 1.67, + "learning_rate": 2.4324465315813968e-06, + "logits/chosen": -2.5523409843444824, + "logits/rejected": -2.0314390659332275, + "logps/chosen": -595.9298706054688, + "logps/rejected": -1874.6812744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.272886753082275, + "rewards/margins": 13.026382446289062, + "rewards/rejected": -18.299270629882812, + "step": 28060 + }, + { + "epoch": 1.67, + "learning_rate": 2.430712606463171e-06, + "logits/chosen": -2.599637746810913, + "logits/rejected": -1.884040117263794, + "logps/chosen": -579.4631958007812, + "logps/rejected": -1931.790283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0950140953063965, + "rewards/margins": 13.772329330444336, + "rewards/rejected": -18.867341995239258, + "step": 28070 + }, + { + "epoch": 1.67, + "learning_rate": 2.428978714699894e-06, + "logits/chosen": -2.538830041885376, + "logits/rejected": -1.9085285663604736, + "logps/chosen": -578.81396484375, + "logps/rejected": -1946.7427978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.071712970733643, + "rewards/margins": 13.937156677246094, + "rewards/rejected": -19.008869171142578, + "step": 28080 + }, + { + "epoch": 1.68, + "learning_rate": 2.4272448571262645e-06, + "logits/chosen": -2.579589605331421, + "logits/rejected": -2.0221400260925293, + "logps/chosen": -603.6220092773438, + "logps/rejected": -1996.985107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.243558883666992, + "rewards/margins": 14.265748977661133, + "rewards/rejected": -19.509307861328125, + "step": 28090 + }, + { + "epoch": 1.68, + "learning_rate": 2.4255110345769594e-06, + "logits/chosen": -2.6266987323760986, + "logits/rejected": -1.9173511266708374, + "logps/chosen": -574.4216918945312, + "logps/rejected": -1929.171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.033370018005371, + "rewards/margins": 13.819185256958008, + "rewards/rejected": -18.852554321289062, + "step": 28100 + }, + { + "epoch": 1.68, + "learning_rate": 2.4237772478866403e-06, + "logits/chosen": -2.6012134552001953, + "logits/rejected": -1.9223945140838623, + "logps/chosen": -581.6619873046875, + "logps/rejected": -1968.4176025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.130771160125732, + "rewards/margins": 14.10956859588623, + "rewards/rejected": -19.240339279174805, + "step": 28110 + }, + { + "epoch": 1.68, + "learning_rate": 2.422043497889954e-06, + "logits/chosen": -2.602792263031006, + "logits/rejected": -1.9880050420761108, + "logps/chosen": -575.3644409179688, + "logps/rejected": -1998.3431396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.084478855133057, + "rewards/margins": 14.461278915405273, + "rewards/rejected": -19.545757293701172, + "step": 28120 + }, + { + "epoch": 1.68, + "learning_rate": 2.420309785421526e-06, + "logits/chosen": -2.5548782348632812, + "logits/rejected": -1.9191296100616455, + "logps/chosen": -585.8910522460938, + "logps/rejected": -1917.921142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.132516860961914, + "rewards/margins": 13.60222339630127, + "rewards/rejected": -18.734739303588867, + "step": 28130 + }, + { + "epoch": 1.68, + "learning_rate": 2.4185761113159677e-06, + "logits/chosen": -2.548239231109619, + "logits/rejected": -1.9454071521759033, + "logps/chosen": -581.7535400390625, + "logps/rejected": -1973.5062255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.123360633850098, + "rewards/margins": 14.171201705932617, + "rewards/rejected": -19.294567108154297, + "step": 28140 + }, + { + "epoch": 1.68, + "learning_rate": 2.416842476407867e-06, + "logits/chosen": -2.5728840827941895, + "logits/rejected": -1.9047787189483643, + "logps/chosen": -599.4990844726562, + "logps/rejected": -1971.607666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.299673557281494, + "rewards/margins": 13.974108695983887, + "rewards/rejected": -19.273784637451172, + "step": 28150 + }, + { + "epoch": 1.68, + "learning_rate": 2.415108881531798e-06, + "logits/chosen": -2.633103847503662, + "logits/rejected": -1.9999544620513916, + "logps/chosen": -582.5903930664062, + "logps/rejected": -1906.8580322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.062091827392578, + "rewards/margins": 13.560023307800293, + "rewards/rejected": -18.622119903564453, + "step": 28160 + }, + { + "epoch": 1.68, + "learning_rate": 2.4133753275223114e-06, + "logits/chosen": -2.6036765575408936, + "logits/rejected": -1.9366586208343506, + "logps/chosen": -604.3248901367188, + "logps/rejected": -1888.5230712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.354428291320801, + "rewards/margins": 13.096638679504395, + "rewards/rejected": -18.451066970825195, + "step": 28170 + }, + { + "epoch": 1.68, + "learning_rate": 2.411641815213942e-06, + "logits/chosen": -2.5719640254974365, + "logits/rejected": -1.973160982131958, + "logps/chosen": -586.1710205078125, + "logps/rejected": -1937.2562255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.161257266998291, + "rewards/margins": 13.761270523071289, + "rewards/rejected": -18.922527313232422, + "step": 28180 + }, + { + "epoch": 1.68, + "learning_rate": 2.4099083454412013e-06, + "logits/chosen": -2.588855028152466, + "logits/rejected": -1.946123480796814, + "logps/chosen": -579.9886474609375, + "logps/rejected": -1921.84765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.136246681213379, + "rewards/margins": 13.647364616394043, + "rewards/rejected": -18.78360939025879, + "step": 28190 + }, + { + "epoch": 1.68, + "learning_rate": 2.4081749190385818e-06, + "logits/chosen": -2.6330642700195312, + "logits/rejected": -2.100189447402954, + "logps/chosen": -585.6888427734375, + "logps/rejected": -1973.609130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.143670082092285, + "rewards/margins": 14.153717041015625, + "rewards/rejected": -19.297386169433594, + "step": 28200 + }, + { + "epoch": 1.68, + "learning_rate": 2.406441536840555e-06, + "logits/chosen": -2.602055072784424, + "logits/rejected": -1.942940354347229, + "logps/chosen": -584.6650390625, + "logps/rejected": -1944.0052490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.080463886260986, + "rewards/margins": 13.907748222351074, + "rewards/rejected": -18.98821449279785, + "step": 28210 + }, + { + "epoch": 1.68, + "learning_rate": 2.4047081996815712e-06, + "logits/chosen": -2.5505051612854004, + "logits/rejected": -1.9617855548858643, + "logps/chosen": -579.66162109375, + "logps/rejected": -2005.122802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.08604621887207, + "rewards/margins": 14.523469924926758, + "rewards/rejected": -19.609516143798828, + "step": 28220 + }, + { + "epoch": 1.68, + "learning_rate": 2.402974908396059e-06, + "logits/chosen": -2.5742290019989014, + "logits/rejected": -1.9346609115600586, + "logps/chosen": -567.6459350585938, + "logps/rejected": -1929.685791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.97397518157959, + "rewards/margins": 13.877023696899414, + "rewards/rejected": -18.850997924804688, + "step": 28230 + }, + { + "epoch": 1.68, + "learning_rate": 2.401241663818425e-06, + "logits/chosen": -2.580071210861206, + "logits/rejected": -1.9349727630615234, + "logps/chosen": -586.17822265625, + "logps/rejected": -1965.894287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.209881782531738, + "rewards/margins": 14.001764297485352, + "rewards/rejected": -19.21164321899414, + "step": 28240 + }, + { + "epoch": 1.68, + "learning_rate": 2.399508466783052e-06, + "logits/chosen": -2.5834872722625732, + "logits/rejected": -2.0456409454345703, + "logps/chosen": -585.6776123046875, + "logps/rejected": -1906.6904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1185994148254395, + "rewards/margins": 13.508546829223633, + "rewards/rejected": -18.62714195251465, + "step": 28250 + }, + { + "epoch": 1.69, + "learning_rate": 2.397775318124302e-06, + "logits/chosen": -2.6364989280700684, + "logits/rejected": -2.055237293243408, + "logps/chosen": -591.3287963867188, + "logps/rejected": -1912.01171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2078657150268555, + "rewards/margins": 13.470568656921387, + "rewards/rejected": -18.678434371948242, + "step": 28260 + }, + { + "epoch": 1.69, + "learning_rate": 2.396042218676513e-06, + "logits/chosen": -2.563875436782837, + "logits/rejected": -1.9866443872451782, + "logps/chosen": -580.4312133789062, + "logps/rejected": -2016.7613525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.080847263336182, + "rewards/margins": 14.647483825683594, + "rewards/rejected": -19.728328704833984, + "step": 28270 + }, + { + "epoch": 1.69, + "learning_rate": 2.394309169273999e-06, + "logits/chosen": -2.6065242290496826, + "logits/rejected": -2.0151004791259766, + "logps/chosen": -578.9092407226562, + "logps/rejected": -1929.5341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.03870153427124, + "rewards/margins": 13.798345565795898, + "rewards/rejected": -18.83704948425293, + "step": 28280 + }, + { + "epoch": 1.69, + "learning_rate": 2.3925761707510484e-06, + "logits/chosen": -2.5655734539031982, + "logits/rejected": -2.044426441192627, + "logps/chosen": -592.641845703125, + "logps/rejected": -1950.2564697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.268609046936035, + "rewards/margins": 13.790266036987305, + "rewards/rejected": -19.058874130249023, + "step": 28290 + }, + { + "epoch": 1.69, + "learning_rate": 2.390843223941929e-06, + "logits/chosen": -2.5510013103485107, + "logits/rejected": -1.839362382888794, + "logps/chosen": -592.0121459960938, + "logps/rejected": -1933.3753662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.228842258453369, + "rewards/margins": 13.669805526733398, + "rewards/rejected": -18.89864730834961, + "step": 28300 + }, + { + "epoch": 1.69, + "learning_rate": 2.389110329680879e-06, + "logits/chosen": -2.5876376628875732, + "logits/rejected": -2.0632565021514893, + "logps/chosen": -575.7088623046875, + "logps/rejected": -1915.49609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0910491943359375, + "rewards/margins": 13.617263793945312, + "rewards/rejected": -18.70831298828125, + "step": 28310 + }, + { + "epoch": 1.69, + "learning_rate": 2.387377488802116e-06, + "logits/chosen": -2.5694198608398438, + "logits/rejected": -1.9001716375350952, + "logps/chosen": -578.011962890625, + "logps/rejected": -1969.91015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.04058313369751, + "rewards/margins": 14.21418285369873, + "rewards/rejected": -19.254764556884766, + "step": 28320 + }, + { + "epoch": 1.69, + "learning_rate": 2.385644702139827e-06, + "logits/chosen": -2.5302634239196777, + "logits/rejected": -1.8017780780792236, + "logps/chosen": -567.4832153320312, + "logps/rejected": -1933.66015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.962245464324951, + "rewards/margins": 13.933545112609863, + "rewards/rejected": -18.895790100097656, + "step": 28330 + }, + { + "epoch": 1.69, + "learning_rate": 2.383911970528179e-06, + "logits/chosen": -2.560952663421631, + "logits/rejected": -2.0500216484069824, + "logps/chosen": -611.7127685546875, + "logps/rejected": -1971.8922119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.442334175109863, + "rewards/margins": 13.819900512695312, + "rewards/rejected": -19.262237548828125, + "step": 28340 + }, + { + "epoch": 1.69, + "learning_rate": 2.382179294801305e-06, + "logits/chosen": -2.6137800216674805, + "logits/rejected": -2.1037545204162598, + "logps/chosen": -576.220458984375, + "logps/rejected": -1877.4935302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.035095691680908, + "rewards/margins": 13.310691833496094, + "rewards/rejected": -18.345788955688477, + "step": 28350 + }, + { + "epoch": 1.69, + "learning_rate": 2.380446675793318e-06, + "logits/chosen": -2.5830602645874023, + "logits/rejected": -2.0116589069366455, + "logps/chosen": -579.7086791992188, + "logps/rejected": -1855.6243896484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.086228847503662, + "rewards/margins": 13.030738830566406, + "rewards/rejected": -18.116966247558594, + "step": 28360 + }, + { + "epoch": 1.69, + "learning_rate": 2.3787141143383002e-06, + "logits/chosen": -2.563784122467041, + "logits/rejected": -1.9539531469345093, + "logps/chosen": -578.9094848632812, + "logps/rejected": -1968.854248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.053405284881592, + "rewards/margins": 14.19093132019043, + "rewards/rejected": -19.24433708190918, + "step": 28370 + }, + { + "epoch": 1.69, + "learning_rate": 2.376981611270305e-06, + "logits/chosen": -2.5979692935943604, + "logits/rejected": -2.0791211128234863, + "logps/chosen": -570.8829345703125, + "logps/rejected": -1957.5689697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.051558494567871, + "rewards/margins": 14.080119132995605, + "rewards/rejected": -19.131677627563477, + "step": 28380 + }, + { + "epoch": 1.69, + "learning_rate": 2.3752491674233613e-06, + "logits/chosen": -2.607612133026123, + "logits/rejected": -1.953311562538147, + "logps/chosen": -582.6930541992188, + "logps/rejected": -1889.6298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.131250858306885, + "rewards/margins": 13.325592041015625, + "rewards/rejected": -18.45684242248535, + "step": 28390 + }, + { + "epoch": 1.69, + "learning_rate": 2.373516783631466e-06, + "logits/chosen": -2.595109462738037, + "logits/rejected": -1.9931169748306274, + "logps/chosen": -594.64794921875, + "logps/rejected": -1956.3656005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.154379844665527, + "rewards/margins": 13.958549499511719, + "rewards/rejected": -19.11292839050293, + "step": 28400 + }, + { + "epoch": 1.69, + "learning_rate": 2.3717844607285905e-06, + "logits/chosen": -2.570107936859131, + "logits/rejected": -1.8879671096801758, + "logps/chosen": -578.0475463867188, + "logps/rejected": -1949.892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.048867702484131, + "rewards/margins": 14.013712882995605, + "rewards/rejected": -19.062580108642578, + "step": 28410 + }, + { + "epoch": 1.69, + "learning_rate": 2.370052199548673e-06, + "logits/chosen": -2.623671293258667, + "logits/rejected": -2.037383794784546, + "logps/chosen": -588.7054443359375, + "logps/rejected": -2050.89013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.166503429412842, + "rewards/margins": 14.88977336883545, + "rewards/rejected": -20.056278228759766, + "step": 28420 + }, + { + "epoch": 1.7, + "learning_rate": 2.368320000925626e-06, + "logits/chosen": -2.613011598587036, + "logits/rejected": -1.9503284692764282, + "logps/chosen": -588.6566772460938, + "logps/rejected": -2007.7861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.159099102020264, + "rewards/margins": 14.478482246398926, + "rewards/rejected": -19.637582778930664, + "step": 28430 + }, + { + "epoch": 1.7, + "learning_rate": 2.3665878656933285e-06, + "logits/chosen": -2.5372440814971924, + "logits/rejected": -1.9569752216339111, + "logps/chosen": -592.6301879882812, + "logps/rejected": -1952.844482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.209059238433838, + "rewards/margins": 13.867901802062988, + "rewards/rejected": -19.076961517333984, + "step": 28440 + }, + { + "epoch": 1.7, + "learning_rate": 2.3648557946856303e-06, + "logits/chosen": -2.5842700004577637, + "logits/rejected": -1.955476999282837, + "logps/chosen": -578.1971435546875, + "logps/rejected": -1887.915283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.108771324157715, + "rewards/margins": 13.330777168273926, + "rewards/rejected": -18.439546585083008, + "step": 28450 + }, + { + "epoch": 1.7, + "learning_rate": 2.3631237887363513e-06, + "logits/chosen": -2.6039772033691406, + "logits/rejected": -1.9671142101287842, + "logps/chosen": -585.6282348632812, + "logps/rejected": -2003.3857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.105025291442871, + "rewards/margins": 14.486332893371582, + "rewards/rejected": -19.591358184814453, + "step": 28460 + }, + { + "epoch": 1.7, + "learning_rate": 2.3613918486792777e-06, + "logits/chosen": -2.604065418243408, + "logits/rejected": -2.028550386428833, + "logps/chosen": -574.07763671875, + "logps/rejected": -1943.1025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.020363807678223, + "rewards/margins": 13.977724075317383, + "rewards/rejected": -18.998088836669922, + "step": 28470 + }, + { + "epoch": 1.7, + "learning_rate": 2.3596599753481676e-06, + "logits/chosen": -2.617370367050171, + "logits/rejected": -1.906719446182251, + "logps/chosen": -587.4271240234375, + "logps/rejected": -1953.3509521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.201891899108887, + "rewards/margins": 13.892168045043945, + "rewards/rejected": -19.094058990478516, + "step": 28480 + }, + { + "epoch": 1.7, + "learning_rate": 2.357928169576743e-06, + "logits/chosen": -2.581176519393921, + "logits/rejected": -1.9925209283828735, + "logps/chosen": -591.2456665039062, + "logps/rejected": -1906.3470458984375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.185707092285156, + "rewards/margins": 13.446874618530273, + "rewards/rejected": -18.63258171081543, + "step": 28490 + }, + { + "epoch": 1.7, + "learning_rate": 2.3561964321986963e-06, + "logits/chosen": -2.5431599617004395, + "logits/rejected": -1.8978179693222046, + "logps/chosen": -590.4671020507812, + "logps/rejected": -2033.7210693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.2326765060424805, + "rewards/margins": 14.6661376953125, + "rewards/rejected": -19.898813247680664, + "step": 28500 + }, + { + "epoch": 1.7, + "learning_rate": 2.3544647640476843e-06, + "logits/chosen": -2.619983196258545, + "logits/rejected": -1.9215710163116455, + "logps/chosen": -566.3262939453125, + "logps/rejected": -1955.55078125, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.931821346282959, + "rewards/margins": 14.162898063659668, + "rewards/rejected": -19.0947208404541, + "step": 28510 + }, + { + "epoch": 1.7, + "learning_rate": 2.3527331659573343e-06, + "logits/chosen": -2.6054940223693848, + "logits/rejected": -2.050473928451538, + "logps/chosen": -504.3377990722656, + "logps/rejected": -1796.154052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.335945129394531, + "rewards/margins": 13.19177532196045, + "rewards/rejected": -17.527719497680664, + "step": 28520 + }, + { + "epoch": 1.7, + "learning_rate": 2.351001638761236e-06, + "logits/chosen": -2.5865960121154785, + "logits/rejected": -2.019726514816284, + "logps/chosen": -498.4867248535156, + "logps/rejected": -1856.822021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.3569183349609375, + "rewards/margins": 13.7603178024292, + "rewards/rejected": -18.117237091064453, + "step": 28530 + }, + { + "epoch": 1.7, + "learning_rate": 2.349270183292946e-06, + "logits/chosen": -2.5508980751037598, + "logits/rejected": -2.091409683227539, + "logps/chosen": -508.63128662109375, + "logps/rejected": -1730.497314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.397430896759033, + "rewards/margins": 12.45649528503418, + "rewards/rejected": -16.853925704956055, + "step": 28540 + }, + { + "epoch": 1.7, + "learning_rate": 2.347538800385989e-06, + "logits/chosen": -2.6243693828582764, + "logits/rejected": -2.067033052444458, + "logps/chosen": -526.2538452148438, + "logps/rejected": -1790.484375, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.542233467102051, + "rewards/margins": 12.928662300109863, + "rewards/rejected": -17.470895767211914, + "step": 28550 + }, + { + "epoch": 1.7, + "learning_rate": 2.34580749087385e-06, + "logits/chosen": -2.5636985301971436, + "logits/rejected": -2.021235227584839, + "logps/chosen": -591.0741577148438, + "logps/rejected": -1963.518310546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.212094783782959, + "rewards/margins": 13.976470947265625, + "rewards/rejected": -19.18856430053711, + "step": 28560 + }, + { + "epoch": 1.7, + "learning_rate": 2.344076255589985e-06, + "logits/chosen": -2.5164029598236084, + "logits/rejected": -1.876600980758667, + "logps/chosen": -612.3946533203125, + "logps/rejected": -2008.494384765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.473918914794922, + "rewards/margins": 14.164990425109863, + "rewards/rejected": -19.6389102935791, + "step": 28570 + }, + { + "epoch": 1.7, + "learning_rate": 2.3423450953678065e-06, + "logits/chosen": -2.5496315956115723, + "logits/rejected": -1.90609610080719, + "logps/chosen": -616.1717529296875, + "logps/rejected": -1984.6431884765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.492269992828369, + "rewards/margins": 13.916685104370117, + "rewards/rejected": -19.408954620361328, + "step": 28580 + }, + { + "epoch": 1.7, + "learning_rate": 2.3406140110406984e-06, + "logits/chosen": -2.484877109527588, + "logits/rejected": -1.7571260929107666, + "logps/chosen": -619.4830322265625, + "logps/rejected": -2008.809326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.5067458152771, + "rewards/margins": 14.141336441040039, + "rewards/rejected": -19.648082733154297, + "step": 28590 + }, + { + "epoch": 1.71, + "learning_rate": 2.3388830034420026e-06, + "logits/chosen": -2.5949313640594482, + "logits/rejected": -1.9554542303085327, + "logps/chosen": -619.6390380859375, + "logps/rejected": -1993.3023681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.558707237243652, + "rewards/margins": 13.939874649047852, + "rewards/rejected": -19.49858283996582, + "step": 28600 + }, + { + "epoch": 1.71, + "learning_rate": 2.337152073405028e-06, + "logits/chosen": -2.5794949531555176, + "logits/rejected": -1.855446219444275, + "logps/chosen": -607.7575073242188, + "logps/rejected": -2055.8603515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.343264579772949, + "rewards/margins": 14.758581161499023, + "rewards/rejected": -20.10184669494629, + "step": 28610 + }, + { + "epoch": 1.71, + "learning_rate": 2.3354212217630428e-06, + "logits/chosen": -2.5357577800750732, + "logits/rejected": -1.8894065618515015, + "logps/chosen": -601.809814453125, + "logps/rejected": -1973.013916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.318392753601074, + "rewards/margins": 13.964085578918457, + "rewards/rejected": -19.2824764251709, + "step": 28620 + }, + { + "epoch": 1.71, + "learning_rate": 2.3336904493492785e-06, + "logits/chosen": -2.5352182388305664, + "logits/rejected": -1.9405330419540405, + "logps/chosen": -592.7643432617188, + "logps/rejected": -1932.2513427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.1974968910217285, + "rewards/margins": 13.68177318572998, + "rewards/rejected": -18.879270553588867, + "step": 28630 + }, + { + "epoch": 1.71, + "learning_rate": 2.3319597569969307e-06, + "logits/chosen": -2.602509021759033, + "logits/rejected": -1.8709466457366943, + "logps/chosen": -627.8748779296875, + "logps/rejected": -1998.2777099609375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.578768730163574, + "rewards/margins": 13.958023071289062, + "rewards/rejected": -19.536792755126953, + "step": 28640 + }, + { + "epoch": 1.71, + "learning_rate": 2.3302291455391525e-06, + "logits/chosen": -2.4793848991394043, + "logits/rejected": -1.7956759929656982, + "logps/chosen": -621.2860717773438, + "logps/rejected": -2020.2484130859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.492676258087158, + "rewards/margins": 14.252557754516602, + "rewards/rejected": -19.7452335357666, + "step": 28650 + }, + { + "epoch": 1.71, + "learning_rate": 2.328498615809063e-06, + "logits/chosen": -2.5762851238250732, + "logits/rejected": -1.895186424255371, + "logps/chosen": -621.66748046875, + "logps/rejected": -2003.4261474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.516441822052002, + "rewards/margins": 14.06470012664795, + "rewards/rejected": -19.58114242553711, + "step": 28660 + }, + { + "epoch": 1.71, + "learning_rate": 2.3267681686397365e-06, + "logits/chosen": -2.5429162979125977, + "logits/rejected": -1.842015266418457, + "logps/chosen": -645.1925659179688, + "logps/rejected": -2104.839599609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755452632904053, + "rewards/margins": 14.853116035461426, + "rewards/rejected": -20.608570098876953, + "step": 28670 + }, + { + "epoch": 1.71, + "learning_rate": 2.3250378048642117e-06, + "logits/chosen": -2.6027450561523438, + "logits/rejected": -1.9180843830108643, + "logps/chosen": -637.9291381835938, + "logps/rejected": -2069.8447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.6880784034729, + "rewards/margins": 14.554262161254883, + "rewards/rejected": -20.242341995239258, + "step": 28680 + }, + { + "epoch": 1.71, + "learning_rate": 2.3233075253154854e-06, + "logits/chosen": -2.5280840396881104, + "logits/rejected": -1.7866432666778564, + "logps/chosen": -616.3663330078125, + "logps/rejected": -2013.918212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.432904243469238, + "rewards/margins": 14.263150215148926, + "rewards/rejected": -19.696056365966797, + "step": 28690 + }, + { + "epoch": 1.71, + "learning_rate": 2.3215773308265145e-06, + "logits/chosen": -2.5225703716278076, + "logits/rejected": -1.818383812904358, + "logps/chosen": -635.2435913085938, + "logps/rejected": -2063.01611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.641100883483887, + "rewards/margins": 14.541181564331055, + "rewards/rejected": -20.182283401489258, + "step": 28700 + }, + { + "epoch": 1.71, + "learning_rate": 2.3198472222302144e-06, + "logits/chosen": -2.5724470615386963, + "logits/rejected": -1.7689034938812256, + "logps/chosen": -656.4462280273438, + "logps/rejected": -2115.73779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.811753749847412, + "rewards/margins": 14.912717819213867, + "rewards/rejected": -20.724472045898438, + "step": 28710 + }, + { + "epoch": 1.71, + "learning_rate": 2.3181172003594584e-06, + "logits/chosen": -2.516981601715088, + "logits/rejected": -1.783104658126831, + "logps/chosen": -618.4925537109375, + "logps/rejected": -1931.395263671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.437919616699219, + "rewards/margins": 13.448390007019043, + "rewards/rejected": -18.886310577392578, + "step": 28720 + }, + { + "epoch": 1.71, + "learning_rate": 2.3163872660470808e-06, + "logits/chosen": -2.528555393218994, + "logits/rejected": -1.8453636169433594, + "logps/chosen": -632.6365966796875, + "logps/rejected": -2082.67626953125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.590285778045654, + "rewards/margins": 14.793377876281738, + "rewards/rejected": -20.383665084838867, + "step": 28730 + }, + { + "epoch": 1.71, + "learning_rate": 2.3146574201258697e-06, + "logits/chosen": -2.5410678386688232, + "logits/rejected": -1.8683538436889648, + "logps/chosen": -659.0733642578125, + "logps/rejected": -2069.098876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864413261413574, + "rewards/margins": 14.384852409362793, + "rewards/rejected": -20.249267578125, + "step": 28740 + }, + { + "epoch": 1.71, + "learning_rate": 2.312927663428576e-06, + "logits/chosen": -2.549978733062744, + "logits/rejected": -1.8979829549789429, + "logps/chosen": -612.8262939453125, + "logps/rejected": -2104.79052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.433916091918945, + "rewards/margins": 15.163681030273438, + "rewards/rejected": -20.59759521484375, + "step": 28750 + }, + { + "epoch": 1.71, + "learning_rate": 2.3111979967879007e-06, + "logits/chosen": -2.534020185470581, + "logits/rejected": -1.9601370096206665, + "logps/chosen": -646.1762084960938, + "logps/rejected": -2005.412841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.754817962646484, + "rewards/margins": 13.863723754882812, + "rewards/rejected": -19.618541717529297, + "step": 28760 + }, + { + "epoch": 1.72, + "learning_rate": 2.309468421036509e-06, + "logits/chosen": -2.539498805999756, + "logits/rejected": -1.9448636770248413, + "logps/chosen": -641.2450561523438, + "logps/rejected": -2067.16259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.697679042816162, + "rewards/margins": 14.537882804870605, + "rewards/rejected": -20.235563278198242, + "step": 28770 + }, + { + "epoch": 1.72, + "learning_rate": 2.3077389370070156e-06, + "logits/chosen": -2.5944530963897705, + "logits/rejected": -1.8390249013900757, + "logps/chosen": -608.4935913085938, + "logps/rejected": -2016.472412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.336052417755127, + "rewards/margins": 14.381128311157227, + "rewards/rejected": -19.717180252075195, + "step": 28780 + }, + { + "epoch": 1.72, + "learning_rate": 2.306009545531997e-06, + "logits/chosen": -2.546755790710449, + "logits/rejected": -1.8158347606658936, + "logps/chosen": -631.259033203125, + "logps/rejected": -2177.1591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.605311870574951, + "rewards/margins": 15.72375202178955, + "rewards/rejected": -21.329063415527344, + "step": 28790 + }, + { + "epoch": 1.72, + "learning_rate": 2.3042802474439805e-06, + "logits/chosen": -2.5213115215301514, + "logits/rejected": -1.8652511835098267, + "logps/chosen": -631.8999633789062, + "logps/rejected": -2060.43115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.644443511962891, + "rewards/margins": 14.494549751281738, + "rewards/rejected": -20.138992309570312, + "step": 28800 + }, + { + "epoch": 1.72, + "learning_rate": 2.3025510435754494e-06, + "logits/chosen": -2.509878396987915, + "logits/rejected": -1.8527692556381226, + "logps/chosen": -636.8971557617188, + "logps/rejected": -2102.69580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.661040782928467, + "rewards/margins": 14.936264038085938, + "rewards/rejected": -20.597305297851562, + "step": 28810 + }, + { + "epoch": 1.72, + "learning_rate": 2.3008219347588443e-06, + "logits/chosen": -2.5066938400268555, + "logits/rejected": -1.8956531286239624, + "logps/chosen": -630.5892333984375, + "logps/rejected": -1999.8919677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.645476341247559, + "rewards/margins": 13.918644905090332, + "rewards/rejected": -19.56412124633789, + "step": 28820 + }, + { + "epoch": 1.72, + "learning_rate": 2.299092921826556e-06, + "logits/chosen": -2.4953558444976807, + "logits/rejected": -1.7766351699829102, + "logps/chosen": -626.2314453125, + "logps/rejected": -1934.483154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.578172206878662, + "rewards/margins": 13.304618835449219, + "rewards/rejected": -18.882789611816406, + "step": 28830 + }, + { + "epoch": 1.72, + "learning_rate": 2.297364005610934e-06, + "logits/chosen": -2.5236053466796875, + "logits/rejected": -1.8659775257110596, + "logps/chosen": -628.0078735351562, + "logps/rejected": -1991.636962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.586348533630371, + "rewards/margins": 13.886064529418945, + "rewards/rejected": -19.472408294677734, + "step": 28840 + }, + { + "epoch": 1.72, + "learning_rate": 2.2956351869442758e-06, + "logits/chosen": -2.552682638168335, + "logits/rejected": -1.9371312856674194, + "logps/chosen": -626.5152587890625, + "logps/rejected": -1995.5888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.521642208099365, + "rewards/margins": 13.998163223266602, + "rewards/rejected": -19.519805908203125, + "step": 28850 + }, + { + "epoch": 1.72, + "learning_rate": 2.293906466658837e-06, + "logits/chosen": -2.5646722316741943, + "logits/rejected": -1.8962090015411377, + "logps/chosen": -624.4759521484375, + "logps/rejected": -2014.836181640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.497655391693115, + "rewards/margins": 14.20640754699707, + "rewards/rejected": -19.70406150817871, + "step": 28860 + }, + { + "epoch": 1.72, + "learning_rate": 2.2921778455868217e-06, + "logits/chosen": -2.4798436164855957, + "logits/rejected": -1.7191756963729858, + "logps/chosen": -606.0128784179688, + "logps/rejected": -2065.98681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.392543792724609, + "rewards/margins": 14.826116561889648, + "rewards/rejected": -20.218658447265625, + "step": 28870 + }, + { + "epoch": 1.72, + "learning_rate": 2.29044932456039e-06, + "logits/chosen": -2.559617280960083, + "logits/rejected": -1.8327808380126953, + "logps/chosen": -637.1165771484375, + "logps/rejected": -2063.15771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.696456432342529, + "rewards/margins": 14.499302864074707, + "rewards/rejected": -20.195758819580078, + "step": 28880 + }, + { + "epoch": 1.72, + "learning_rate": 2.288720904411651e-06, + "logits/chosen": -2.505051374435425, + "logits/rejected": -1.7508703470230103, + "logps/chosen": -632.9639892578125, + "logps/rejected": -2144.223876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.651800632476807, + "rewards/margins": 15.34803581237793, + "rewards/rejected": -20.999835968017578, + "step": 28890 + }, + { + "epoch": 1.72, + "learning_rate": 2.2869925859726644e-06, + "logits/chosen": -2.5325379371643066, + "logits/rejected": -1.874071478843689, + "logps/chosen": -639.80517578125, + "logps/rejected": -1989.927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.685445308685303, + "rewards/margins": 13.775617599487305, + "rewards/rejected": -19.461063385009766, + "step": 28900 + }, + { + "epoch": 1.72, + "learning_rate": 2.285264370075445e-06, + "logits/chosen": -2.4845664501190186, + "logits/rejected": -1.7738988399505615, + "logps/chosen": -664.47998046875, + "logps/rejected": -2028.546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9188971519470215, + "rewards/margins": 13.923532485961914, + "rewards/rejected": -19.84242820739746, + "step": 28910 + }, + { + "epoch": 1.72, + "learning_rate": 2.283536257551955e-06, + "logits/chosen": -2.520806074142456, + "logits/rejected": -1.8829418420791626, + "logps/chosen": -643.8652954101562, + "logps/rejected": -1967.480224609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734731197357178, + "rewards/margins": 13.494148254394531, + "rewards/rejected": -19.228878021240234, + "step": 28920 + }, + { + "epoch": 1.73, + "learning_rate": 2.2818082492341077e-06, + "logits/chosen": -2.546433687210083, + "logits/rejected": -1.7847089767456055, + "logps/chosen": -631.1643676757812, + "logps/rejected": -2072.07861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.578899383544922, + "rewards/margins": 14.696887016296387, + "rewards/rejected": -20.275785446166992, + "step": 28930 + }, + { + "epoch": 1.73, + "learning_rate": 2.2800803459537665e-06, + "logits/chosen": -2.5587754249572754, + "logits/rejected": -1.9523979425430298, + "logps/chosen": -628.9288330078125, + "logps/rejected": -2095.67041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.523800849914551, + "rewards/margins": 14.994438171386719, + "rewards/rejected": -20.518239974975586, + "step": 28940 + }, + { + "epoch": 1.73, + "learning_rate": 2.278352548542744e-06, + "logits/chosen": -2.5116400718688965, + "logits/rejected": -1.9302457571029663, + "logps/chosen": -638.3822021484375, + "logps/rejected": -2063.441162109375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.661167621612549, + "rewards/margins": 14.545123100280762, + "rewards/rejected": -20.206289291381836, + "step": 28950 + }, + { + "epoch": 1.73, + "learning_rate": 2.2766248578328017e-06, + "logits/chosen": -2.5642447471618652, + "logits/rejected": -1.782151460647583, + "logps/chosen": -666.3484497070312, + "logps/rejected": -2182.431884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995887756347656, + "rewards/margins": 15.396955490112305, + "rewards/rejected": -21.392841339111328, + "step": 28960 + }, + { + "epoch": 1.73, + "learning_rate": 2.27489727465565e-06, + "logits/chosen": -2.5290958881378174, + "logits/rejected": -1.7717710733413696, + "logps/chosen": -683.0985717773438, + "logps/rejected": -2213.53515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.109947681427002, + "rewards/margins": 15.572352409362793, + "rewards/rejected": -21.68229866027832, + "step": 28970 + }, + { + "epoch": 1.73, + "learning_rate": 2.2731697998429485e-06, + "logits/chosen": -2.4897656440734863, + "logits/rejected": -1.769829511642456, + "logps/chosen": -677.1513671875, + "logps/rejected": -2025.638427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0615339279174805, + "rewards/margins": 13.764678955078125, + "rewards/rejected": -19.826213836669922, + "step": 28980 + }, + { + "epoch": 1.73, + "learning_rate": 2.271442434226301e-06, + "logits/chosen": -2.526154041290283, + "logits/rejected": -1.7989170551300049, + "logps/chosen": -658.5843505859375, + "logps/rejected": -2125.96630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.883944988250732, + "rewards/margins": 14.916638374328613, + "rewards/rejected": -20.80058479309082, + "step": 28990 + }, + { + "epoch": 1.73, + "learning_rate": 2.2697151786372634e-06, + "logits/chosen": -2.507270097732544, + "logits/rejected": -1.7578881978988647, + "logps/chosen": -645.4908447265625, + "logps/rejected": -2031.097900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.757605075836182, + "rewards/margins": 14.110788345336914, + "rewards/rejected": -19.86839485168457, + "step": 29000 + }, + { + "epoch": 1.73, + "learning_rate": 2.267988033907335e-06, + "logits/chosen": -2.4841055870056152, + "logits/rejected": -1.7706949710845947, + "logps/chosen": -686.5646362304688, + "logps/rejected": -2103.44970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.208631992340088, + "rewards/margins": 14.385485649108887, + "rewards/rejected": -20.594118118286133, + "step": 29010 + }, + { + "epoch": 1.73, + "learning_rate": 2.2662610008679655e-06, + "logits/chosen": -2.5141549110412598, + "logits/rejected": -1.7681849002838135, + "logps/chosen": -660.7835693359375, + "logps/rejected": -2048.747314453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887906074523926, + "rewards/margins": 14.163812637329102, + "rewards/rejected": -20.05171775817871, + "step": 29020 + }, + { + "epoch": 1.73, + "learning_rate": 2.2645340803505463e-06, + "logits/chosen": -2.5079092979431152, + "logits/rejected": -1.7494621276855469, + "logps/chosen": -661.2891845703125, + "logps/rejected": -2058.82763671875, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903271675109863, + "rewards/margins": 14.234196662902832, + "rewards/rejected": -20.137470245361328, + "step": 29030 + }, + { + "epoch": 1.73, + "learning_rate": 2.2628072731864186e-06, + "logits/chosen": -2.498487710952759, + "logits/rejected": -1.7562618255615234, + "logps/chosen": -683.7200927734375, + "logps/rejected": -2057.989013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1318464279174805, + "rewards/margins": 13.999483108520508, + "rewards/rejected": -20.131328582763672, + "step": 29040 + }, + { + "epoch": 1.73, + "learning_rate": 2.2610805802068655e-06, + "logits/chosen": -2.4822275638580322, + "logits/rejected": -1.790490746498108, + "logps/chosen": -655.5850830078125, + "logps/rejected": -2060.76708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.87746524810791, + "rewards/margins": 14.283407211303711, + "rewards/rejected": -20.160871505737305, + "step": 29050 + }, + { + "epoch": 1.73, + "learning_rate": 2.259354002243119e-06, + "logits/chosen": -2.502607822418213, + "logits/rejected": -1.7256542444229126, + "logps/chosen": -636.4725952148438, + "logps/rejected": -2042.393310546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.650736331939697, + "rewards/margins": 14.324735641479492, + "rewards/rejected": -19.9754695892334, + "step": 29060 + }, + { + "epoch": 1.73, + "learning_rate": 2.257627540126353e-06, + "logits/chosen": -2.5054497718811035, + "logits/rejected": -1.8668410778045654, + "logps/chosen": -651.2794189453125, + "logps/rejected": -2002.6923828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.82421350479126, + "rewards/margins": 13.758668899536133, + "rewards/rejected": -19.582881927490234, + "step": 29070 + }, + { + "epoch": 1.73, + "learning_rate": 2.2559011946876846e-06, + "logits/chosen": -2.542893171310425, + "logits/rejected": -1.8075155019760132, + "logps/chosen": -634.3321533203125, + "logps/rejected": -2049.77783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.708624362945557, + "rewards/margins": 14.339396476745605, + "rewards/rejected": -20.048017501831055, + "step": 29080 + }, + { + "epoch": 1.73, + "learning_rate": 2.254174966758179e-06, + "logits/chosen": -2.560640335083008, + "logits/rejected": -1.835397720336914, + "logps/chosen": -628.1426391601562, + "logps/rejected": -2131.705810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.597607135772705, + "rewards/margins": 15.265307426452637, + "rewards/rejected": -20.862916946411133, + "step": 29090 + }, + { + "epoch": 1.74, + "learning_rate": 2.2524488571688407e-06, + "logits/chosen": -2.523813009262085, + "logits/rejected": -1.8964531421661377, + "logps/chosen": -622.5906372070312, + "logps/rejected": -2044.334228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.530973434448242, + "rewards/margins": 14.463793754577637, + "rewards/rejected": -19.994766235351562, + "step": 29100 + }, + { + "epoch": 1.74, + "learning_rate": 2.2507228667506203e-06, + "logits/chosen": -2.5369513034820557, + "logits/rejected": -1.8951034545898438, + "logps/chosen": -621.5812377929688, + "logps/rejected": -2023.061279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.49005651473999, + "rewards/margins": 14.289645195007324, + "rewards/rejected": -19.779705047607422, + "step": 29110 + }, + { + "epoch": 1.74, + "learning_rate": 2.2489969963344074e-06, + "logits/chosen": -2.549988269805908, + "logits/rejected": -1.904335379600525, + "logps/chosen": -626.7417602539062, + "logps/rejected": -1987.3529052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.529349327087402, + "rewards/margins": 13.910304069519043, + "rewards/rejected": -19.439653396606445, + "step": 29120 + }, + { + "epoch": 1.74, + "learning_rate": 2.247271246751039e-06, + "logits/chosen": -2.583845615386963, + "logits/rejected": -1.888837456703186, + "logps/chosen": -636.2727661132812, + "logps/rejected": -2040.0654296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.649194717407227, + "rewards/margins": 14.31432819366455, + "rewards/rejected": -19.96352195739746, + "step": 29130 + }, + { + "epoch": 1.74, + "learning_rate": 2.2455456188312875e-06, + "logits/chosen": -2.567258358001709, + "logits/rejected": -1.8139142990112305, + "logps/chosen": -643.4041748046875, + "logps/rejected": -2066.154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.714303016662598, + "rewards/margins": 14.490628242492676, + "rewards/rejected": -20.204933166503906, + "step": 29140 + }, + { + "epoch": 1.74, + "learning_rate": 2.243820113405873e-06, + "logits/chosen": -2.5452940464019775, + "logits/rejected": -1.918633222579956, + "logps/chosen": -644.65966796875, + "logps/rejected": -2067.080078125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.726258754730225, + "rewards/margins": 14.50683307647705, + "rewards/rejected": -20.23309326171875, + "step": 29150 + }, + { + "epoch": 1.74, + "learning_rate": 2.242094731305452e-06, + "logits/chosen": -2.5376155376434326, + "logits/rejected": -1.880920648574829, + "logps/chosen": -675.6972045898438, + "logps/rejected": -2038.7236328125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087698459625244, + "rewards/margins": 13.876734733581543, + "rewards/rejected": -19.964435577392578, + "step": 29160 + }, + { + "epoch": 1.74, + "learning_rate": 2.240369473360624e-06, + "logits/chosen": -2.5291855335235596, + "logits/rejected": -1.954941987991333, + "logps/chosen": -669.3358154296875, + "logps/rejected": -2020.9713134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966428279876709, + "rewards/margins": 13.810163497924805, + "rewards/rejected": -19.776592254638672, + "step": 29170 + }, + { + "epoch": 1.74, + "learning_rate": 2.2386443404019285e-06, + "logits/chosen": -2.5132737159729004, + "logits/rejected": -1.8920704126358032, + "logps/chosen": -642.23095703125, + "logps/rejected": -2009.369873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.745001792907715, + "rewards/margins": 13.908409118652344, + "rewards/rejected": -19.653409957885742, + "step": 29180 + }, + { + "epoch": 1.74, + "learning_rate": 2.236919333259844e-06, + "logits/chosen": -2.560941457748413, + "logits/rejected": -1.9806318283081055, + "logps/chosen": -644.9951171875, + "logps/rejected": -2147.867431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.756715297698975, + "rewards/margins": 15.27660846710205, + "rewards/rejected": -21.033323287963867, + "step": 29190 + }, + { + "epoch": 1.74, + "learning_rate": 2.2351944527647894e-06, + "logits/chosen": -2.557486057281494, + "logits/rejected": -1.9791873693466187, + "logps/chosen": -658.7276611328125, + "logps/rejected": -2064.915771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.933043479919434, + "rewards/margins": 14.275657653808594, + "rewards/rejected": -20.20870018005371, + "step": 29200 + }, + { + "epoch": 1.74, + "learning_rate": 2.2334696997471218e-06, + "logits/chosen": -2.556067943572998, + "logits/rejected": -1.852149248123169, + "logps/chosen": -652.5670166015625, + "logps/rejected": -2030.7769775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859679222106934, + "rewards/margins": 14.009739875793457, + "rewards/rejected": -19.86941909790039, + "step": 29210 + }, + { + "epoch": 1.74, + "learning_rate": 2.231745075037137e-06, + "logits/chosen": -2.5518288612365723, + "logits/rejected": -1.8585453033447266, + "logps/chosen": -648.4558715820312, + "logps/rejected": -2099.766357421875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734524726867676, + "rewards/margins": 14.825650215148926, + "rewards/rejected": -20.560176849365234, + "step": 29220 + }, + { + "epoch": 1.74, + "learning_rate": 2.2300205794650703e-06, + "logits/chosen": -2.519692897796631, + "logits/rejected": -1.8161131143569946, + "logps/chosen": -657.0926513671875, + "logps/rejected": -2064.02880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.844132423400879, + "rewards/margins": 14.351461410522461, + "rewards/rejected": -20.195592880249023, + "step": 29230 + }, + { + "epoch": 1.74, + "learning_rate": 2.2282962138610925e-06, + "logits/chosen": -2.531583070755005, + "logits/rejected": -1.7868057489395142, + "logps/chosen": -665.0736694335938, + "logps/rejected": -2063.032470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93350887298584, + "rewards/margins": 14.247198104858398, + "rewards/rejected": -20.180707931518555, + "step": 29240 + }, + { + "epoch": 1.74, + "learning_rate": 2.2265719790553147e-06, + "logits/chosen": -2.484917402267456, + "logits/rejected": -1.7963062524795532, + "logps/chosen": -649.037353515625, + "logps/rejected": -1973.7474365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7598161697387695, + "rewards/margins": 13.531509399414062, + "rewards/rejected": -19.29132652282715, + "step": 29250 + }, + { + "epoch": 1.74, + "learning_rate": 2.2248478758777815e-06, + "logits/chosen": -2.5466792583465576, + "logits/rejected": -1.8939037322998047, + "logps/chosen": -665.38232421875, + "logps/rejected": -2078.844970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.91420841217041, + "rewards/margins": 14.440332412719727, + "rewards/rejected": -20.35453987121582, + "step": 29260 + }, + { + "epoch": 1.75, + "learning_rate": 2.2231239051584787e-06, + "logits/chosen": -2.536055088043213, + "logits/rejected": -1.9190136194229126, + "logps/chosen": -665.5317993164062, + "logps/rejected": -2098.086181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980661869049072, + "rewards/margins": 14.559709548950195, + "rewards/rejected": -20.54037094116211, + "step": 29270 + }, + { + "epoch": 1.75, + "learning_rate": 2.221400067727323e-06, + "logits/chosen": -2.5303590297698975, + "logits/rejected": -1.7812936305999756, + "logps/chosen": -652.07763671875, + "logps/rejected": -2029.03515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.830201625823975, + "rewards/margins": 14.017130851745605, + "rewards/rejected": -19.847332000732422, + "step": 29280 + }, + { + "epoch": 1.75, + "learning_rate": 2.2196763644141728e-06, + "logits/chosen": -2.5094518661499023, + "logits/rejected": -1.758094072341919, + "logps/chosen": -668.28369140625, + "logps/rejected": -2037.118896484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.983889102935791, + "rewards/margins": 13.95557975769043, + "rewards/rejected": -19.939468383789062, + "step": 29290 + }, + { + "epoch": 1.75, + "learning_rate": 2.217952796048816e-06, + "logits/chosen": -2.5344700813293457, + "logits/rejected": -1.8734838962554932, + "logps/chosen": -668.158447265625, + "logps/rejected": -2115.3408203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982693195343018, + "rewards/margins": 14.734675407409668, + "rewards/rejected": -20.717369079589844, + "step": 29300 + }, + { + "epoch": 1.75, + "learning_rate": 2.21622936346098e-06, + "logits/chosen": -2.493159294128418, + "logits/rejected": -1.8442792892456055, + "logps/chosen": -673.2396240234375, + "logps/rejected": -2071.598876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032771110534668, + "rewards/margins": 14.2427978515625, + "rewards/rejected": -20.275569915771484, + "step": 29310 + }, + { + "epoch": 1.75, + "learning_rate": 2.214506067480324e-06, + "logits/chosen": -2.4881319999694824, + "logits/rejected": -1.8963340520858765, + "logps/chosen": -657.7628784179688, + "logps/rejected": -2015.8636474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9039692878723145, + "rewards/margins": 13.821220397949219, + "rewards/rejected": -19.725189208984375, + "step": 29320 + }, + { + "epoch": 1.75, + "learning_rate": 2.2127829089364445e-06, + "logits/chosen": -2.5313384532928467, + "logits/rejected": -1.8080695867538452, + "logps/chosen": -654.3336181640625, + "logps/rejected": -2087.86962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.868007659912109, + "rewards/margins": 14.56214427947998, + "rewards/rejected": -20.43014907836914, + "step": 29330 + }, + { + "epoch": 1.75, + "learning_rate": 2.2110598886588693e-06, + "logits/chosen": -2.5371007919311523, + "logits/rejected": -1.8739612102508545, + "logps/chosen": -642.1285400390625, + "logps/rejected": -2042.5882568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7749528884887695, + "rewards/margins": 14.215547561645508, + "rewards/rejected": -19.990501403808594, + "step": 29340 + }, + { + "epoch": 1.75, + "learning_rate": 2.209337007477059e-06, + "logits/chosen": -2.565272808074951, + "logits/rejected": -1.94775390625, + "logps/chosen": -666.4945068359375, + "logps/rejected": -2067.25537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914425849914551, + "rewards/margins": 14.307286262512207, + "rewards/rejected": -20.221712112426758, + "step": 29350 + }, + { + "epoch": 1.75, + "learning_rate": 2.2076142662204104e-06, + "logits/chosen": -2.559532403945923, + "logits/rejected": -2.030712604522705, + "logps/chosen": -653.7903442382812, + "logps/rejected": -2016.9013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.867672443389893, + "rewards/margins": 13.862919807434082, + "rewards/rejected": -19.7305908203125, + "step": 29360 + }, + { + "epoch": 1.75, + "learning_rate": 2.2058916657182493e-06, + "logits/chosen": -2.5219054222106934, + "logits/rejected": -1.8919099569320679, + "logps/chosen": -665.4022216796875, + "logps/rejected": -2097.010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948850154876709, + "rewards/margins": 14.582374572753906, + "rewards/rejected": -20.531225204467773, + "step": 29370 + }, + { + "epoch": 1.75, + "learning_rate": 2.204169206799838e-06, + "logits/chosen": -2.541738748550415, + "logits/rejected": -1.9597723484039307, + "logps/chosen": -652.00244140625, + "logps/rejected": -2085.926513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.788575172424316, + "rewards/margins": 14.625927925109863, + "rewards/rejected": -20.41450309753418, + "step": 29380 + }, + { + "epoch": 1.75, + "learning_rate": 2.202446890294365e-06, + "logits/chosen": -2.5285329818725586, + "logits/rejected": -1.9520328044891357, + "logps/chosen": -637.3612060546875, + "logps/rejected": -2057.39306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.621832847595215, + "rewards/margins": 14.514203071594238, + "rewards/rejected": -20.136035919189453, + "step": 29390 + }, + { + "epoch": 1.75, + "learning_rate": 2.2007247170309567e-06, + "logits/chosen": -2.5389256477355957, + "logits/rejected": -1.909789800643921, + "logps/chosen": -636.0570068359375, + "logps/rejected": -1961.4761962890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.663997650146484, + "rewards/margins": 13.530197143554688, + "rewards/rejected": -19.194194793701172, + "step": 29400 + }, + { + "epoch": 1.75, + "learning_rate": 2.199002687838665e-06, + "logits/chosen": -2.5386550426483154, + "logits/rejected": -1.7783578634262085, + "logps/chosen": -646.1549072265625, + "logps/rejected": -2115.655029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.720156669616699, + "rewards/margins": 14.98894214630127, + "rewards/rejected": -20.709095001220703, + "step": 29410 + }, + { + "epoch": 1.75, + "learning_rate": 2.197280803546476e-06, + "logits/chosen": -2.5471906661987305, + "logits/rejected": -1.8796899318695068, + "logps/chosen": -649.880615234375, + "logps/rejected": -2029.4453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.761945724487305, + "rewards/margins": 14.099446296691895, + "rewards/rejected": -19.861392974853516, + "step": 29420 + }, + { + "epoch": 1.75, + "learning_rate": 2.195559064983304e-06, + "logits/chosen": -2.486544609069824, + "logits/rejected": -1.7581682205200195, + "logps/chosen": -668.0496215820312, + "logps/rejected": -2025.5501708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9493513107299805, + "rewards/margins": 13.876388549804688, + "rewards/rejected": -19.82573890686035, + "step": 29430 + }, + { + "epoch": 1.76, + "learning_rate": 2.1938374729779945e-06, + "logits/chosen": -2.5785748958587646, + "logits/rejected": -1.9779621362686157, + "logps/chosen": -661.73193359375, + "logps/rejected": -2110.596923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909409046173096, + "rewards/margins": 14.744668960571289, + "rewards/rejected": -20.65407943725586, + "step": 29440 + }, + { + "epoch": 1.76, + "learning_rate": 2.1921160283593214e-06, + "logits/chosen": -2.5170273780822754, + "logits/rejected": -1.7855056524276733, + "logps/chosen": -650.9492797851562, + "logps/rejected": -2128.736083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.715365409851074, + "rewards/margins": 15.106099128723145, + "rewards/rejected": -20.82146644592285, + "step": 29450 + }, + { + "epoch": 1.76, + "learning_rate": 2.1903947319559884e-06, + "logits/chosen": -2.5183370113372803, + "logits/rejected": -1.8784618377685547, + "logps/chosen": -653.8032836914062, + "logps/rejected": -2065.668701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792136192321777, + "rewards/margins": 14.402958869934082, + "rewards/rejected": -20.19509506225586, + "step": 29460 + }, + { + "epoch": 1.76, + "learning_rate": 2.1886735845966274e-06, + "logits/chosen": -2.53509783744812, + "logits/rejected": -1.864091157913208, + "logps/chosen": -681.8656005859375, + "logps/rejected": -2125.411376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.126459121704102, + "rewards/margins": 14.681764602661133, + "rewards/rejected": -20.8082218170166, + "step": 29470 + }, + { + "epoch": 1.76, + "learning_rate": 2.1869525871097984e-06, + "logits/chosen": -2.5792534351348877, + "logits/rejected": -1.7720931768417358, + "logps/chosen": -671.6611328125, + "logps/rejected": -2194.453369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.957377910614014, + "rewards/margins": 15.541145324707031, + "rewards/rejected": -21.498523712158203, + "step": 29480 + }, + { + "epoch": 1.76, + "learning_rate": 2.1852317403239907e-06, + "logits/chosen": -2.5056815147399902, + "logits/rejected": -1.773297905921936, + "logps/chosen": -642.0628662109375, + "logps/rejected": -2062.44775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.730625152587891, + "rewards/margins": 14.465536117553711, + "rewards/rejected": -20.196163177490234, + "step": 29490 + }, + { + "epoch": 1.76, + "learning_rate": 2.1835110450676183e-06, + "logits/chosen": -2.549751043319702, + "logits/rejected": -1.817539930343628, + "logps/chosen": -681.6888427734375, + "logps/rejected": -2064.09619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.188294410705566, + "rewards/margins": 14.008995056152344, + "rewards/rejected": -20.197288513183594, + "step": 29500 + }, + { + "epoch": 1.76, + "learning_rate": 2.181790502169026e-06, + "logits/chosen": -2.5921132564544678, + "logits/rejected": -1.9498497247695923, + "logps/chosen": -671.0158081054688, + "logps/rejected": -1985.418212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043662071228027, + "rewards/margins": 13.3790922164917, + "rewards/rejected": -19.422754287719727, + "step": 29510 + }, + { + "epoch": 1.76, + "learning_rate": 2.180070112456482e-06, + "logits/chosen": -2.5594003200531006, + "logits/rejected": -1.9249534606933594, + "logps/chosen": -664.5274658203125, + "logps/rejected": -1940.210205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.976680755615234, + "rewards/margins": 12.979998588562012, + "rewards/rejected": -18.956680297851562, + "step": 29520 + }, + { + "epoch": 1.76, + "learning_rate": 2.17834987675818e-06, + "logits/chosen": -2.490309238433838, + "logits/rejected": -1.683990240097046, + "logps/chosen": -678.4007568359375, + "logps/rejected": -2057.057861328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115727424621582, + "rewards/margins": 14.00001049041748, + "rewards/rejected": -20.11573600769043, + "step": 29530 + }, + { + "epoch": 1.76, + "learning_rate": 2.176629795902245e-06, + "logits/chosen": -2.530456066131592, + "logits/rejected": -1.847259759902954, + "logps/chosen": -664.2735595703125, + "logps/rejected": -2040.700439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.883123874664307, + "rewards/margins": 14.084405899047852, + "rewards/rejected": -19.967529296875, + "step": 29540 + }, + { + "epoch": 1.76, + "learning_rate": 2.174909870716721e-06, + "logits/chosen": -2.5409739017486572, + "logits/rejected": -1.8189804553985596, + "logps/chosen": -666.8803100585938, + "logps/rejected": -2103.2216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966476917266846, + "rewards/margins": 14.642662048339844, + "rewards/rejected": -20.609142303466797, + "step": 29550 + }, + { + "epoch": 1.76, + "learning_rate": 2.173190102029582e-06, + "logits/chosen": -2.5110535621643066, + "logits/rejected": -1.7040239572525024, + "logps/chosen": -697.5719604492188, + "logps/rejected": -2128.14111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315114974975586, + "rewards/margins": 14.530502319335938, + "rewards/rejected": -20.845617294311523, + "step": 29560 + }, + { + "epoch": 1.76, + "learning_rate": 2.1714704906687225e-06, + "logits/chosen": -2.5286858081817627, + "logits/rejected": -1.8839080333709717, + "logps/chosen": -675.02587890625, + "logps/rejected": -2139.84814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.955676078796387, + "rewards/margins": 14.996744155883789, + "rewards/rejected": -20.952421188354492, + "step": 29570 + }, + { + "epoch": 1.76, + "learning_rate": 2.169751037461966e-06, + "logits/chosen": -2.5377235412597656, + "logits/rejected": -1.934370994567871, + "logps/chosen": -649.1031494140625, + "logps/rejected": -2079.96923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.800595283508301, + "rewards/margins": 14.555462837219238, + "rewards/rejected": -20.35605812072754, + "step": 29580 + }, + { + "epoch": 1.76, + "learning_rate": 2.1680317432370548e-06, + "logits/chosen": -2.524290084838867, + "logits/rejected": -1.8115367889404297, + "logps/chosen": -660.5758056640625, + "logps/rejected": -2095.261474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.819159507751465, + "rewards/margins": 14.695783615112305, + "rewards/rejected": -20.514944076538086, + "step": 29590 + }, + { + "epoch": 1.77, + "learning_rate": 2.166312608821659e-06, + "logits/chosen": -2.515158176422119, + "logits/rejected": -1.7994816303253174, + "logps/chosen": -652.2529907226562, + "logps/rejected": -2089.727783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826054573059082, + "rewards/margins": 14.62878131866455, + "rewards/rejected": -20.454835891723633, + "step": 29600 + }, + { + "epoch": 1.77, + "learning_rate": 2.1645936350433692e-06, + "logits/chosen": -2.5769639015197754, + "logits/rejected": -1.8829492330551147, + "logps/chosen": -687.479736328125, + "logps/rejected": -2055.219482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.136989116668701, + "rewards/margins": 13.957277297973633, + "rewards/rejected": -20.09426498413086, + "step": 29610 + }, + { + "epoch": 1.77, + "learning_rate": 2.162874822729698e-06, + "logits/chosen": -2.541602611541748, + "logits/rejected": -1.8111120462417603, + "logps/chosen": -679.8707885742188, + "logps/rejected": -2022.233154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132950305938721, + "rewards/margins": 13.651623725891113, + "rewards/rejected": -19.78457260131836, + "step": 29620 + }, + { + "epoch": 1.77, + "learning_rate": 2.161156172708084e-06, + "logits/chosen": -2.556948184967041, + "logits/rejected": -1.863347053527832, + "logps/chosen": -703.5960083007812, + "logps/rejected": -2139.801025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331605911254883, + "rewards/margins": 14.629727363586426, + "rewards/rejected": -20.961336135864258, + "step": 29630 + }, + { + "epoch": 1.77, + "learning_rate": 2.159437685805883e-06, + "logits/chosen": -2.5459938049316406, + "logits/rejected": -1.809569001197815, + "logps/chosen": -659.7030639648438, + "logps/rejected": -2032.35546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892251014709473, + "rewards/margins": 13.987548828125, + "rewards/rejected": -19.879798889160156, + "step": 29640 + }, + { + "epoch": 1.77, + "learning_rate": 2.157719362850377e-06, + "logits/chosen": -2.5175533294677734, + "logits/rejected": -1.7720075845718384, + "logps/chosen": -675.2508544921875, + "logps/rejected": -2021.130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025233745574951, + "rewards/margins": 13.74676513671875, + "rewards/rejected": -19.771997451782227, + "step": 29650 + }, + { + "epoch": 1.77, + "learning_rate": 2.1560012046687638e-06, + "logits/chosen": -2.557295322418213, + "logits/rejected": -1.9410995244979858, + "logps/chosen": -658.2067260742188, + "logps/rejected": -2122.85205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.855543613433838, + "rewards/margins": 14.921899795532227, + "rewards/rejected": -20.77744483947754, + "step": 29660 + }, + { + "epoch": 1.77, + "learning_rate": 2.154283212088168e-06, + "logits/chosen": -2.555591344833374, + "logits/rejected": -1.8545191287994385, + "logps/chosen": -694.916015625, + "logps/rejected": -2019.8472900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.232030391693115, + "rewards/margins": 13.518157958984375, + "rewards/rejected": -19.750186920166016, + "step": 29670 + }, + { + "epoch": 1.77, + "learning_rate": 2.152565385935628e-06, + "logits/chosen": -2.5006601810455322, + "logits/rejected": -1.846618413925171, + "logps/chosen": -708.9550170898438, + "logps/rejected": -2077.31396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.363287925720215, + "rewards/margins": 13.970588684082031, + "rewards/rejected": -20.33387565612793, + "step": 29680 + }, + { + "epoch": 1.77, + "learning_rate": 2.1508477270381074e-06, + "logits/chosen": -2.4986824989318848, + "logits/rejected": -1.7635829448699951, + "logps/chosen": -659.387939453125, + "logps/rejected": -1965.580810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.920552730560303, + "rewards/margins": 13.295675277709961, + "rewards/rejected": -19.216228485107422, + "step": 29690 + }, + { + "epoch": 1.77, + "learning_rate": 2.149130236222487e-06, + "logits/chosen": -2.545914888381958, + "logits/rejected": -1.8299843072891235, + "logps/chosen": -679.9639892578125, + "logps/rejected": -2046.291259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020963191986084, + "rewards/margins": 14.003567695617676, + "rewards/rejected": -20.024532318115234, + "step": 29700 + }, + { + "epoch": 1.77, + "learning_rate": 2.1474129143155655e-06, + "logits/chosen": -2.502066135406494, + "logits/rejected": -1.7875471115112305, + "logps/chosen": -665.1558837890625, + "logps/rejected": -2056.37158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.916266441345215, + "rewards/margins": 14.21020221710205, + "rewards/rejected": -20.126466751098633, + "step": 29710 + }, + { + "epoch": 1.77, + "learning_rate": 2.1456957621440635e-06, + "logits/chosen": -2.527226209640503, + "logits/rejected": -1.880061388015747, + "logps/chosen": -692.2615966796875, + "logps/rejected": -2107.771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.226275444030762, + "rewards/margins": 14.406826972961426, + "rewards/rejected": -20.633100509643555, + "step": 29720 + }, + { + "epoch": 1.77, + "learning_rate": 2.143978780534616e-06, + "logits/chosen": -2.521944761276245, + "logits/rejected": -1.8264849185943604, + "logps/chosen": -664.6202392578125, + "logps/rejected": -2033.252197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892758369445801, + "rewards/margins": 14.002131462097168, + "rewards/rejected": -19.89488983154297, + "step": 29730 + }, + { + "epoch": 1.77, + "learning_rate": 2.1422619703137805e-06, + "logits/chosen": -2.5191562175750732, + "logits/rejected": -1.8395789861679077, + "logps/chosen": -656.0650024414062, + "logps/rejected": -2030.110107421875, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8617353439331055, + "rewards/margins": 13.995429992675781, + "rewards/rejected": -19.857166290283203, + "step": 29740 + }, + { + "epoch": 1.77, + "learning_rate": 2.1405453323080263e-06, + "logits/chosen": -2.5111355781555176, + "logits/rejected": -1.9115320444107056, + "logps/chosen": -676.1450805664062, + "logps/rejected": -2069.99853515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068602561950684, + "rewards/margins": 14.199209213256836, + "rewards/rejected": -20.267810821533203, + "step": 29750 + }, + { + "epoch": 1.77, + "learning_rate": 2.138828867343746e-06, + "logits/chosen": -2.536774158477783, + "logits/rejected": -1.9368880987167358, + "logps/chosen": -710.9988403320312, + "logps/rejected": -2106.1396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.422755241394043, + "rewards/margins": 14.189764022827148, + "rewards/rejected": -20.612518310546875, + "step": 29760 + }, + { + "epoch": 1.78, + "learning_rate": 2.137112576247243e-06, + "logits/chosen": -2.5497918128967285, + "logits/rejected": -1.9650452136993408, + "logps/chosen": -683.5089721679688, + "logps/rejected": -2128.2666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036467552185059, + "rewards/margins": 14.797261238098145, + "rewards/rejected": -20.83372688293457, + "step": 29770 + }, + { + "epoch": 1.78, + "learning_rate": 2.1353964598447425e-06, + "logits/chosen": -2.5662310123443604, + "logits/rejected": -1.9865787029266357, + "logps/chosen": -700.8657836914062, + "logps/rejected": -2053.986083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3349609375, + "rewards/margins": 13.779153823852539, + "rewards/rejected": -20.11411476135254, + "step": 29780 + }, + { + "epoch": 1.78, + "learning_rate": 2.1336805189623813e-06, + "logits/chosen": -2.5160768032073975, + "logits/rejected": -1.8239784240722656, + "logps/chosen": -701.8583984375, + "logps/rejected": -2074.921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.34683084487915, + "rewards/margins": 13.959528923034668, + "rewards/rejected": -20.306358337402344, + "step": 29790 + }, + { + "epoch": 1.78, + "learning_rate": 2.1319647544262125e-06, + "logits/chosen": -2.5441532135009766, + "logits/rejected": -1.9517691135406494, + "logps/chosen": -709.5465087890625, + "logps/rejected": -2105.88134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.407656192779541, + "rewards/margins": 14.211735725402832, + "rewards/rejected": -20.619388580322266, + "step": 29800 + }, + { + "epoch": 1.78, + "learning_rate": 2.130249167062208e-06, + "logits/chosen": -2.5821404457092285, + "logits/rejected": -1.9754610061645508, + "logps/chosen": -671.0964965820312, + "logps/rejected": -2079.309326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.990677833557129, + "rewards/margins": 14.360389709472656, + "rewards/rejected": -20.3510684967041, + "step": 29810 + }, + { + "epoch": 1.78, + "learning_rate": 2.128533757696248e-06, + "logits/chosen": -2.5789029598236084, + "logits/rejected": -1.86870539188385, + "logps/chosen": -687.2901611328125, + "logps/rejected": -2059.559326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.142049789428711, + "rewards/margins": 14.00059986114502, + "rewards/rejected": -20.142648696899414, + "step": 29820 + }, + { + "epoch": 1.78, + "learning_rate": 2.1268185271541334e-06, + "logits/chosen": -2.5257678031921387, + "logits/rejected": -1.8665759563446045, + "logps/chosen": -703.099853515625, + "logps/rejected": -2096.445068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.31850004196167, + "rewards/margins": 14.202174186706543, + "rewards/rejected": -20.520673751831055, + "step": 29830 + }, + { + "epoch": 1.78, + "learning_rate": 2.125103476261574e-06, + "logits/chosen": -2.5092098712921143, + "logits/rejected": -1.819604516029358, + "logps/chosen": -692.4503173828125, + "logps/rejected": -2119.666259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.187688827514648, + "rewards/margins": 14.574687004089355, + "rewards/rejected": -20.762378692626953, + "step": 29840 + }, + { + "epoch": 1.78, + "learning_rate": 2.123388605844198e-06, + "logits/chosen": -2.4985759258270264, + "logits/rejected": -1.8076118230819702, + "logps/chosen": -735.7077026367188, + "logps/rejected": -2057.666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.585177421569824, + "rewards/margins": 13.551739692687988, + "rewards/rejected": -20.136913299560547, + "step": 29850 + }, + { + "epoch": 1.78, + "learning_rate": 2.1216739167275405e-06, + "logits/chosen": -2.5718820095062256, + "logits/rejected": -1.9057228565216064, + "logps/chosen": -670.4573974609375, + "logps/rejected": -1973.2099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997075080871582, + "rewards/margins": 13.30180549621582, + "rewards/rejected": -19.298879623413086, + "step": 29860 + }, + { + "epoch": 1.78, + "learning_rate": 2.119959409737056e-06, + "logits/chosen": -2.5675368309020996, + "logits/rejected": -1.9789419174194336, + "logps/chosen": -701.4852905273438, + "logps/rejected": -2118.518798828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.284577369689941, + "rewards/margins": 14.460047721862793, + "rewards/rejected": -20.744626998901367, + "step": 29870 + }, + { + "epoch": 1.78, + "learning_rate": 2.1182450856981066e-06, + "logits/chosen": -2.5749151706695557, + "logits/rejected": -1.9027827978134155, + "logps/chosen": -683.8563232421875, + "logps/rejected": -2096.038330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.09275484085083, + "rewards/margins": 14.419797897338867, + "rewards/rejected": -20.51255226135254, + "step": 29880 + }, + { + "epoch": 1.78, + "learning_rate": 2.1165309454359678e-06, + "logits/chosen": -2.6010842323303223, + "logits/rejected": -1.8279129266738892, + "logps/chosen": -668.1586303710938, + "logps/rejected": -2123.339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989038467407227, + "rewards/margins": 14.80370807647705, + "rewards/rejected": -20.792743682861328, + "step": 29890 + }, + { + "epoch": 1.78, + "learning_rate": 2.1148169897758273e-06, + "logits/chosen": -2.558750629425049, + "logits/rejected": -1.8098933696746826, + "logps/chosen": -667.9622192382812, + "logps/rejected": -2068.99853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002953052520752, + "rewards/margins": 14.24254035949707, + "rewards/rejected": -20.245494842529297, + "step": 29900 + }, + { + "epoch": 1.78, + "learning_rate": 2.113103219542782e-06, + "logits/chosen": -2.553133010864258, + "logits/rejected": -1.7920926809310913, + "logps/chosen": -695.0093994140625, + "logps/rejected": -2153.218017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.251588821411133, + "rewards/margins": 14.833020210266113, + "rewards/rejected": -21.084609985351562, + "step": 29910 + }, + { + "epoch": 1.78, + "learning_rate": 2.1113896355618425e-06, + "logits/chosen": -2.5353362560272217, + "logits/rejected": -1.8759052753448486, + "logps/chosen": -665.4969482421875, + "logps/rejected": -2064.12939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982161998748779, + "rewards/margins": 14.217216491699219, + "rewards/rejected": -20.199377059936523, + "step": 29920 + }, + { + "epoch": 1.78, + "learning_rate": 2.1096762386579276e-06, + "logits/chosen": -2.523348569869995, + "logits/rejected": -1.749370813369751, + "logps/chosen": -682.8723754882812, + "logps/rejected": -2113.02490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0500569343566895, + "rewards/margins": 14.627886772155762, + "rewards/rejected": -20.67794418334961, + "step": 29930 + }, + { + "epoch": 1.79, + "learning_rate": 2.107963029655867e-06, + "logits/chosen": -2.5666582584381104, + "logits/rejected": -1.8752158880233765, + "logps/chosen": -715.2806396484375, + "logps/rejected": -2151.51171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.392980575561523, + "rewards/margins": 14.65861988067627, + "rewards/rejected": -21.051599502563477, + "step": 29940 + }, + { + "epoch": 1.79, + "learning_rate": 2.1062500093803977e-06, + "logits/chosen": -2.520739793777466, + "logits/rejected": -1.9040273427963257, + "logps/chosen": -685.3035888671875, + "logps/rejected": -2096.93994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087944984436035, + "rewards/margins": 14.428735733032227, + "rewards/rejected": -20.516679763793945, + "step": 29950 + }, + { + "epoch": 1.79, + "learning_rate": 2.1045371786561696e-06, + "logits/chosen": -2.529365062713623, + "logits/rejected": -1.7675037384033203, + "logps/chosen": -676.8313598632812, + "logps/rejected": -2109.481201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059177398681641, + "rewards/margins": 14.593915939331055, + "rewards/rejected": -20.653095245361328, + "step": 29960 + }, + { + "epoch": 1.79, + "learning_rate": 2.1028245383077392e-06, + "logits/chosen": -2.5769097805023193, + "logits/rejected": -1.865850806236267, + "logps/chosen": -692.2279052734375, + "logps/rejected": -2115.976318359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2009711265563965, + "rewards/margins": 14.506762504577637, + "rewards/rejected": -20.707735061645508, + "step": 29970 + }, + { + "epoch": 1.79, + "learning_rate": 2.10111208915957e-06, + "logits/chosen": -2.518303394317627, + "logits/rejected": -1.781802773475647, + "logps/chosen": -671.3902587890625, + "logps/rejected": -2102.5859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041191577911377, + "rewards/margins": 14.53498649597168, + "rewards/rejected": -20.5761775970459, + "step": 29980 + }, + { + "epoch": 1.79, + "learning_rate": 2.0993998320360366e-06, + "logits/chosen": -2.567974805831909, + "logits/rejected": -1.9154293537139893, + "logps/chosen": -673.6856689453125, + "logps/rejected": -2149.86181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023555755615234, + "rewards/margins": 15.030633926391602, + "rewards/rejected": -21.054189682006836, + "step": 29990 + }, + { + "epoch": 1.79, + "learning_rate": 2.0976877677614183e-06, + "logits/chosen": -2.5431694984436035, + "logits/rejected": -1.7633047103881836, + "logps/chosen": -734.3875122070312, + "logps/rejected": -2146.085205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605401515960693, + "rewards/margins": 14.403810501098633, + "rewards/rejected": -21.009212493896484, + "step": 30000 + }, + { + "epoch": 1.79, + "learning_rate": 2.095975897159904e-06, + "logits/chosen": -2.5537619590759277, + "logits/rejected": -1.9378414154052734, + "logps/chosen": -710.0758056640625, + "logps/rejected": -2003.3941650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.378714084625244, + "rewards/margins": 13.2183198928833, + "rewards/rejected": -19.59703254699707, + "step": 30010 + }, + { + "epoch": 1.79, + "learning_rate": 2.094264221055587e-06, + "logits/chosen": -2.5118377208709717, + "logits/rejected": -1.9267898797988892, + "logps/chosen": -660.6488037109375, + "logps/rejected": -2011.353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.86937141418457, + "rewards/margins": 13.78453540802002, + "rewards/rejected": -19.653905868530273, + "step": 30020 + }, + { + "epoch": 1.79, + "learning_rate": 2.09255274027247e-06, + "logits/chosen": -2.5153231620788574, + "logits/rejected": -1.8795077800750732, + "logps/chosen": -690.0615844726562, + "logps/rejected": -2137.508544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.175148963928223, + "rewards/margins": 14.7601318359375, + "rewards/rejected": -20.935283660888672, + "step": 30030 + }, + { + "epoch": 1.79, + "learning_rate": 2.0908414556344587e-06, + "logits/chosen": -2.5957818031311035, + "logits/rejected": -1.9102436304092407, + "logps/chosen": -661.4786987304688, + "logps/rejected": -2037.8082275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9810099601745605, + "rewards/margins": 13.953394889831543, + "rewards/rejected": -19.934406280517578, + "step": 30040 + }, + { + "epoch": 1.79, + "learning_rate": 2.089130367965364e-06, + "logits/chosen": -2.5124640464782715, + "logits/rejected": -1.8302297592163086, + "logps/chosen": -689.1401977539062, + "logps/rejected": -2034.4290771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173144340515137, + "rewards/margins": 13.745739936828613, + "rewards/rejected": -19.918880462646484, + "step": 30050 + }, + { + "epoch": 1.79, + "learning_rate": 2.087419478088906e-06, + "logits/chosen": -2.525353193283081, + "logits/rejected": -1.7981021404266357, + "logps/chosen": -680.73388671875, + "logps/rejected": -2025.2337646484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1605024337768555, + "rewards/margins": 13.65882396697998, + "rewards/rejected": -19.819326400756836, + "step": 30060 + }, + { + "epoch": 1.79, + "learning_rate": 2.085708786828705e-06, + "logits/chosen": -2.540825128555298, + "logits/rejected": -1.8049185276031494, + "logps/chosen": -669.7635498046875, + "logps/rejected": -2170.103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.955801963806152, + "rewards/margins": 15.304242134094238, + "rewards/rejected": -21.26004409790039, + "step": 30070 + }, + { + "epoch": 1.79, + "learning_rate": 2.08399829500829e-06, + "logits/chosen": -2.5233054161071777, + "logits/rejected": -1.8373973369598389, + "logps/chosen": -678.0908203125, + "logps/rejected": -2079.77392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.094742774963379, + "rewards/margins": 14.260465621948242, + "rewards/rejected": -20.355205535888672, + "step": 30080 + }, + { + "epoch": 1.79, + "learning_rate": 2.0822880034510897e-06, + "logits/chosen": -2.553558588027954, + "logits/rejected": -1.9336017370224, + "logps/chosen": -698.8424682617188, + "logps/rejected": -2135.75, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.287592887878418, + "rewards/margins": 14.628702163696289, + "rewards/rejected": -20.916296005249023, + "step": 30090 + }, + { + "epoch": 1.79, + "learning_rate": 2.0805779129804397e-06, + "logits/chosen": -2.5599684715270996, + "logits/rejected": -1.9743738174438477, + "logps/chosen": -685.008544921875, + "logps/rejected": -2150.235595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.12555456161499, + "rewards/margins": 14.920049667358398, + "rewards/rejected": -21.045604705810547, + "step": 30100 + }, + { + "epoch": 1.8, + "learning_rate": 2.078868024419576e-06, + "logits/chosen": -2.4861693382263184, + "logits/rejected": -1.8643478155136108, + "logps/chosen": -667.8790283203125, + "logps/rejected": -2131.515380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022027492523193, + "rewards/margins": 14.843548774719238, + "rewards/rejected": -20.865575790405273, + "step": 30110 + }, + { + "epoch": 1.8, + "learning_rate": 2.077158338591641e-06, + "logits/chosen": -2.5443053245544434, + "logits/rejected": -1.9525182247161865, + "logps/chosen": -677.5750732421875, + "logps/rejected": -2139.532470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.084689617156982, + "rewards/margins": 14.857647895812988, + "rewards/rejected": -20.942338943481445, + "step": 30120 + }, + { + "epoch": 1.8, + "learning_rate": 2.075448856319676e-06, + "logits/chosen": -2.5119316577911377, + "logits/rejected": -1.9443279504776, + "logps/chosen": -689.2346801757812, + "logps/rejected": -2155.19921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.140139579772949, + "rewards/margins": 14.966135025024414, + "rewards/rejected": -21.106271743774414, + "step": 30130 + }, + { + "epoch": 1.8, + "learning_rate": 2.0737395784266263e-06, + "logits/chosen": -2.5554492473602295, + "logits/rejected": -1.9017711877822876, + "logps/chosen": -670.809326171875, + "logps/rejected": -2076.140869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953392028808594, + "rewards/margins": 14.368449211120605, + "rewards/rejected": -20.32184410095215, + "step": 30140 + }, + { + "epoch": 1.8, + "learning_rate": 2.0720305057353384e-06, + "logits/chosen": -2.5149402618408203, + "logits/rejected": -1.8517354726791382, + "logps/chosen": -691.6993408203125, + "logps/rejected": -2092.612060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.270426273345947, + "rewards/margins": 14.210230827331543, + "rewards/rejected": -20.480655670166016, + "step": 30150 + }, + { + "epoch": 1.8, + "learning_rate": 2.0703216390685586e-06, + "logits/chosen": -2.532060384750366, + "logits/rejected": -1.919079065322876, + "logps/chosen": -692.2068481445312, + "logps/rejected": -2021.557861328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.201287269592285, + "rewards/margins": 13.584259033203125, + "rewards/rejected": -19.78554916381836, + "step": 30160 + }, + { + "epoch": 1.8, + "learning_rate": 2.0686129792489372e-06, + "logits/chosen": -2.5518898963928223, + "logits/rejected": -1.9357538223266602, + "logps/chosen": -679.7120971679688, + "logps/rejected": -2026.219482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095388889312744, + "rewards/margins": 13.736343383789062, + "rewards/rejected": -19.831729888916016, + "step": 30170 + }, + { + "epoch": 1.8, + "learning_rate": 2.0669045270990216e-06, + "logits/chosen": -2.564275026321411, + "logits/rejected": -1.841456651687622, + "logps/chosen": -660.9005126953125, + "logps/rejected": -2075.37744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891127586364746, + "rewards/margins": 14.412895202636719, + "rewards/rejected": -20.304019927978516, + "step": 30180 + }, + { + "epoch": 1.8, + "learning_rate": 2.0651962834412613e-06, + "logits/chosen": -2.568549394607544, + "logits/rejected": -1.701662302017212, + "logps/chosen": -674.8630981445312, + "logps/rejected": -2062.859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.003626823425293, + "rewards/margins": 14.177400588989258, + "rewards/rejected": -20.181028366088867, + "step": 30190 + }, + { + "epoch": 1.8, + "learning_rate": 2.0634882490980045e-06, + "logits/chosen": -2.532555341720581, + "logits/rejected": -1.7996299266815186, + "logps/chosen": -686.0786743164062, + "logps/rejected": -2087.472412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085827827453613, + "rewards/margins": 14.34204387664795, + "rewards/rejected": -20.427871704101562, + "step": 30200 + }, + { + "epoch": 1.8, + "learning_rate": 2.0617804248914992e-06, + "logits/chosen": -2.5387587547302246, + "logits/rejected": -1.7826511859893799, + "logps/chosen": -669.6099243164062, + "logps/rejected": -1988.633544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020039081573486, + "rewards/margins": 13.430256843566895, + "rewards/rejected": -19.45029640197754, + "step": 30210 + }, + { + "epoch": 1.8, + "learning_rate": 2.060072811643892e-06, + "logits/chosen": -2.526512861251831, + "logits/rejected": -1.8959295749664307, + "logps/chosen": -666.8753051757812, + "logps/rejected": -2001.008056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979276180267334, + "rewards/margins": 13.58599853515625, + "rewards/rejected": -19.56527328491211, + "step": 30220 + }, + { + "epoch": 1.8, + "learning_rate": 2.058365410177227e-06, + "logits/chosen": -2.5677878856658936, + "logits/rejected": -1.9109703302383423, + "logps/chosen": -699.422607421875, + "logps/rejected": -2134.12255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.293922424316406, + "rewards/margins": 14.586050033569336, + "rewards/rejected": -20.879972457885742, + "step": 30230 + }, + { + "epoch": 1.8, + "learning_rate": 2.056658221313449e-06, + "logits/chosen": -2.5188181400299072, + "logits/rejected": -1.8980251550674438, + "logps/chosen": -683.8613891601562, + "logps/rejected": -2026.5638427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08843994140625, + "rewards/margins": 13.741793632507324, + "rewards/rejected": -19.830232620239258, + "step": 30240 + }, + { + "epoch": 1.8, + "learning_rate": 2.0549512458743962e-06, + "logits/chosen": -2.5145180225372314, + "logits/rejected": -1.8905404806137085, + "logps/chosen": -694.0892944335938, + "logps/rejected": -2077.08447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.290443420410156, + "rewards/margins": 14.039314270019531, + "rewards/rejected": -20.329757690429688, + "step": 30250 + }, + { + "epoch": 1.8, + "learning_rate": 2.0532444846818095e-06, + "logits/chosen": -2.472809076309204, + "logits/rejected": -1.758424162864685, + "logps/chosen": -681.4251098632812, + "logps/rejected": -2048.31591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.090879440307617, + "rewards/margins": 13.951156616210938, + "rewards/rejected": -20.042034149169922, + "step": 30260 + }, + { + "epoch": 1.81, + "learning_rate": 2.0515379385573205e-06, + "logits/chosen": -2.532057523727417, + "logits/rejected": -1.8398363590240479, + "logps/chosen": -680.6527709960938, + "logps/rejected": -2057.811767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.053122520446777, + "rewards/margins": 14.075573921203613, + "rewards/rejected": -20.12869644165039, + "step": 30270 + }, + { + "epoch": 1.81, + "learning_rate": 2.049831608322464e-06, + "logits/chosen": -2.5349013805389404, + "logits/rejected": -1.8976829051971436, + "logps/chosen": -669.3627319335938, + "logps/rejected": -2084.386474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0052008628845215, + "rewards/margins": 14.385704040527344, + "rewards/rejected": -20.390905380249023, + "step": 30280 + }, + { + "epoch": 1.81, + "learning_rate": 2.0481254947986634e-06, + "logits/chosen": -2.5526669025421143, + "logits/rejected": -1.871313452720642, + "logps/chosen": -678.84130859375, + "logps/rejected": -2189.67333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.058924198150635, + "rewards/margins": 15.388389587402344, + "rewards/rejected": -21.447315216064453, + "step": 30290 + }, + { + "epoch": 1.81, + "learning_rate": 2.0464195988072454e-06, + "logits/chosen": -2.5196375846862793, + "logits/rejected": -1.867203950881958, + "logps/chosen": -688.9110717773438, + "logps/rejected": -2079.586181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.156447410583496, + "rewards/margins": 14.213412284851074, + "rewards/rejected": -20.369861602783203, + "step": 30300 + }, + { + "epoch": 1.81, + "learning_rate": 2.044713921169427e-06, + "logits/chosen": -2.5333352088928223, + "logits/rejected": -1.8165757656097412, + "logps/chosen": -691.4625854492188, + "logps/rejected": -2051.63916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2212629318237305, + "rewards/margins": 13.864128112792969, + "rewards/rejected": -20.085391998291016, + "step": 30310 + }, + { + "epoch": 1.81, + "learning_rate": 2.0430084627063198e-06, + "logits/chosen": -2.5135838985443115, + "logits/rejected": -1.8771789073944092, + "logps/chosen": -699.0726318359375, + "logps/rejected": -2061.9404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.272255897521973, + "rewards/margins": 13.90483283996582, + "rewards/rejected": -20.177087783813477, + "step": 30320 + }, + { + "epoch": 1.81, + "learning_rate": 2.041303224238934e-06, + "logits/chosen": -2.5298731327056885, + "logits/rejected": -1.8169000148773193, + "logps/chosen": -660.8648681640625, + "logps/rejected": -2001.1201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.884932518005371, + "rewards/margins": 13.681012153625488, + "rewards/rejected": -19.565946578979492, + "step": 30330 + }, + { + "epoch": 1.81, + "learning_rate": 2.039598206588169e-06, + "logits/chosen": -2.541382312774658, + "logits/rejected": -1.9289710521697998, + "logps/chosen": -687.2220458984375, + "logps/rejected": -2095.391845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146002769470215, + "rewards/margins": 14.389608383178711, + "rewards/rejected": -20.53561019897461, + "step": 30340 + }, + { + "epoch": 1.81, + "learning_rate": 2.037893410574823e-06, + "logits/chosen": -2.525330066680908, + "logits/rejected": -1.8628127574920654, + "logps/chosen": -687.3167114257812, + "logps/rejected": -2157.19580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174931526184082, + "rewards/margins": 14.948270797729492, + "rewards/rejected": -21.12320327758789, + "step": 30350 + }, + { + "epoch": 1.81, + "learning_rate": 2.036188837019582e-06, + "logits/chosen": -2.5286688804626465, + "logits/rejected": -1.8304636478424072, + "logps/chosen": -705.010009765625, + "logps/rejected": -2085.543701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.327874183654785, + "rewards/margins": 14.084722518920898, + "rewards/rejected": -20.412595748901367, + "step": 30360 + }, + { + "epoch": 1.81, + "learning_rate": 2.0344844867430296e-06, + "logits/chosen": -2.571192502975464, + "logits/rejected": -1.984387755393982, + "logps/chosen": -702.2957763671875, + "logps/rejected": -2122.8212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25698709487915, + "rewards/margins": 14.515615463256836, + "rewards/rejected": -20.772602081298828, + "step": 30370 + }, + { + "epoch": 1.81, + "learning_rate": 2.0327803605656386e-06, + "logits/chosen": -2.5296967029571533, + "logits/rejected": -1.8936545848846436, + "logps/chosen": -691.081298828125, + "logps/rejected": -2064.759521484375, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169353485107422, + "rewards/margins": 14.036242485046387, + "rewards/rejected": -20.205596923828125, + "step": 30380 + }, + { + "epoch": 1.81, + "learning_rate": 2.031076459307777e-06, + "logits/chosen": -2.545435667037964, + "logits/rejected": -1.777151107788086, + "logps/chosen": -739.7183837890625, + "logps/rejected": -2127.10986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.709023475646973, + "rewards/margins": 14.106013298034668, + "rewards/rejected": -20.81503677368164, + "step": 30390 + }, + { + "epoch": 1.81, + "learning_rate": 2.0293727837897016e-06, + "logits/chosen": -2.4425840377807617, + "logits/rejected": -1.7005831003189087, + "logps/chosen": -737.5581665039062, + "logps/rejected": -2164.786865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.716248512268066, + "rewards/margins": 14.497556686401367, + "rewards/rejected": -21.21380615234375, + "step": 30400 + }, + { + "epoch": 1.81, + "learning_rate": 2.027669334831562e-06, + "logits/chosen": -2.4954352378845215, + "logits/rejected": -1.7916927337646484, + "logps/chosen": -776.8181762695312, + "logps/rejected": -2282.99462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090954780578613, + "rewards/margins": 15.291070938110352, + "rewards/rejected": -22.382022857666016, + "step": 30410 + }, + { + "epoch": 1.81, + "learning_rate": 2.0259661132533983e-06, + "logits/chosen": -2.5295357704162598, + "logits/rejected": -1.7408831119537354, + "logps/chosen": -777.3949584960938, + "logps/rejected": -2263.727783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.038365364074707, + "rewards/margins": 15.167004585266113, + "rewards/rejected": -22.20536994934082, + "step": 30420 + }, + { + "epoch": 1.81, + "learning_rate": 2.024263119875142e-06, + "logits/chosen": -2.476414918899536, + "logits/rejected": -1.7511556148529053, + "logps/chosen": -765.5812377929688, + "logps/rejected": -2211.94873046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9742889404296875, + "rewards/margins": 14.695295333862305, + "rewards/rejected": -21.669584274291992, + "step": 30430 + }, + { + "epoch": 1.82, + "learning_rate": 2.0225603555166136e-06, + "logits/chosen": -2.5071849822998047, + "logits/rejected": -1.7829927206039429, + "logps/chosen": -763.1129760742188, + "logps/rejected": -2191.552734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.927244663238525, + "rewards/margins": 14.537515640258789, + "rewards/rejected": -21.464759826660156, + "step": 30440 + }, + { + "epoch": 1.82, + "learning_rate": 2.020857820997524e-06, + "logits/chosen": -2.5295941829681396, + "logits/rejected": -1.7927758693695068, + "logps/chosen": -703.6065673828125, + "logps/rejected": -2173.64794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360177516937256, + "rewards/margins": 14.933502197265625, + "rewards/rejected": -21.293682098388672, + "step": 30450 + }, + { + "epoch": 1.82, + "learning_rate": 2.019155517137473e-06, + "logits/chosen": -2.518815517425537, + "logits/rejected": -1.6935596466064453, + "logps/chosen": -696.1407470703125, + "logps/rejected": -2136.87255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.248630523681641, + "rewards/margins": 14.678842544555664, + "rewards/rejected": -20.927473068237305, + "step": 30460 + }, + { + "epoch": 1.82, + "learning_rate": 2.0174534447559496e-06, + "logits/chosen": -2.48419189453125, + "logits/rejected": -1.7049381732940674, + "logps/chosen": -701.0701293945312, + "logps/rejected": -2126.113037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.311054229736328, + "rewards/margins": 14.510797500610352, + "rewards/rejected": -20.821849822998047, + "step": 30470 + }, + { + "epoch": 1.82, + "learning_rate": 2.015751604672333e-06, + "logits/chosen": -2.5100150108337402, + "logits/rejected": -1.7551991939544678, + "logps/chosen": -703.6183471679688, + "logps/rejected": -2192.64794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.345242500305176, + "rewards/margins": 15.136049270629883, + "rewards/rejected": -21.48128890991211, + "step": 30480 + }, + { + "epoch": 1.82, + "learning_rate": 2.0140499977058873e-06, + "logits/chosen": -2.4884064197540283, + "logits/rejected": -1.620212197303772, + "logps/chosen": -689.295654296875, + "logps/rejected": -2058.544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.136180400848389, + "rewards/margins": 14.005203247070312, + "rewards/rejected": -20.14138412475586, + "step": 30490 + }, + { + "epoch": 1.82, + "learning_rate": 2.012348624675766e-06, + "logits/chosen": -2.525031566619873, + "logits/rejected": -1.6327794790267944, + "logps/chosen": -705.0852661132812, + "logps/rejected": -2119.9453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411264896392822, + "rewards/margins": 14.335960388183594, + "rewards/rejected": -20.74722671508789, + "step": 30500 + }, + { + "epoch": 1.82, + "learning_rate": 2.010647486401011e-06, + "logits/chosen": -2.4827489852905273, + "logits/rejected": -1.7575743198394775, + "logps/chosen": -704.5250244140625, + "logps/rejected": -2040.257080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.32197380065918, + "rewards/margins": 13.644296646118164, + "rewards/rejected": -19.966270446777344, + "step": 30510 + }, + { + "epoch": 1.82, + "learning_rate": 2.008946583700549e-06, + "logits/chosen": -2.4764950275421143, + "logits/rejected": -1.7266457080841064, + "logps/chosen": -710.5094604492188, + "logps/rejected": -2163.27197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.450006008148193, + "rewards/margins": 14.736804008483887, + "rewards/rejected": -21.186811447143555, + "step": 30520 + }, + { + "epoch": 1.82, + "learning_rate": 2.0072459173931965e-06, + "logits/chosen": -2.556396722793579, + "logits/rejected": -1.832506537437439, + "logps/chosen": -702.1677856445312, + "logps/rejected": -2108.429443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.289809703826904, + "rewards/margins": 14.3651762008667, + "rewards/rejected": -20.654987335205078, + "step": 30530 + }, + { + "epoch": 1.82, + "learning_rate": 2.005545488297652e-06, + "logits/chosen": -2.589049816131592, + "logits/rejected": -1.8497365713119507, + "logps/chosen": -704.2153930664062, + "logps/rejected": -2143.260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329907417297363, + "rewards/margins": 14.662564277648926, + "rewards/rejected": -20.99247169494629, + "step": 30540 + }, + { + "epoch": 1.82, + "learning_rate": 2.0038452972325043e-06, + "logits/chosen": -2.472330331802368, + "logits/rejected": -1.6922725439071655, + "logps/chosen": -683.1864013671875, + "logps/rejected": -2171.733154296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146954536437988, + "rewards/margins": 15.129098892211914, + "rewards/rejected": -21.276050567626953, + "step": 30550 + }, + { + "epoch": 1.82, + "learning_rate": 2.0021453450162227e-06, + "logits/chosen": -2.5136117935180664, + "logits/rejected": -1.6908800601959229, + "logps/chosen": -691.0985107421875, + "logps/rejected": -2193.814453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.224358558654785, + "rewards/margins": 15.276458740234375, + "rewards/rejected": -21.500816345214844, + "step": 30560 + }, + { + "epoch": 1.82, + "learning_rate": 2.0004456324671673e-06, + "logits/chosen": -2.5368123054504395, + "logits/rejected": -1.7513526678085327, + "logps/chosen": -705.1664428710938, + "logps/rejected": -2099.61279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.305663108825684, + "rewards/margins": 14.252212524414062, + "rewards/rejected": -20.557878494262695, + "step": 30570 + }, + { + "epoch": 1.82, + "learning_rate": 1.9987461604035775e-06, + "logits/chosen": -2.556069850921631, + "logits/rejected": -1.8854055404663086, + "logps/chosen": -715.1091918945312, + "logps/rejected": -2172.7099609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.453923225402832, + "rewards/margins": 14.8228120803833, + "rewards/rejected": -21.2767333984375, + "step": 30580 + }, + { + "epoch": 1.82, + "learning_rate": 1.997046929643579e-06, + "logits/chosen": -2.5239415168762207, + "logits/rejected": -1.7605736255645752, + "logps/chosen": -691.8831787109375, + "logps/rejected": -2113.83203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2292609214782715, + "rewards/margins": 14.468633651733398, + "rewards/rejected": -20.697893142700195, + "step": 30590 + }, + { + "epoch": 1.82, + "learning_rate": 1.9953479410051833e-06, + "logits/chosen": -2.5640575885772705, + "logits/rejected": -1.8662019968032837, + "logps/chosen": -682.8734741210938, + "logps/rejected": -2128.75537109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10862398147583, + "rewards/margins": 14.750020980834961, + "rewards/rejected": -20.858646392822266, + "step": 30600 + }, + { + "epoch": 1.83, + "learning_rate": 1.9936491953062813e-06, + "logits/chosen": -2.4927024841308594, + "logits/rejected": -1.7787036895751953, + "logps/chosen": -725.2298583984375, + "logps/rejected": -2209.79833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.554083824157715, + "rewards/margins": 15.1100492477417, + "rewards/rejected": -21.66413688659668, + "step": 30610 + }, + { + "epoch": 1.83, + "learning_rate": 1.991950693364651e-06, + "logits/chosen": -2.5196661949157715, + "logits/rejected": -1.8047329187393188, + "logps/chosen": -721.2186889648438, + "logps/rejected": -2025.31640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.498641014099121, + "rewards/margins": 13.31397819519043, + "rewards/rejected": -19.8126220703125, + "step": 30620 + }, + { + "epoch": 1.83, + "learning_rate": 1.9902524359979494e-06, + "logits/chosen": -2.5312297344207764, + "logits/rejected": -1.7081758975982666, + "logps/chosen": -693.6202392578125, + "logps/rejected": -2117.20654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.257392883300781, + "rewards/margins": 14.487741470336914, + "rewards/rejected": -20.745136260986328, + "step": 30630 + }, + { + "epoch": 1.83, + "learning_rate": 1.9885544240237196e-06, + "logits/chosen": -2.5080161094665527, + "logits/rejected": -1.57887864112854, + "logps/chosen": -718.201904296875, + "logps/rejected": -2170.373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515275478363037, + "rewards/margins": 14.750444412231445, + "rewards/rejected": -21.26572036743164, + "step": 30640 + }, + { + "epoch": 1.83, + "learning_rate": 1.9868566582593828e-06, + "logits/chosen": -2.532738208770752, + "logits/rejected": -1.6679385900497437, + "logps/chosen": -705.8538818359375, + "logps/rejected": -2094.541748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.310004234313965, + "rewards/margins": 14.19866943359375, + "rewards/rejected": -20.508676528930664, + "step": 30650 + }, + { + "epoch": 1.83, + "learning_rate": 1.985159139522245e-06, + "logits/chosen": -2.5315499305725098, + "logits/rejected": -1.7932546138763428, + "logps/chosen": -717.1817626953125, + "logps/rejected": -2126.468505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.478335380554199, + "rewards/margins": 14.34814739227295, + "rewards/rejected": -20.82648468017578, + "step": 30660 + }, + { + "epoch": 1.83, + "learning_rate": 1.9834618686294902e-06, + "logits/chosen": -2.494868755340576, + "logits/rejected": -1.7435945272445679, + "logps/chosen": -717.4022216796875, + "logps/rejected": -2151.15673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.478137016296387, + "rewards/margins": 14.589421272277832, + "rewards/rejected": -21.067556381225586, + "step": 30670 + }, + { + "epoch": 1.83, + "learning_rate": 1.981764846398186e-06, + "logits/chosen": -2.5135550498962402, + "logits/rejected": -1.7621628046035767, + "logps/chosen": -706.2513427734375, + "logps/rejected": -2183.1328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406067848205566, + "rewards/margins": 14.98505973815918, + "rewards/rejected": -21.39112663269043, + "step": 30680 + }, + { + "epoch": 1.83, + "learning_rate": 1.9800680736452773e-06, + "logits/chosen": -2.491746187210083, + "logits/rejected": -1.759610891342163, + "logps/chosen": -712.975341796875, + "logps/rejected": -2051.762451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.421156406402588, + "rewards/margins": 13.64885425567627, + "rewards/rejected": -20.070011138916016, + "step": 30690 + }, + { + "epoch": 1.83, + "learning_rate": 1.9783715511875913e-06, + "logits/chosen": -2.540367364883423, + "logits/rejected": -1.7704241275787354, + "logps/chosen": -704.8922729492188, + "logps/rejected": -2214.42578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.316393852233887, + "rewards/margins": 15.382899284362793, + "rewards/rejected": -21.699291229248047, + "step": 30700 + }, + { + "epoch": 1.83, + "learning_rate": 1.976675279841835e-06, + "logits/chosen": -2.4987940788269043, + "logits/rejected": -1.768943428993225, + "logps/chosen": -713.2362060546875, + "logps/rejected": -2209.23876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.449564456939697, + "rewards/margins": 15.205060005187988, + "rewards/rejected": -21.65462303161621, + "step": 30710 + }, + { + "epoch": 1.83, + "learning_rate": 1.974979260424591e-06, + "logits/chosen": -2.4857962131500244, + "logits/rejected": -1.703678846359253, + "logps/chosen": -718.8173828125, + "logps/rejected": -2177.9931640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.485774040222168, + "rewards/margins": 14.853876113891602, + "rewards/rejected": -21.339651107788086, + "step": 30720 + }, + { + "epoch": 1.83, + "learning_rate": 1.9732834937523253e-06, + "logits/chosen": -2.5250813961029053, + "logits/rejected": -1.8617610931396484, + "logps/chosen": -701.7443237304688, + "logps/rejected": -2150.236328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3031415939331055, + "rewards/margins": 14.753923416137695, + "rewards/rejected": -21.057064056396484, + "step": 30730 + }, + { + "epoch": 1.83, + "learning_rate": 1.971587980641378e-06, + "logits/chosen": -2.5154051780700684, + "logits/rejected": -1.6821876764297485, + "logps/chosen": -715.9832153320312, + "logps/rejected": -2059.97607421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.438519477844238, + "rewards/margins": 13.704185485839844, + "rewards/rejected": -20.142704010009766, + "step": 30740 + }, + { + "epoch": 1.83, + "learning_rate": 1.969892721907971e-06, + "logits/chosen": -2.5788419246673584, + "logits/rejected": -1.8238933086395264, + "logps/chosen": -679.6090698242188, + "logps/rejected": -2219.77880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.094907283782959, + "rewards/margins": 15.658628463745117, + "rewards/rejected": -21.753536224365234, + "step": 30750 + }, + { + "epoch": 1.83, + "learning_rate": 1.9681977183682015e-06, + "logits/chosen": -2.536328077316284, + "logits/rejected": -1.7658799886703491, + "logps/chosen": -656.28515625, + "logps/rejected": -2187.791748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.870142936706543, + "rewards/margins": 15.568048477172852, + "rewards/rejected": -21.43819236755371, + "step": 30760 + }, + { + "epoch": 1.83, + "learning_rate": 1.966502970838042e-06, + "logits/chosen": -2.5214200019836426, + "logits/rejected": -1.7317841053009033, + "logps/chosen": -688.8507690429688, + "logps/rejected": -2059.166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177579402923584, + "rewards/margins": 13.977190017700195, + "rewards/rejected": -20.154769897460938, + "step": 30770 + }, + { + "epoch": 1.84, + "learning_rate": 1.9648084801333468e-06, + "logits/chosen": -2.490257501602173, + "logits/rejected": -1.8427356481552124, + "logps/chosen": -724.1483154296875, + "logps/rejected": -2069.39404296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561668395996094, + "rewards/margins": 13.70141315460205, + "rewards/rejected": -20.26308250427246, + "step": 30780 + }, + { + "epoch": 1.84, + "learning_rate": 1.9631142470698413e-06, + "logits/chosen": -2.5277740955352783, + "logits/rejected": -1.8769546747207642, + "logps/chosen": -687.2526245117188, + "logps/rejected": -2150.177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.181735038757324, + "rewards/margins": 14.86201286315918, + "rewards/rejected": -21.04374885559082, + "step": 30790 + }, + { + "epoch": 1.84, + "learning_rate": 1.9614202724631316e-06, + "logits/chosen": -2.5183587074279785, + "logits/rejected": -1.7811918258666992, + "logps/chosen": -689.1614990234375, + "logps/rejected": -2078.12353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.162232398986816, + "rewards/margins": 14.179647445678711, + "rewards/rejected": -20.341880798339844, + "step": 30800 + }, + { + "epoch": 1.84, + "learning_rate": 1.9597265571286945e-06, + "logits/chosen": -2.525214433670044, + "logits/rejected": -1.7896000146865845, + "logps/chosen": -684.172119140625, + "logps/rejected": -2023.3310546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174244403839111, + "rewards/margins": 13.622334480285645, + "rewards/rejected": -19.796579360961914, + "step": 30810 + }, + { + "epoch": 1.84, + "learning_rate": 1.958033101881887e-06, + "logits/chosen": -2.4941623210906982, + "logits/rejected": -1.8007481098175049, + "logps/chosen": -661.3892822265625, + "logps/rejected": -2142.53466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935400485992432, + "rewards/margins": 15.050521850585938, + "rewards/rejected": -20.98592185974121, + "step": 30820 + }, + { + "epoch": 1.84, + "learning_rate": 1.9563399075379365e-06, + "logits/chosen": -2.564429759979248, + "logits/rejected": -1.8502830266952515, + "logps/chosen": -701.9244995117188, + "logps/rejected": -2057.5927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.263919830322266, + "rewards/margins": 13.87939167022705, + "rewards/rejected": -20.143312454223633, + "step": 30830 + }, + { + "epoch": 1.84, + "learning_rate": 1.9546469749119485e-06, + "logits/chosen": -2.5090115070343018, + "logits/rejected": -1.8583223819732666, + "logps/chosen": -709.5564575195312, + "logps/rejected": -2068.35009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.380459785461426, + "rewards/margins": 13.847427368164062, + "rewards/rejected": -20.227886199951172, + "step": 30840 + }, + { + "epoch": 1.84, + "learning_rate": 1.9529543048189e-06, + "logits/chosen": -2.480201482772827, + "logits/rejected": -1.6616621017456055, + "logps/chosen": -676.2493286132812, + "logps/rejected": -2200.480224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045130729675293, + "rewards/margins": 15.510441780090332, + "rewards/rejected": -21.555574417114258, + "step": 30850 + }, + { + "epoch": 1.84, + "learning_rate": 1.951261898073641e-06, + "logits/chosen": -2.5684285163879395, + "logits/rejected": -1.7868413925170898, + "logps/chosen": -692.74169921875, + "logps/rejected": -2276.072998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.136641502380371, + "rewards/margins": 16.184669494628906, + "rewards/rejected": -22.321311950683594, + "step": 30860 + }, + { + "epoch": 1.84, + "learning_rate": 1.9495697554908984e-06, + "logits/chosen": -2.483872652053833, + "logits/rejected": -1.7188985347747803, + "logps/chosen": -683.6192016601562, + "logps/rejected": -2158.836669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143277168273926, + "rewards/margins": 15.016424179077148, + "rewards/rejected": -21.15970230102539, + "step": 30870 + }, + { + "epoch": 1.84, + "learning_rate": 1.9478778778852673e-06, + "logits/chosen": -2.5207576751708984, + "logits/rejected": -1.8742425441741943, + "logps/chosen": -685.945556640625, + "logps/rejected": -2149.15478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.188384532928467, + "rewards/margins": 14.8589448928833, + "rewards/rejected": -21.04732894897461, + "step": 30880 + }, + { + "epoch": 1.84, + "learning_rate": 1.94618626607122e-06, + "logits/chosen": -2.5021743774414062, + "logits/rejected": -1.7742938995361328, + "logps/chosen": -693.8001098632812, + "logps/rejected": -2159.08349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.236659049987793, + "rewards/margins": 14.9153413772583, + "rewards/rejected": -21.152002334594727, + "step": 30890 + }, + { + "epoch": 1.84, + "learning_rate": 1.944494920863096e-06, + "logits/chosen": -2.5017223358154297, + "logits/rejected": -1.835161566734314, + "logps/chosen": -688.8543090820312, + "logps/rejected": -2102.2333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179194450378418, + "rewards/margins": 14.404296875, + "rewards/rejected": -20.583492279052734, + "step": 30900 + }, + { + "epoch": 1.84, + "learning_rate": 1.9428038430751106e-06, + "logits/chosen": -2.500476360321045, + "logits/rejected": -1.7117197513580322, + "logps/chosen": -708.8024291992188, + "logps/rejected": -2169.0166015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.364490509033203, + "rewards/margins": 14.872967720031738, + "rewards/rejected": -21.237459182739258, + "step": 30910 + }, + { + "epoch": 1.84, + "learning_rate": 1.941113033521348e-06, + "logits/chosen": -2.52113676071167, + "logits/rejected": -1.7321796417236328, + "logps/chosen": -676.2507934570312, + "logps/rejected": -2160.03369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076651573181152, + "rewards/margins": 15.079612731933594, + "rewards/rejected": -21.15626335144043, + "step": 30920 + }, + { + "epoch": 1.84, + "learning_rate": 1.939422493015764e-06, + "logits/chosen": -2.4934935569763184, + "logits/rejected": -1.8019936084747314, + "logps/chosen": -688.4459228515625, + "logps/rejected": -2131.365478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.104569435119629, + "rewards/margins": 14.761604309082031, + "rewards/rejected": -20.866172790527344, + "step": 30930 + }, + { + "epoch": 1.84, + "learning_rate": 1.937732222372185e-06, + "logits/chosen": -2.471561908721924, + "logits/rejected": -1.689531922340393, + "logps/chosen": -696.9876708984375, + "logps/rejected": -2111.809326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.275643825531006, + "rewards/margins": 14.383956909179688, + "rewards/rejected": -20.65960121154785, + "step": 30940 + }, + { + "epoch": 1.85, + "learning_rate": 1.9360422224043064e-06, + "logits/chosen": -2.535163402557373, + "logits/rejected": -1.8635609149932861, + "logps/chosen": -688.34521484375, + "logps/rejected": -2191.267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.094837188720703, + "rewards/margins": 15.377575874328613, + "rewards/rejected": -21.472414016723633, + "step": 30950 + }, + { + "epoch": 1.85, + "learning_rate": 1.934352493925695e-06, + "logits/chosen": -2.5161192417144775, + "logits/rejected": -1.833030104637146, + "logps/chosen": -677.23828125, + "logps/rejected": -2083.7900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0879998207092285, + "rewards/margins": 14.294763565063477, + "rewards/rejected": -20.382762908935547, + "step": 30960 + }, + { + "epoch": 1.85, + "learning_rate": 1.9326630377497853e-06, + "logits/chosen": -2.5450549125671387, + "logits/rejected": -1.908182144165039, + "logps/chosen": -703.4261474609375, + "logps/rejected": -2123.438232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3547892570495605, + "rewards/margins": 14.447549819946289, + "rewards/rejected": -20.80234146118164, + "step": 30970 + }, + { + "epoch": 1.85, + "learning_rate": 1.9309738546898834e-06, + "logits/chosen": -2.524942398071289, + "logits/rejected": -1.7579253911972046, + "logps/chosen": -678.0557250976562, + "logps/rejected": -2135.07763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022312641143799, + "rewards/margins": 14.899858474731445, + "rewards/rejected": -20.922168731689453, + "step": 30980 + }, + { + "epoch": 1.85, + "learning_rate": 1.929284945559159e-06, + "logits/chosen": -2.5446410179138184, + "logits/rejected": -1.7916219234466553, + "logps/chosen": -686.7945556640625, + "logps/rejected": -2122.09423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229310035705566, + "rewards/margins": 14.552820205688477, + "rewards/rejected": -20.78213119506836, + "step": 30990 + }, + { + "epoch": 1.85, + "learning_rate": 1.927596311170656e-06, + "logits/chosen": -2.545921564102173, + "logits/rejected": -1.8777726888656616, + "logps/chosen": -701.6646728515625, + "logps/rejected": -2042.813720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.287980079650879, + "rewards/margins": 13.700979232788086, + "rewards/rejected": -19.98896026611328, + "step": 31000 + }, + { + "epoch": 1.85, + "learning_rate": 1.9259079523372797e-06, + "logits/chosen": -2.496063709259033, + "logits/rejected": -1.7944501638412476, + "logps/chosen": -694.91943359375, + "logps/rejected": -2184.881103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229601860046387, + "rewards/margins": 15.180376052856445, + "rewards/rejected": -21.40997886657715, + "step": 31010 + }, + { + "epoch": 1.85, + "learning_rate": 1.9242198698718096e-06, + "logits/chosen": -2.4527130126953125, + "logits/rejected": -1.7353107929229736, + "logps/chosen": -700.1677856445312, + "logps/rejected": -2042.5009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.353127479553223, + "rewards/margins": 13.644274711608887, + "rewards/rejected": -19.99740219116211, + "step": 31020 + }, + { + "epoch": 1.85, + "learning_rate": 1.9225320645868863e-06, + "logits/chosen": -2.4753825664520264, + "logits/rejected": -1.7466514110565186, + "logps/chosen": -682.7391357421875, + "logps/rejected": -2033.3333740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1667585372924805, + "rewards/margins": 13.722883224487305, + "rewards/rejected": -19.8896427154541, + "step": 31030 + }, + { + "epoch": 1.85, + "learning_rate": 1.920844537295019e-06, + "logits/chosen": -2.605437755584717, + "logits/rejected": -1.7202638387680054, + "logps/chosen": -677.1472778320312, + "logps/rejected": -2215.7939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.057864189147949, + "rewards/margins": 15.666290283203125, + "rewards/rejected": -21.72415542602539, + "step": 31040 + }, + { + "epoch": 1.85, + "learning_rate": 1.919157288808585e-06, + "logits/chosen": -2.4907145500183105, + "logits/rejected": -1.7716214656829834, + "logps/chosen": -689.0984497070312, + "logps/rejected": -1980.776123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.181301116943359, + "rewards/margins": 13.188665390014648, + "rewards/rejected": -19.36996841430664, + "step": 31050 + }, + { + "epoch": 1.85, + "learning_rate": 1.9174703199398236e-06, + "logits/chosen": -2.5448944568634033, + "logits/rejected": -1.8907047510147095, + "logps/chosen": -697.0626831054688, + "logps/rejected": -2135.900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.218104362487793, + "rewards/margins": 14.69911003112793, + "rewards/rejected": -20.91721534729004, + "step": 31060 + }, + { + "epoch": 1.85, + "learning_rate": 1.915783631500844e-06, + "logits/chosen": -2.5057356357574463, + "logits/rejected": -1.7635536193847656, + "logps/chosen": -691.7662353515625, + "logps/rejected": -2203.884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2487592697143555, + "rewards/margins": 15.349077224731445, + "rewards/rejected": -21.597835540771484, + "step": 31070 + }, + { + "epoch": 1.85, + "learning_rate": 1.914097224303616e-06, + "logits/chosen": -2.5406346321105957, + "logits/rejected": -1.8216546773910522, + "logps/chosen": -673.1266479492188, + "logps/rejected": -2152.4150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.010494709014893, + "rewards/margins": 15.071405410766602, + "rewards/rejected": -21.081899642944336, + "step": 31080 + }, + { + "epoch": 1.85, + "learning_rate": 1.9124110991599772e-06, + "logits/chosen": -2.4800944328308105, + "logits/rejected": -1.8115699291229248, + "logps/chosen": -689.0078125, + "logps/rejected": -2139.882080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.17397403717041, + "rewards/margins": 14.782180786132812, + "rewards/rejected": -20.95615577697754, + "step": 31090 + }, + { + "epoch": 1.85, + "learning_rate": 1.910725256881627e-06, + "logits/chosen": -2.5424304008483887, + "logits/rejected": -1.6623547077178955, + "logps/chosen": -697.5823974609375, + "logps/rejected": -2183.3740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308192253112793, + "rewards/margins": 15.086204528808594, + "rewards/rejected": -21.394399642944336, + "step": 31100 + }, + { + "epoch": 1.86, + "learning_rate": 1.9090396982801317e-06, + "logits/chosen": -2.5418248176574707, + "logits/rejected": -1.8680903911590576, + "logps/chosen": -686.7454833984375, + "logps/rejected": -2144.09521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10386323928833, + "rewards/margins": 14.881739616394043, + "rewards/rejected": -20.985605239868164, + "step": 31110 + }, + { + "epoch": 1.86, + "learning_rate": 1.907354424166918e-06, + "logits/chosen": -2.5299010276794434, + "logits/rejected": -1.7380311489105225, + "logps/chosen": -696.7957763671875, + "logps/rejected": -2183.6572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2194132804870605, + "rewards/margins": 15.170977592468262, + "rewards/rejected": -21.390392303466797, + "step": 31120 + }, + { + "epoch": 1.86, + "learning_rate": 1.9056694353532763e-06, + "logits/chosen": -2.519501209259033, + "logits/rejected": -1.8048938512802124, + "logps/chosen": -676.9010620117188, + "logps/rejected": -2075.01513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.100472450256348, + "rewards/margins": 14.226542472839355, + "rewards/rejected": -20.327014923095703, + "step": 31130 + }, + { + "epoch": 1.86, + "learning_rate": 1.9039847326503608e-06, + "logits/chosen": -2.531761407852173, + "logits/rejected": -1.8367475271224976, + "logps/chosen": -702.1506958007812, + "logps/rejected": -2137.8818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.338726997375488, + "rewards/margins": 14.59221363067627, + "rewards/rejected": -20.930936813354492, + "step": 31140 + }, + { + "epoch": 1.86, + "learning_rate": 1.9023003168691878e-06, + "logits/chosen": -2.4891769886016846, + "logits/rejected": -1.8542454242706299, + "logps/chosen": -715.0604248046875, + "logps/rejected": -2143.311767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4552812576293945, + "rewards/margins": 14.528729438781738, + "rewards/rejected": -20.984010696411133, + "step": 31150 + }, + { + "epoch": 1.86, + "learning_rate": 1.9006161888206342e-06, + "logits/chosen": -2.5105433464050293, + "logits/rejected": -1.7512954473495483, + "logps/chosen": -671.3070678710938, + "logps/rejected": -2033.0970458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020388603210449, + "rewards/margins": 13.890055656433105, + "rewards/rejected": -19.910444259643555, + "step": 31160 + }, + { + "epoch": 1.86, + "learning_rate": 1.8989323493154402e-06, + "logits/chosen": -2.5304348468780518, + "logits/rejected": -1.807080864906311, + "logps/chosen": -682.9288330078125, + "logps/rejected": -2209.181396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117784023284912, + "rewards/margins": 15.538670539855957, + "rewards/rejected": -21.65645408630371, + "step": 31170 + }, + { + "epoch": 1.86, + "learning_rate": 1.8972487991642053e-06, + "logits/chosen": -2.5681748390197754, + "logits/rejected": -1.8368475437164307, + "logps/chosen": -683.5762329101562, + "logps/rejected": -2212.142333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11474609375, + "rewards/margins": 15.572723388671875, + "rewards/rejected": -21.687469482421875, + "step": 31180 + }, + { + "epoch": 1.86, + "learning_rate": 1.8955655391773909e-06, + "logits/chosen": -2.5306694507598877, + "logits/rejected": -1.81454336643219, + "logps/chosen": -713.8809204101562, + "logps/rejected": -2068.742919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.396508693695068, + "rewards/margins": 13.858485221862793, + "rewards/rejected": -20.254993438720703, + "step": 31190 + }, + { + "epoch": 1.86, + "learning_rate": 1.893882570165318e-06, + "logits/chosen": -2.5122172832489014, + "logits/rejected": -1.8186931610107422, + "logps/chosen": -682.0884399414062, + "logps/rejected": -2204.095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182159423828125, + "rewards/margins": 15.409128189086914, + "rewards/rejected": -21.591283798217773, + "step": 31200 + }, + { + "epoch": 1.86, + "learning_rate": 1.8921998929381687e-06, + "logits/chosen": -2.4623119831085205, + "logits/rejected": -1.7325674295425415, + "logps/chosen": -677.292236328125, + "logps/rejected": -2165.65966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.062191009521484, + "rewards/margins": 15.136846542358398, + "rewards/rejected": -21.19903564453125, + "step": 31210 + }, + { + "epoch": 1.86, + "learning_rate": 1.8905175083059823e-06, + "logits/chosen": -2.464149236679077, + "logits/rejected": -1.713039755821228, + "logps/chosen": -674.3656005859375, + "logps/rejected": -2095.21240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0966997146606445, + "rewards/margins": 14.417970657348633, + "rewards/rejected": -20.51466941833496, + "step": 31220 + }, + { + "epoch": 1.86, + "learning_rate": 1.8888354170786604e-06, + "logits/chosen": -2.49920916557312, + "logits/rejected": -1.7222614288330078, + "logps/chosen": -662.5889282226562, + "logps/rejected": -2087.01708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910475254058838, + "rewards/margins": 14.52624797821045, + "rewards/rejected": -20.436725616455078, + "step": 31230 + }, + { + "epoch": 1.86, + "learning_rate": 1.8871536200659602e-06, + "logits/chosen": -2.5206427574157715, + "logits/rejected": -1.713749885559082, + "logps/chosen": -692.6257934570312, + "logps/rejected": -2123.4990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.206472873687744, + "rewards/margins": 14.577298164367676, + "rewards/rejected": -20.783769607543945, + "step": 31240 + }, + { + "epoch": 1.86, + "learning_rate": 1.8854721180775004e-06, + "logits/chosen": -2.5304932594299316, + "logits/rejected": -1.7457555532455444, + "logps/chosen": -671.2598876953125, + "logps/rejected": -2056.395263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.026214122772217, + "rewards/margins": 14.101995468139648, + "rewards/rejected": -20.128210067749023, + "step": 31250 + }, + { + "epoch": 1.86, + "learning_rate": 1.8837909119227541e-06, + "logits/chosen": -2.5136940479278564, + "logits/rejected": -1.7527433633804321, + "logps/chosen": -666.2811279296875, + "logps/rejected": -2046.9635009765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987043857574463, + "rewards/margins": 14.037958145141602, + "rewards/rejected": -20.024999618530273, + "step": 31260 + }, + { + "epoch": 1.86, + "learning_rate": 1.8821100024110558e-06, + "logits/chosen": -2.5423097610473633, + "logits/rejected": -1.9489820003509521, + "logps/chosen": -688.7305908203125, + "logps/rejected": -2035.93359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.112746715545654, + "rewards/margins": 13.807968139648438, + "rewards/rejected": -19.920713424682617, + "step": 31270 + }, + { + "epoch": 1.87, + "learning_rate": 1.8804293903515936e-06, + "logits/chosen": -2.4793665409088135, + "logits/rejected": -1.6864736080169678, + "logps/chosen": -682.4479370117188, + "logps/rejected": -2094.85498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.122793197631836, + "rewards/margins": 14.389448165893555, + "rewards/rejected": -20.51224136352539, + "step": 31280 + }, + { + "epoch": 1.87, + "learning_rate": 1.878749076553416e-06, + "logits/chosen": -2.5446934700012207, + "logits/rejected": -1.6953926086425781, + "logps/chosen": -680.7077026367188, + "logps/rejected": -2193.812255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073094367980957, + "rewards/margins": 15.409672737121582, + "rewards/rejected": -21.482769012451172, + "step": 31290 + }, + { + "epoch": 1.87, + "learning_rate": 1.877069061825425e-06, + "logits/chosen": -2.5018112659454346, + "logits/rejected": -1.755645990371704, + "logps/chosen": -678.7916870117188, + "logps/rejected": -2190.89013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1557488441467285, + "rewards/margins": 15.299423217773438, + "rewards/rejected": -21.455171585083008, + "step": 31300 + }, + { + "epoch": 1.87, + "learning_rate": 1.8753893469763787e-06, + "logits/chosen": -2.5526022911071777, + "logits/rejected": -1.8024393320083618, + "logps/chosen": -666.6770629882812, + "logps/rejected": -2175.875244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948794364929199, + "rewards/margins": 15.359210014343262, + "rewards/rejected": -21.30800437927246, + "step": 31310 + }, + { + "epoch": 1.87, + "learning_rate": 1.873709932814894e-06, + "logits/chosen": -2.448986291885376, + "logits/rejected": -1.7614939212799072, + "logps/chosen": -689.67919921875, + "logps/rejected": -2081.463623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.233992099761963, + "rewards/margins": 14.148656845092773, + "rewards/rejected": -20.382648468017578, + "step": 31320 + }, + { + "epoch": 1.87, + "learning_rate": 1.872030820149438e-06, + "logits/chosen": -2.5436248779296875, + "logits/rejected": -1.8705294132232666, + "logps/chosen": -697.1156005859375, + "logps/rejected": -2155.70947265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.270598411560059, + "rewards/margins": 14.837313652038574, + "rewards/rejected": -21.107913970947266, + "step": 31330 + }, + { + "epoch": 1.87, + "learning_rate": 1.8703520097883389e-06, + "logits/chosen": -2.540030002593994, + "logits/rejected": -1.856001853942871, + "logps/chosen": -684.9134521484375, + "logps/rejected": -2265.817626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1566162109375, + "rewards/margins": 16.049705505371094, + "rewards/rejected": -22.206323623657227, + "step": 31340 + }, + { + "epoch": 1.87, + "learning_rate": 1.8686735025397728e-06, + "logits/chosen": -2.515058755874634, + "logits/rejected": -1.706970453262329, + "logps/chosen": -669.0771484375, + "logps/rejected": -2202.29150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97584342956543, + "rewards/margins": 15.602005004882812, + "rewards/rejected": -21.577848434448242, + "step": 31350 + }, + { + "epoch": 1.87, + "learning_rate": 1.8669952992117757e-06, + "logits/chosen": -2.5357937812805176, + "logits/rejected": -1.7721319198608398, + "logps/chosen": -685.1337280273438, + "logps/rejected": -2041.840576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.135471343994141, + "rewards/margins": 13.824328422546387, + "rewards/rejected": -19.959802627563477, + "step": 31360 + }, + { + "epoch": 1.87, + "learning_rate": 1.8653174006122326e-06, + "logits/chosen": -2.550778865814209, + "logits/rejected": -1.8431848287582397, + "logps/chosen": -714.8507690429688, + "logps/rejected": -2116.12548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.432515621185303, + "rewards/margins": 14.3006010055542, + "rewards/rejected": -20.733118057250977, + "step": 31370 + }, + { + "epoch": 1.87, + "learning_rate": 1.8636398075488857e-06, + "logits/chosen": -2.4992308616638184, + "logits/rejected": -1.7804759740829468, + "logps/chosen": -671.846435546875, + "logps/rejected": -2199.35400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.996369361877441, + "rewards/margins": 15.550605773925781, + "rewards/rejected": -21.54697608947754, + "step": 31380 + }, + { + "epoch": 1.87, + "learning_rate": 1.861962520829327e-06, + "logits/chosen": -2.5421600341796875, + "logits/rejected": -1.7893133163452148, + "logps/chosen": -676.3184814453125, + "logps/rejected": -2113.68505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056149482727051, + "rewards/margins": 14.630647659301758, + "rewards/rejected": -20.68679428100586, + "step": 31390 + }, + { + "epoch": 1.87, + "learning_rate": 1.8602855412610022e-06, + "logits/chosen": -2.5056726932525635, + "logits/rejected": -1.7653312683105469, + "logps/chosen": -676.7332763671875, + "logps/rejected": -2157.246337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043990135192871, + "rewards/margins": 15.08549976348877, + "rewards/rejected": -21.12948989868164, + "step": 31400 + }, + { + "epoch": 1.87, + "learning_rate": 1.8586088696512101e-06, + "logits/chosen": -2.5485360622406006, + "logits/rejected": -1.7221044301986694, + "logps/chosen": -673.9915771484375, + "logps/rejected": -2104.90087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.007688045501709, + "rewards/margins": 14.593629837036133, + "rewards/rejected": -20.601318359375, + "step": 31410 + }, + { + "epoch": 1.87, + "learning_rate": 1.8569325068070995e-06, + "logits/chosen": -2.5531985759735107, + "logits/rejected": -1.836411476135254, + "logps/chosen": -665.7742309570312, + "logps/rejected": -2020.3402099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.897523403167725, + "rewards/margins": 13.874470710754395, + "rewards/rejected": -19.77199363708496, + "step": 31420 + }, + { + "epoch": 1.87, + "learning_rate": 1.855256453535672e-06, + "logits/chosen": -2.5488297939300537, + "logits/rejected": -1.879568099975586, + "logps/chosen": -684.2841186523438, + "logps/rejected": -2092.036376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.126728534698486, + "rewards/margins": 14.3504638671875, + "rewards/rejected": -20.477191925048828, + "step": 31430 + }, + { + "epoch": 1.87, + "learning_rate": 1.85358071064378e-06, + "logits/chosen": -2.4970650672912598, + "logits/rejected": -1.7233877182006836, + "logps/chosen": -674.5545043945312, + "logps/rejected": -2106.13671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0469489097595215, + "rewards/margins": 14.563199043273926, + "rewards/rejected": -20.610149383544922, + "step": 31440 + }, + { + "epoch": 1.88, + "learning_rate": 1.8519052789381253e-06, + "logits/chosen": -2.4929592609405518, + "logits/rejected": -1.8153314590454102, + "logps/chosen": -662.1588745117188, + "logps/rejected": -2075.784912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.881753921508789, + "rewards/margins": 14.425065994262695, + "rewards/rejected": -20.306819915771484, + "step": 31450 + }, + { + "epoch": 1.88, + "learning_rate": 1.8502301592252613e-06, + "logits/chosen": -2.550358295440674, + "logits/rejected": -1.9110345840454102, + "logps/chosen": -669.0233154296875, + "logps/rejected": -2104.50439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.005460262298584, + "rewards/margins": 14.593460083007812, + "rewards/rejected": -20.598922729492188, + "step": 31460 + }, + { + "epoch": 1.88, + "learning_rate": 1.8485553523115902e-06, + "logits/chosen": -2.535945415496826, + "logits/rejected": -1.9019775390625, + "logps/chosen": -666.5314331054688, + "logps/rejected": -2183.655029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994488716125488, + "rewards/margins": 15.394360542297363, + "rewards/rejected": -21.38884925842285, + "step": 31470 + }, + { + "epoch": 1.88, + "learning_rate": 1.8468808590033648e-06, + "logits/chosen": -2.5135371685028076, + "logits/rejected": -1.815284013748169, + "logps/chosen": -660.520751953125, + "logps/rejected": -2077.6484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.864614963531494, + "rewards/margins": 14.475217819213867, + "rewards/rejected": -20.339832305908203, + "step": 31480 + }, + { + "epoch": 1.88, + "learning_rate": 1.8452066801066842e-06, + "logits/chosen": -2.536487579345703, + "logits/rejected": -1.8389122486114502, + "logps/chosen": -666.0745849609375, + "logps/rejected": -2156.514404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998308181762695, + "rewards/margins": 15.116806030273438, + "rewards/rejected": -21.115114212036133, + "step": 31490 + }, + { + "epoch": 1.88, + "learning_rate": 1.8435328164275007e-06, + "logits/chosen": -2.5505480766296387, + "logits/rejected": -1.807960867881775, + "logps/chosen": -687.2302856445312, + "logps/rejected": -2174.63134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.086352348327637, + "rewards/margins": 15.223737716674805, + "rewards/rejected": -21.310091018676758, + "step": 31500 + }, + { + "epoch": 1.88, + "learning_rate": 1.8418592687716103e-06, + "logits/chosen": -2.4813716411590576, + "logits/rejected": -1.7491848468780518, + "logps/chosen": -671.69140625, + "logps/rejected": -2130.692138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059653282165527, + "rewards/margins": 14.800076484680176, + "rewards/rejected": -20.859729766845703, + "step": 31510 + }, + { + "epoch": 1.88, + "learning_rate": 1.84018603794466e-06, + "logits/chosen": -2.5021824836730957, + "logits/rejected": -1.7522392272949219, + "logps/chosen": -660.7980346679688, + "logps/rejected": -2134.374267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.926379680633545, + "rewards/margins": 14.979364395141602, + "rewards/rejected": -20.90574073791504, + "step": 31520 + }, + { + "epoch": 1.88, + "learning_rate": 1.838513124752142e-06, + "logits/chosen": -2.553065538406372, + "logits/rejected": -1.8561311960220337, + "logps/chosen": -660.3841552734375, + "logps/rejected": -2137.541748046875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.916483402252197, + "rewards/margins": 15.0096435546875, + "rewards/rejected": -20.926128387451172, + "step": 31530 + }, + { + "epoch": 1.88, + "learning_rate": 1.836840529999398e-06, + "logits/chosen": -2.509430408477783, + "logits/rejected": -1.7701047658920288, + "logps/chosen": -673.068603515625, + "logps/rejected": -2084.97705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.031548976898193, + "rewards/margins": 14.373799324035645, + "rewards/rejected": -20.405345916748047, + "step": 31540 + }, + { + "epoch": 1.88, + "learning_rate": 1.8351682544916135e-06, + "logits/chosen": -2.5347399711608887, + "logits/rejected": -1.8062559366226196, + "logps/chosen": -660.0123291015625, + "logps/rejected": -2160.91552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.861255645751953, + "rewards/margins": 15.30058479309082, + "rewards/rejected": -21.16183853149414, + "step": 31550 + }, + { + "epoch": 1.88, + "learning_rate": 1.833496299033824e-06, + "logits/chosen": -2.5240554809570312, + "logits/rejected": -1.9079163074493408, + "logps/chosen": -678.8167724609375, + "logps/rejected": -2025.2965087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08726167678833, + "rewards/margins": 13.729780197143555, + "rewards/rejected": -19.817041397094727, + "step": 31560 + }, + { + "epoch": 1.88, + "learning_rate": 1.8318246644309073e-06, + "logits/chosen": -2.5138323307037354, + "logits/rejected": -1.7587093114852905, + "logps/chosen": -677.0155029296875, + "logps/rejected": -2113.38037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073958873748779, + "rewards/margins": 14.617398262023926, + "rewards/rejected": -20.69135856628418, + "step": 31570 + }, + { + "epoch": 1.88, + "learning_rate": 1.8301533514875879e-06, + "logits/chosen": -2.5279042720794678, + "logits/rejected": -1.776003122329712, + "logps/chosen": -680.0172119140625, + "logps/rejected": -2075.169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0727949142456055, + "rewards/margins": 14.234553337097168, + "rewards/rejected": -20.307348251342773, + "step": 31580 + }, + { + "epoch": 1.88, + "learning_rate": 1.8284823610084375e-06, + "logits/chosen": -2.5220322608947754, + "logits/rejected": -1.7465803623199463, + "logps/chosen": -688.75634765625, + "logps/rejected": -2085.20751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202115058898926, + "rewards/margins": 14.214263916015625, + "rewards/rejected": -20.416378021240234, + "step": 31590 + }, + { + "epoch": 1.88, + "learning_rate": 1.8268116937978692e-06, + "logits/chosen": -2.5341200828552246, + "logits/rejected": -1.854112982749939, + "logps/chosen": -681.9093017578125, + "logps/rejected": -2083.123779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.18228816986084, + "rewards/margins": 14.202433586120605, + "rewards/rejected": -20.384719848632812, + "step": 31600 + }, + { + "epoch": 1.88, + "learning_rate": 1.8251413506601434e-06, + "logits/chosen": -2.5106453895568848, + "logits/rejected": -1.7568156719207764, + "logps/chosen": -679.7854614257812, + "logps/rejected": -2033.389892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0983452796936035, + "rewards/margins": 13.808639526367188, + "rewards/rejected": -19.906984329223633, + "step": 31610 + }, + { + "epoch": 1.89, + "learning_rate": 1.8234713323993622e-06, + "logits/chosen": -2.506343126296997, + "logits/rejected": -1.8089675903320312, + "logps/chosen": -686.0180053710938, + "logps/rejected": -2030.353759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.24291467666626, + "rewards/margins": 13.64179515838623, + "rewards/rejected": -19.884708404541016, + "step": 31620 + }, + { + "epoch": 1.89, + "learning_rate": 1.8218016398194739e-06, + "logits/chosen": -2.5177459716796875, + "logits/rejected": -1.6815439462661743, + "logps/chosen": -660.2144775390625, + "logps/rejected": -2086.366455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918868064880371, + "rewards/margins": 14.50586223602295, + "rewards/rejected": -20.424728393554688, + "step": 31630 + }, + { + "epoch": 1.89, + "learning_rate": 1.8201322737242663e-06, + "logits/chosen": -2.564689874649048, + "logits/rejected": -1.9311374425888062, + "logps/chosen": -698.2105712890625, + "logps/rejected": -2160.59814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.303228855133057, + "rewards/margins": 14.855209350585938, + "rewards/rejected": -21.158437728881836, + "step": 31640 + }, + { + "epoch": 1.89, + "learning_rate": 1.8184632349173747e-06, + "logits/chosen": -2.4459011554718018, + "logits/rejected": -1.7371448278427124, + "logps/chosen": -661.6908569335938, + "logps/rejected": -2054.84326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891894817352295, + "rewards/margins": 14.198354721069336, + "rewards/rejected": -20.090248107910156, + "step": 31650 + }, + { + "epoch": 1.89, + "learning_rate": 1.8167945242022725e-06, + "logits/chosen": -2.5200066566467285, + "logits/rejected": -1.7457606792449951, + "logps/chosen": -682.5364379882812, + "logps/rejected": -2194.509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.161208152770996, + "rewards/margins": 15.343893051147461, + "rewards/rejected": -21.505102157592773, + "step": 31660 + }, + { + "epoch": 1.89, + "learning_rate": 1.8151261423822776e-06, + "logits/chosen": -2.499648094177246, + "logits/rejected": -1.8017492294311523, + "logps/chosen": -673.1357421875, + "logps/rejected": -2148.84521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.057549476623535, + "rewards/margins": 14.98967456817627, + "rewards/rejected": -21.047222137451172, + "step": 31670 + }, + { + "epoch": 1.89, + "learning_rate": 1.8134580902605491e-06, + "logits/chosen": -2.5080151557922363, + "logits/rejected": -1.7245975732803345, + "logps/chosen": -684.85986328125, + "logps/rejected": -2066.88525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.122130393981934, + "rewards/margins": 14.098329544067383, + "rewards/rejected": -20.220458984375, + "step": 31680 + }, + { + "epoch": 1.89, + "learning_rate": 1.8117903686400867e-06, + "logits/chosen": -2.536485433578491, + "logits/rejected": -1.8337390422821045, + "logps/chosen": -658.4653930664062, + "logps/rejected": -2103.345458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.86602783203125, + "rewards/margins": 14.732902526855469, + "rewards/rejected": -20.59893226623535, + "step": 31690 + }, + { + "epoch": 1.89, + "learning_rate": 1.8101229783237328e-06, + "logits/chosen": -2.5452287197113037, + "logits/rejected": -1.879186987876892, + "logps/chosen": -656.0563354492188, + "logps/rejected": -2091.373291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.840323448181152, + "rewards/margins": 14.631396293640137, + "rewards/rejected": -20.471721649169922, + "step": 31700 + }, + { + "epoch": 1.89, + "learning_rate": 1.8084559201141677e-06, + "logits/chosen": -2.5546507835388184, + "logits/rejected": -1.8744655847549438, + "logps/chosen": -668.6863403320312, + "logps/rejected": -2151.26025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.952971458435059, + "rewards/margins": 15.114642143249512, + "rewards/rejected": -21.06761360168457, + "step": 31710 + }, + { + "epoch": 1.89, + "learning_rate": 1.806789194813915e-06, + "logits/chosen": -2.5454039573669434, + "logits/rejected": -1.8027263879776, + "logps/chosen": -675.0631713867188, + "logps/rejected": -2139.406005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022980690002441, + "rewards/margins": 14.913396835327148, + "rewards/rejected": -20.936376571655273, + "step": 31720 + }, + { + "epoch": 1.89, + "learning_rate": 1.8051228032253346e-06, + "logits/chosen": -2.5241518020629883, + "logits/rejected": -1.730176568031311, + "logps/chosen": -670.4571533203125, + "logps/rejected": -2134.217529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023379325866699, + "rewards/margins": 14.880056381225586, + "rewards/rejected": -20.903432846069336, + "step": 31730 + }, + { + "epoch": 1.89, + "learning_rate": 1.803456746150627e-06, + "logits/chosen": -2.5443334579467773, + "logits/rejected": -1.8476883172988892, + "logps/chosen": -680.4545288085938, + "logps/rejected": -2151.609130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128125190734863, + "rewards/margins": 14.926773071289062, + "rewards/rejected": -21.05489730834961, + "step": 31740 + }, + { + "epoch": 1.89, + "learning_rate": 1.8017910243918342e-06, + "logits/chosen": -2.5680091381073, + "logits/rejected": -1.8287990093231201, + "logps/chosen": -662.6675415039062, + "logps/rejected": -2118.635009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940465927124023, + "rewards/margins": 14.80407428741455, + "rewards/rejected": -20.744537353515625, + "step": 31750 + }, + { + "epoch": 1.89, + "learning_rate": 1.800125638750832e-06, + "logits/chosen": -2.546257495880127, + "logits/rejected": -1.8021694421768188, + "logps/chosen": -674.2554931640625, + "logps/rejected": -2101.64892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.052178382873535, + "rewards/margins": 14.520358085632324, + "rewards/rejected": -20.57253646850586, + "step": 31760 + }, + { + "epoch": 1.89, + "learning_rate": 1.7984605900293395e-06, + "logits/chosen": -2.566011905670166, + "logits/rejected": -1.8569812774658203, + "logps/chosen": -673.67822265625, + "logps/rejected": -2109.995361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023867607116699, + "rewards/margins": 14.644096374511719, + "rewards/rejected": -20.667964935302734, + "step": 31770 + }, + { + "epoch": 1.9, + "learning_rate": 1.796795879028909e-06, + "logits/chosen": -2.5348942279815674, + "logits/rejected": -1.7687547206878662, + "logps/chosen": -662.6175537109375, + "logps/rejected": -2081.605712890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.927577972412109, + "rewards/margins": 14.43957233428955, + "rewards/rejected": -20.36715316772461, + "step": 31780 + }, + { + "epoch": 1.9, + "learning_rate": 1.7951315065509345e-06, + "logits/chosen": -2.5791218280792236, + "logits/rejected": -1.906713843345642, + "logps/chosen": -657.8322143554688, + "logps/rejected": -2112.3212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8902974128723145, + "rewards/margins": 14.794517517089844, + "rewards/rejected": -20.684814453125, + "step": 31790 + }, + { + "epoch": 1.9, + "learning_rate": 1.7934674733966426e-06, + "logits/chosen": -2.517305850982666, + "logits/rejected": -1.8339431285858154, + "logps/chosen": -673.0100708007812, + "logps/rejected": -2007.3134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.959797382354736, + "rewards/margins": 13.662997245788574, + "rewards/rejected": -19.62279510498047, + "step": 31800 + }, + { + "epoch": 1.9, + "learning_rate": 1.7918037803671012e-06, + "logits/chosen": -2.518643856048584, + "logits/rejected": -1.8142063617706299, + "logps/chosen": -668.955078125, + "logps/rejected": -2087.5, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032654762268066, + "rewards/margins": 14.386624336242676, + "rewards/rejected": -20.41927719116211, + "step": 31810 + }, + { + "epoch": 1.9, + "learning_rate": 1.7901404282632108e-06, + "logits/chosen": -2.5242021083831787, + "logits/rejected": -1.7666393518447876, + "logps/chosen": -692.866943359375, + "logps/rejected": -2011.9459228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.222815990447998, + "rewards/margins": 13.469955444335938, + "rewards/rejected": -19.692771911621094, + "step": 31820 + }, + { + "epoch": 1.9, + "learning_rate": 1.7884774178857079e-06, + "logits/chosen": -2.5261178016662598, + "logits/rejected": -1.813174843788147, + "logps/chosen": -675.0730590820312, + "logps/rejected": -2174.090087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.102143287658691, + "rewards/margins": 15.1966552734375, + "rewards/rejected": -21.298797607421875, + "step": 31830 + }, + { + "epoch": 1.9, + "learning_rate": 1.7868147500351685e-06, + "logits/chosen": -2.540581226348877, + "logits/rejected": -1.8308451175689697, + "logps/chosen": -669.1474609375, + "logps/rejected": -2103.177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950408935546875, + "rewards/margins": 14.639341354370117, + "rewards/rejected": -20.589752197265625, + "step": 31840 + }, + { + "epoch": 1.9, + "learning_rate": 1.785152425511998e-06, + "logits/chosen": -2.553300619125366, + "logits/rejected": -1.7531518936157227, + "logps/chosen": -661.5137939453125, + "logps/rejected": -2079.744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.899259090423584, + "rewards/margins": 14.449076652526855, + "rewards/rejected": -20.34833526611328, + "step": 31850 + }, + { + "epoch": 1.9, + "learning_rate": 1.7834904451164417e-06, + "logits/chosen": -2.5476317405700684, + "logits/rejected": -1.7376571893692017, + "logps/chosen": -668.0157470703125, + "logps/rejected": -2096.0146484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987858772277832, + "rewards/margins": 14.53645133972168, + "rewards/rejected": -20.524311065673828, + "step": 31860 + }, + { + "epoch": 1.9, + "learning_rate": 1.781828809648575e-06, + "logits/chosen": -2.526134967803955, + "logits/rejected": -1.881959319114685, + "logps/chosen": -662.486083984375, + "logps/rejected": -2099.132080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.890093803405762, + "rewards/margins": 14.671844482421875, + "rewards/rejected": -20.56193733215332, + "step": 31870 + }, + { + "epoch": 1.9, + "learning_rate": 1.7801675199083113e-06, + "logits/chosen": -2.570955276489258, + "logits/rejected": -1.7847425937652588, + "logps/chosen": -676.6566772460938, + "logps/rejected": -2075.31591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.100907325744629, + "rewards/margins": 14.19285774230957, + "rewards/rejected": -20.293766021728516, + "step": 31880 + }, + { + "epoch": 1.9, + "learning_rate": 1.7785065766953932e-06, + "logits/chosen": -2.5313260555267334, + "logits/rejected": -1.759222388267517, + "logps/chosen": -678.8016967773438, + "logps/rejected": -2012.3245849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073349952697754, + "rewards/margins": 13.60699462890625, + "rewards/rejected": -19.68034553527832, + "step": 31890 + }, + { + "epoch": 1.9, + "learning_rate": 1.776845980809401e-06, + "logits/chosen": -2.4923341274261475, + "logits/rejected": -1.7050409317016602, + "logps/chosen": -684.4141845703125, + "logps/rejected": -2090.27685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.061863899230957, + "rewards/margins": 14.388888359069824, + "rewards/rejected": -20.45075035095215, + "step": 31900 + }, + { + "epoch": 1.9, + "learning_rate": 1.7751857330497441e-06, + "logits/chosen": -2.4764223098754883, + "logits/rejected": -1.6287758350372314, + "logps/chosen": -685.6068115234375, + "logps/rejected": -2063.49755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.16448450088501, + "rewards/margins": 14.024807929992676, + "rewards/rejected": -20.189292907714844, + "step": 31910 + }, + { + "epoch": 1.9, + "learning_rate": 1.7735258342156653e-06, + "logits/chosen": -2.50052547454834, + "logits/rejected": -1.724302053451538, + "logps/chosen": -678.4322509765625, + "logps/rejected": -2039.4833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.127143383026123, + "rewards/margins": 13.835949897766113, + "rewards/rejected": -19.963092803955078, + "step": 31920 + }, + { + "epoch": 1.9, + "learning_rate": 1.7718662851062416e-06, + "logits/chosen": -2.53113055229187, + "logits/rejected": -1.7889807224273682, + "logps/chosen": -669.7510375976562, + "logps/rejected": -2102.246337890625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.961824417114258, + "rewards/margins": 14.612472534179688, + "rewards/rejected": -20.57429313659668, + "step": 31930 + }, + { + "epoch": 1.9, + "learning_rate": 1.770207086520378e-06, + "logits/chosen": -2.542186737060547, + "logits/rejected": -1.9387693405151367, + "logps/chosen": -664.8095703125, + "logps/rejected": -2145.18017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.886063575744629, + "rewards/margins": 15.111764907836914, + "rewards/rejected": -20.997825622558594, + "step": 31940 + }, + { + "epoch": 1.91, + "learning_rate": 1.768548239256815e-06, + "logits/chosen": -2.51055645942688, + "logits/rejected": -1.806775689125061, + "logps/chosen": -670.0594482421875, + "logps/rejected": -2048.6552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.996561050415039, + "rewards/margins": 14.041154861450195, + "rewards/rejected": -20.037715911865234, + "step": 31950 + }, + { + "epoch": 1.91, + "learning_rate": 1.7668897441141187e-06, + "logits/chosen": -2.479149580001831, + "logits/rejected": -1.8539613485336304, + "logps/chosen": -661.8604125976562, + "logps/rejected": -2088.698486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90842342376709, + "rewards/margins": 14.555425643920898, + "rewards/rejected": -20.463848114013672, + "step": 31960 + }, + { + "epoch": 1.91, + "learning_rate": 1.7652316018906914e-06, + "logits/chosen": -2.5234737396240234, + "logits/rejected": -1.8936519622802734, + "logps/chosen": -669.1131591796875, + "logps/rejected": -2137.931884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0092973709106445, + "rewards/margins": 14.929888725280762, + "rewards/rejected": -20.939184188842773, + "step": 31970 + }, + { + "epoch": 1.91, + "learning_rate": 1.7635738133847608e-06, + "logits/chosen": -2.5243968963623047, + "logits/rejected": -1.7838189601898193, + "logps/chosen": -658.866455078125, + "logps/rejected": -2091.79052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8837385177612305, + "rewards/margins": 14.587514877319336, + "rewards/rejected": -20.471254348754883, + "step": 31980 + }, + { + "epoch": 1.91, + "learning_rate": 1.7619163793943881e-06, + "logits/chosen": -2.5353589057922363, + "logits/rejected": -1.8106300830841064, + "logps/chosen": -660.5498657226562, + "logps/rejected": -2157.89208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8619489669799805, + "rewards/margins": 15.272668838500977, + "rewards/rejected": -21.134618759155273, + "step": 31990 + }, + { + "epoch": 1.91, + "learning_rate": 1.7602593007174607e-06, + "logits/chosen": -2.481417179107666, + "logits/rejected": -1.7679088115692139, + "logps/chosen": -643.7301025390625, + "logps/rejected": -2081.82470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.765902042388916, + "rewards/margins": 14.617280960083008, + "rewards/rejected": -20.383182525634766, + "step": 32000 + }, + { + "epoch": 1.91, + "learning_rate": 1.7586025781516958e-06, + "logits/chosen": -2.5431551933288574, + "logits/rejected": -1.822715401649475, + "logps/chosen": -666.392333984375, + "logps/rejected": -2087.40283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9534196853637695, + "rewards/margins": 14.4821138381958, + "rewards/rejected": -20.43553352355957, + "step": 32010 + }, + { + "epoch": 1.91, + "learning_rate": 1.756946212494641e-06, + "logits/chosen": -2.5757317543029785, + "logits/rejected": -1.8546695709228516, + "logps/chosen": -678.2274169921875, + "logps/rejected": -2065.518310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07566499710083, + "rewards/margins": 14.146066665649414, + "rewards/rejected": -20.221731185913086, + "step": 32020 + }, + { + "epoch": 1.91, + "learning_rate": 1.7552902045436694e-06, + "logits/chosen": -2.5533394813537598, + "logits/rejected": -1.8443511724472046, + "logps/chosen": -654.71142578125, + "logps/rejected": -2050.023681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894805908203125, + "rewards/margins": 14.143171310424805, + "rewards/rejected": -20.037979125976562, + "step": 32030 + }, + { + "epoch": 1.91, + "learning_rate": 1.7536345550959844e-06, + "logits/chosen": -2.519038438796997, + "logits/rejected": -1.8104108572006226, + "logps/chosen": -655.6290283203125, + "logps/rejected": -2016.708740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.830054759979248, + "rewards/margins": 13.894088745117188, + "rewards/rejected": -19.72414207458496, + "step": 32040 + }, + { + "epoch": 1.91, + "learning_rate": 1.7519792649486143e-06, + "logits/chosen": -2.5561673641204834, + "logits/rejected": -1.9582977294921875, + "logps/chosen": -660.6058959960938, + "logps/rejected": -2059.12841796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.896145820617676, + "rewards/margins": 14.248781204223633, + "rewards/rejected": -20.14492416381836, + "step": 32050 + }, + { + "epoch": 1.91, + "learning_rate": 1.7503243348984173e-06, + "logits/chosen": -2.5944647789001465, + "logits/rejected": -1.9781516790390015, + "logps/chosen": -636.4100341796875, + "logps/rejected": -1985.648681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.666269779205322, + "rewards/margins": 13.749176025390625, + "rewards/rejected": -19.415447235107422, + "step": 32060 + }, + { + "epoch": 1.91, + "learning_rate": 1.7486697657420752e-06, + "logits/chosen": -2.5599992275238037, + "logits/rejected": -1.903822898864746, + "logps/chosen": -636.9549560546875, + "logps/rejected": -2067.44189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.704294204711914, + "rewards/margins": 14.526333808898926, + "rewards/rejected": -20.230628967285156, + "step": 32070 + }, + { + "epoch": 1.91, + "learning_rate": 1.7470155582760991e-06, + "logits/chosen": -2.5335302352905273, + "logits/rejected": -1.8735716342926025, + "logps/chosen": -652.973876953125, + "logps/rejected": -2017.224365234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.768768787384033, + "rewards/margins": 13.953198432922363, + "rewards/rejected": -19.721965789794922, + "step": 32080 + }, + { + "epoch": 1.91, + "learning_rate": 1.7453617132968236e-06, + "logits/chosen": -2.535254955291748, + "logits/rejected": -1.7438052892684937, + "logps/chosen": -639.5533447265625, + "logps/rejected": -2028.537841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.665746212005615, + "rewards/margins": 14.167078018188477, + "rewards/rejected": -19.83282470703125, + "step": 32090 + }, + { + "epoch": 1.91, + "learning_rate": 1.743708231600409e-06, + "logits/chosen": -2.5481367111206055, + "logits/rejected": -1.9324228763580322, + "logps/chosen": -657.883544921875, + "logps/rejected": -2037.4423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8749542236328125, + "rewards/margins": 14.045539855957031, + "rewards/rejected": -19.920494079589844, + "step": 32100 + }, + { + "epoch": 1.91, + "learning_rate": 1.742055113982843e-06, + "logits/chosen": -2.586078405380249, + "logits/rejected": -1.8053712844848633, + "logps/chosen": -643.3622436523438, + "logps/rejected": -2117.08837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.73300313949585, + "rewards/margins": 14.989480972290039, + "rewards/rejected": -20.722484588623047, + "step": 32110 + }, + { + "epoch": 1.92, + "learning_rate": 1.7404023612399342e-06, + "logits/chosen": -2.54138445854187, + "logits/rejected": -1.8598817586898804, + "logps/chosen": -656.8760375976562, + "logps/rejected": -2025.406005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906852722167969, + "rewards/margins": 13.895299911499023, + "rewards/rejected": -19.802154541015625, + "step": 32120 + }, + { + "epoch": 1.92, + "learning_rate": 1.7387499741673197e-06, + "logits/chosen": -2.565333366394043, + "logits/rejected": -1.8942527770996094, + "logps/chosen": -656.6160888671875, + "logps/rejected": -2122.15087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.84365177154541, + "rewards/margins": 14.926797866821289, + "rewards/rejected": -20.770450592041016, + "step": 32130 + }, + { + "epoch": 1.92, + "learning_rate": 1.737097953560458e-06, + "logits/chosen": -2.535109758377075, + "logits/rejected": -1.786389708518982, + "logps/chosen": -645.2025756835938, + "logps/rejected": -2114.05517578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.742848873138428, + "rewards/margins": 14.958826065063477, + "rewards/rejected": -20.701675415039062, + "step": 32140 + }, + { + "epoch": 1.92, + "learning_rate": 1.7354463002146315e-06, + "logits/chosen": -2.5481410026550293, + "logits/rejected": -1.7213897705078125, + "logps/chosen": -646.656005859375, + "logps/rejected": -2044.9619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760340213775635, + "rewards/margins": 14.248620986938477, + "rewards/rejected": -20.008960723876953, + "step": 32150 + }, + { + "epoch": 1.92, + "learning_rate": 1.7337950149249466e-06, + "logits/chosen": -2.4899237155914307, + "logits/rejected": -1.8075931072235107, + "logps/chosen": -654.4920654296875, + "logps/rejected": -2005.845458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.845491409301758, + "rewards/margins": 13.77690601348877, + "rewards/rejected": -19.62239646911621, + "step": 32160 + }, + { + "epoch": 1.92, + "learning_rate": 1.7321440984863314e-06, + "logits/chosen": -2.491978883743286, + "logits/rejected": -1.646166205406189, + "logps/chosen": -648.9852905273438, + "logps/rejected": -2109.7861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.755383491516113, + "rewards/margins": 14.896509170532227, + "rewards/rejected": -20.65189552307129, + "step": 32170 + }, + { + "epoch": 1.92, + "learning_rate": 1.7304935516935383e-06, + "logits/chosen": -2.551699161529541, + "logits/rejected": -1.9215768575668335, + "logps/chosen": -667.86767578125, + "logps/rejected": -2061.136474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92161226272583, + "rewards/margins": 14.231854438781738, + "rewards/rejected": -20.15346908569336, + "step": 32180 + }, + { + "epoch": 1.92, + "learning_rate": 1.7288433753411383e-06, + "logits/chosen": -2.5372366905212402, + "logits/rejected": -1.662827491760254, + "logps/chosen": -646.6094360351562, + "logps/rejected": -2056.845947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.796603202819824, + "rewards/margins": 14.33043098449707, + "rewards/rejected": -20.127033233642578, + "step": 32190 + }, + { + "epoch": 1.92, + "learning_rate": 1.7271935702235287e-06, + "logits/chosen": -2.5230915546417236, + "logits/rejected": -1.7939298152923584, + "logps/chosen": -659.7490844726562, + "logps/rejected": -2115.1640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.937402248382568, + "rewards/margins": 14.77979564666748, + "rewards/rejected": -20.717199325561523, + "step": 32200 + }, + { + "epoch": 1.92, + "learning_rate": 1.7255441371349236e-06, + "logits/chosen": -2.5197319984436035, + "logits/rejected": -1.81357741355896, + "logps/chosen": -646.5714111328125, + "logps/rejected": -2074.39599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.758805751800537, + "rewards/margins": 14.535199165344238, + "rewards/rejected": -20.29400634765625, + "step": 32210 + }, + { + "epoch": 1.92, + "learning_rate": 1.7238950768693619e-06, + "logits/chosen": -2.5158214569091797, + "logits/rejected": -1.8443915843963623, + "logps/chosen": -641.7559814453125, + "logps/rejected": -2093.5068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.69727087020874, + "rewards/margins": 14.804391860961914, + "rewards/rejected": -20.501663208007812, + "step": 32220 + }, + { + "epoch": 1.92, + "learning_rate": 1.7222463902206992e-06, + "logits/chosen": -2.5304908752441406, + "logits/rejected": -1.6997102499008179, + "logps/chosen": -664.2147827148438, + "logps/rejected": -2029.811279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859271049499512, + "rewards/margins": 13.998420715332031, + "rewards/rejected": -19.857690811157227, + "step": 32230 + }, + { + "epoch": 1.92, + "learning_rate": 1.7205980779826156e-06, + "logits/chosen": -2.541954278945923, + "logits/rejected": -1.906732201576233, + "logps/chosen": -647.3178100585938, + "logps/rejected": -2094.77197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.759263515472412, + "rewards/margins": 14.734540939331055, + "rewards/rejected": -20.493804931640625, + "step": 32240 + }, + { + "epoch": 1.92, + "learning_rate": 1.7189501409486061e-06, + "logits/chosen": -2.4848577976226807, + "logits/rejected": -1.7666680812835693, + "logps/chosen": -640.1151123046875, + "logps/rejected": -2116.4931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.747053146362305, + "rewards/margins": 14.97404670715332, + "rewards/rejected": -20.721099853515625, + "step": 32250 + }, + { + "epoch": 1.92, + "learning_rate": 1.71730257991199e-06, + "logits/chosen": -2.5356040000915527, + "logits/rejected": -1.8909698724746704, + "logps/chosen": -664.4620361328125, + "logps/rejected": -2077.678955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.89597225189209, + "rewards/margins": 14.454401016235352, + "rewards/rejected": -20.350372314453125, + "step": 32260 + }, + { + "epoch": 1.92, + "learning_rate": 1.7156553956659018e-06, + "logits/chosen": -2.5241379737854004, + "logits/rejected": -1.8051462173461914, + "logps/chosen": -638.140869140625, + "logps/rejected": -2091.45263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.668332576751709, + "rewards/margins": 14.801078796386719, + "rewards/rejected": -20.469411849975586, + "step": 32270 + }, + { + "epoch": 1.92, + "learning_rate": 1.7140085890032951e-06, + "logits/chosen": -2.511251211166382, + "logits/rejected": -1.6937830448150635, + "logps/chosen": -674.2169799804688, + "logps/rejected": -1970.757568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.999487400054932, + "rewards/margins": 13.267766952514648, + "rewards/rejected": -19.267253875732422, + "step": 32280 + }, + { + "epoch": 1.93, + "learning_rate": 1.7123621607169446e-06, + "logits/chosen": -2.5487351417541504, + "logits/rejected": -1.9510395526885986, + "logps/chosen": -664.3482666015625, + "logps/rejected": -2123.8271484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923382759094238, + "rewards/margins": 14.87732982635498, + "rewards/rejected": -20.80071449279785, + "step": 32290 + }, + { + "epoch": 1.93, + "learning_rate": 1.7107161115994392e-06, + "logits/chosen": -2.53560209274292, + "logits/rejected": -1.834191918373108, + "logps/chosen": -671.7197265625, + "logps/rejected": -2110.908447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.062888145446777, + "rewards/margins": 14.60772705078125, + "rewards/rejected": -20.670612335205078, + "step": 32300 + }, + { + "epoch": 1.93, + "learning_rate": 1.7090704424431882e-06, + "logits/chosen": -2.5597567558288574, + "logits/rejected": -1.8905704021453857, + "logps/chosen": -659.1177978515625, + "logps/rejected": -2132.777587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.879720687866211, + "rewards/margins": 14.996617317199707, + "rewards/rejected": -20.876338958740234, + "step": 32310 + }, + { + "epoch": 1.93, + "learning_rate": 1.7074251540404153e-06, + "logits/chosen": -2.5724518299102783, + "logits/rejected": -1.771118402481079, + "logps/chosen": -668.5458374023438, + "logps/rejected": -2045.6224365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.977845191955566, + "rewards/margins": 14.04401683807373, + "rewards/rejected": -20.021862030029297, + "step": 32320 + }, + { + "epoch": 1.93, + "learning_rate": 1.7057802471831642e-06, + "logits/chosen": -2.5180623531341553, + "logits/rejected": -1.8645305633544922, + "logps/chosen": -667.75732421875, + "logps/rejected": -2143.339111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.943737030029297, + "rewards/margins": 15.045519828796387, + "rewards/rejected": -20.9892578125, + "step": 32330 + }, + { + "epoch": 1.93, + "learning_rate": 1.704135722663291e-06, + "logits/chosen": -2.569342851638794, + "logits/rejected": -1.8387629985809326, + "logps/chosen": -663.3777465820312, + "logps/rejected": -2131.79931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.949474811553955, + "rewards/margins": 14.92173957824707, + "rewards/rejected": -20.871212005615234, + "step": 32340 + }, + { + "epoch": 1.93, + "learning_rate": 1.7024915812724715e-06, + "logits/chosen": -2.5511679649353027, + "logits/rejected": -1.8958740234375, + "logps/chosen": -685.5296020507812, + "logps/rejected": -2096.15087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.134346961975098, + "rewards/margins": 14.397573471069336, + "rewards/rejected": -20.53192138671875, + "step": 32350 + }, + { + "epoch": 1.93, + "learning_rate": 1.7008478238021942e-06, + "logits/chosen": -2.511876344680786, + "logits/rejected": -1.8628482818603516, + "logps/chosen": -661.837158203125, + "logps/rejected": -2086.867431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.922863483428955, + "rewards/margins": 14.500848770141602, + "rewards/rejected": -20.423709869384766, + "step": 32360 + }, + { + "epoch": 1.93, + "learning_rate": 1.6992044510437644e-06, + "logits/chosen": -2.5264315605163574, + "logits/rejected": -1.8471641540527344, + "logps/chosen": -655.3131103515625, + "logps/rejected": -2076.95703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.815554141998291, + "rewards/margins": 14.498041152954102, + "rewards/rejected": -20.313594818115234, + "step": 32370 + }, + { + "epoch": 1.93, + "learning_rate": 1.697561463788302e-06, + "logits/chosen": -2.5946760177612305, + "logits/rejected": -1.9206202030181885, + "logps/chosen": -672.3184814453125, + "logps/rejected": -2076.010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018553733825684, + "rewards/margins": 14.30915355682373, + "rewards/rejected": -20.327709197998047, + "step": 32380 + }, + { + "epoch": 1.93, + "learning_rate": 1.6959188628267403e-06, + "logits/chosen": -2.5528178215026855, + "logits/rejected": -1.9353179931640625, + "logps/chosen": -652.8182373046875, + "logps/rejected": -2068.733154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.757674217224121, + "rewards/margins": 14.48071002960205, + "rewards/rejected": -20.238384246826172, + "step": 32390 + }, + { + "epoch": 1.93, + "learning_rate": 1.6942766489498278e-06, + "logits/chosen": -2.552666187286377, + "logits/rejected": -1.8626493215560913, + "logps/chosen": -661.7618408203125, + "logps/rejected": -2125.241455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.871382713317871, + "rewards/margins": 14.918573379516602, + "rewards/rejected": -20.789958953857422, + "step": 32400 + }, + { + "epoch": 1.93, + "learning_rate": 1.6926348229481263e-06, + "logits/chosen": -2.5433292388916016, + "logits/rejected": -1.836554765701294, + "logps/chosen": -655.2925415039062, + "logps/rejected": -2070.915283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.848995685577393, + "rewards/margins": 14.420710563659668, + "rewards/rejected": -20.26970672607422, + "step": 32410 + }, + { + "epoch": 1.93, + "learning_rate": 1.6909933856120106e-06, + "logits/chosen": -2.573272943496704, + "logits/rejected": -1.7623640298843384, + "logps/chosen": -661.7720947265625, + "logps/rejected": -2155.148681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918957233428955, + "rewards/margins": 15.191699028015137, + "rewards/rejected": -21.110654830932617, + "step": 32420 + }, + { + "epoch": 1.93, + "learning_rate": 1.689352337731669e-06, + "logits/chosen": -2.594900608062744, + "logits/rejected": -1.8911349773406982, + "logps/chosen": -665.4561767578125, + "logps/rejected": -2084.130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948300361633301, + "rewards/margins": 14.449983596801758, + "rewards/rejected": -20.398283004760742, + "step": 32430 + }, + { + "epoch": 1.93, + "learning_rate": 1.6877116800971017e-06, + "logits/chosen": -2.5028598308563232, + "logits/rejected": -1.7215893268585205, + "logps/chosen": -669.3375244140625, + "logps/rejected": -2153.32275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963995456695557, + "rewards/margins": 15.1131591796875, + "rewards/rejected": -21.0771541595459, + "step": 32440 + }, + { + "epoch": 1.94, + "learning_rate": 1.6860714134981214e-06, + "logits/chosen": -2.5039453506469727, + "logits/rejected": -1.7631428241729736, + "logps/chosen": -677.50732421875, + "logps/rejected": -2049.57568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.141085624694824, + "rewards/margins": 13.916786193847656, + "rewards/rejected": -20.057870864868164, + "step": 32450 + }, + { + "epoch": 1.94, + "learning_rate": 1.6844315387243514e-06, + "logits/chosen": -2.5803475379943848, + "logits/rejected": -1.9443743228912354, + "logps/chosen": -660.4259033203125, + "logps/rejected": -2068.665771484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.931310176849365, + "rewards/margins": 14.316081047058105, + "rewards/rejected": -20.247392654418945, + "step": 32460 + }, + { + "epoch": 1.94, + "learning_rate": 1.6827920565652295e-06, + "logits/chosen": -2.513798952102661, + "logits/rejected": -1.720107078552246, + "logps/chosen": -632.1182861328125, + "logps/rejected": -2035.3531494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.714432716369629, + "rewards/margins": 14.212579727172852, + "rewards/rejected": -19.92701530456543, + "step": 32470 + }, + { + "epoch": 1.94, + "learning_rate": 1.6811529678099997e-06, + "logits/chosen": -2.5719833374023438, + "logits/rejected": -1.9504988193511963, + "logps/chosen": -664.9909057617188, + "logps/rejected": -2095.1982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.91483211517334, + "rewards/margins": 14.589508056640625, + "rewards/rejected": -20.50433921813965, + "step": 32480 + }, + { + "epoch": 1.94, + "learning_rate": 1.6795142732477222e-06, + "logits/chosen": -2.5754265785217285, + "logits/rejected": -1.8988683223724365, + "logps/chosen": -681.3566284179688, + "logps/rejected": -2216.044189453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103714942932129, + "rewards/margins": 15.61766529083252, + "rewards/rejected": -21.721378326416016, + "step": 32490 + }, + { + "epoch": 1.94, + "learning_rate": 1.6778759736672617e-06, + "logits/chosen": -2.586102247238159, + "logits/rejected": -2.029588222503662, + "logps/chosen": -678.9683837890625, + "logps/rejected": -2080.483154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035078048706055, + "rewards/margins": 14.333707809448242, + "rewards/rejected": -20.36878776550293, + "step": 32500 + }, + { + "epoch": 1.94, + "learning_rate": 1.6762380698572982e-06, + "logits/chosen": -2.5415239334106445, + "logits/rejected": -1.8594348430633545, + "logps/chosen": -667.5462036132812, + "logps/rejected": -2093.434326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9452948570251465, + "rewards/margins": 14.541857719421387, + "rewards/rejected": -20.487154006958008, + "step": 32510 + }, + { + "epoch": 1.94, + "learning_rate": 1.6746005626063163e-06, + "logits/chosen": -2.5231893062591553, + "logits/rejected": -1.8070186376571655, + "logps/chosen": -674.9584350585938, + "logps/rejected": -2191.93994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009574890136719, + "rewards/margins": 15.452740669250488, + "rewards/rejected": -21.462318420410156, + "step": 32520 + }, + { + "epoch": 1.94, + "learning_rate": 1.6729634527026141e-06, + "logits/chosen": -2.5171937942504883, + "logits/rejected": -1.8285858631134033, + "logps/chosen": -679.4744873046875, + "logps/rejected": -2047.8658447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149387359619141, + "rewards/margins": 13.884664535522461, + "rewards/rejected": -20.034048080444336, + "step": 32530 + }, + { + "epoch": 1.94, + "learning_rate": 1.6713267409342953e-06, + "logits/chosen": -2.507965326309204, + "logits/rejected": -1.8232309818267822, + "logps/chosen": -658.6285400390625, + "logps/rejected": -1999.5035400390625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.913818359375, + "rewards/margins": 13.64130687713623, + "rewards/rejected": -19.555126190185547, + "step": 32540 + }, + { + "epoch": 1.94, + "learning_rate": 1.6696904280892716e-06, + "logits/chosen": -2.5353381633758545, + "logits/rejected": -1.779229760169983, + "logps/chosen": -664.3138427734375, + "logps/rejected": -2066.937255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.929254055023193, + "rewards/margins": 14.296585083007812, + "rewards/rejected": -20.225841522216797, + "step": 32550 + }, + { + "epoch": 1.94, + "learning_rate": 1.6680545149552667e-06, + "logits/chosen": -2.5498595237731934, + "logits/rejected": -1.858630895614624, + "logps/chosen": -667.5797119140625, + "logps/rejected": -2127.98291015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018681526184082, + "rewards/margins": 14.818349838256836, + "rewards/rejected": -20.837032318115234, + "step": 32560 + }, + { + "epoch": 1.94, + "learning_rate": 1.6664190023198063e-06, + "logits/chosen": -2.512878894805908, + "logits/rejected": -1.8569624423980713, + "logps/chosen": -673.6962890625, + "logps/rejected": -2111.104736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.060622692108154, + "rewards/margins": 14.599248886108398, + "rewards/rejected": -20.659870147705078, + "step": 32570 + }, + { + "epoch": 1.94, + "learning_rate": 1.6647838909702287e-06, + "logits/chosen": -2.5344181060791016, + "logits/rejected": -1.9074556827545166, + "logps/chosen": -659.1807861328125, + "logps/rejected": -2127.56298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.863527774810791, + "rewards/margins": 14.972516059875488, + "rewards/rejected": -20.836040496826172, + "step": 32580 + }, + { + "epoch": 1.94, + "learning_rate": 1.6631491816936743e-06, + "logits/chosen": -2.5164642333984375, + "logits/rejected": -1.7937148809432983, + "logps/chosen": -666.7679443359375, + "logps/rejected": -2124.22998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.976696968078613, + "rewards/margins": 14.810791015625, + "rewards/rejected": -20.78748893737793, + "step": 32590 + }, + { + "epoch": 1.94, + "learning_rate": 1.6615148752770943e-06, + "logits/chosen": -2.5266518592834473, + "logits/rejected": -1.809695839881897, + "logps/chosen": -677.8360595703125, + "logps/rejected": -2000.7568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0446648597717285, + "rewards/margins": 13.529828071594238, + "rewards/rejected": -19.57448959350586, + "step": 32600 + }, + { + "epoch": 1.94, + "learning_rate": 1.6598809725072412e-06, + "logits/chosen": -2.5289251804351807, + "logits/rejected": -1.9111812114715576, + "logps/chosen": -676.8175659179688, + "logps/rejected": -2082.097900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.090604305267334, + "rewards/margins": 14.282754898071289, + "rewards/rejected": -20.37335777282715, + "step": 32610 + }, + { + "epoch": 1.95, + "learning_rate": 1.658247474170679e-06, + "logits/chosen": -2.4667553901672363, + "logits/rejected": -1.8385871648788452, + "logps/chosen": -707.1740112304688, + "logps/rejected": -1983.615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.382631778717041, + "rewards/margins": 13.013821601867676, + "rewards/rejected": -19.396451950073242, + "step": 32620 + }, + { + "epoch": 1.95, + "learning_rate": 1.6566143810537708e-06, + "logits/chosen": -2.550952434539795, + "logits/rejected": -1.792313814163208, + "logps/chosen": -716.0574951171875, + "logps/rejected": -2084.405029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4283013343811035, + "rewards/margins": 13.98701286315918, + "rewards/rejected": -20.415313720703125, + "step": 32630 + }, + { + "epoch": 1.95, + "learning_rate": 1.6549816939426888e-06, + "logits/chosen": -2.5846755504608154, + "logits/rejected": -1.892551064491272, + "logps/chosen": -712.4031372070312, + "logps/rejected": -2052.03466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.376733303070068, + "rewards/margins": 13.700843811035156, + "rewards/rejected": -20.077573776245117, + "step": 32640 + }, + { + "epoch": 1.95, + "learning_rate": 1.6533494136234092e-06, + "logits/chosen": -2.553999185562134, + "logits/rejected": -1.9374173879623413, + "logps/chosen": -697.361083984375, + "logps/rejected": -2064.83740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.288210868835449, + "rewards/margins": 13.919116020202637, + "rewards/rejected": -20.207326889038086, + "step": 32650 + }, + { + "epoch": 1.95, + "learning_rate": 1.6517175408817104e-06, + "logits/chosen": -2.5409793853759766, + "logits/rejected": -1.9879001379013062, + "logps/chosen": -701.799072265625, + "logps/rejected": -2128.025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.260313510894775, + "rewards/margins": 14.589065551757812, + "rewards/rejected": -20.849380493164062, + "step": 32660 + }, + { + "epoch": 1.95, + "learning_rate": 1.6500860765031767e-06, + "logits/chosen": -2.531003475189209, + "logits/rejected": -1.8217452764511108, + "logps/chosen": -680.5028076171875, + "logps/rejected": -2128.3046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.123017311096191, + "rewards/margins": 14.714909553527832, + "rewards/rejected": -20.837926864624023, + "step": 32670 + }, + { + "epoch": 1.95, + "learning_rate": 1.6484550212731954e-06, + "logits/chosen": -2.515815496444702, + "logits/rejected": -1.7599939107894897, + "logps/chosen": -690.750732421875, + "logps/rejected": -2111.5830078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2024946212768555, + "rewards/margins": 14.474082946777344, + "rewards/rejected": -20.67657470703125, + "step": 32680 + }, + { + "epoch": 1.95, + "learning_rate": 1.6468243759769558e-06, + "logits/chosen": -2.5522115230560303, + "logits/rejected": -1.849648118019104, + "logps/chosen": -688.19140625, + "logps/rejected": -2077.13525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.200822353363037, + "rewards/margins": 14.126078605651855, + "rewards/rejected": -20.326900482177734, + "step": 32690 + }, + { + "epoch": 1.95, + "learning_rate": 1.64519414139945e-06, + "logits/chosen": -2.5281119346618652, + "logits/rejected": -1.7163368463516235, + "logps/chosen": -668.7896728515625, + "logps/rejected": -2086.60107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989457130432129, + "rewards/margins": 14.440106391906738, + "rewards/rejected": -20.4295654296875, + "step": 32700 + }, + { + "epoch": 1.95, + "learning_rate": 1.643564318325475e-06, + "logits/chosen": -2.5686967372894287, + "logits/rejected": -1.796938180923462, + "logps/chosen": -701.7684326171875, + "logps/rejected": -2136.37109375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.344437599182129, + "rewards/margins": 14.556474685668945, + "rewards/rejected": -20.900909423828125, + "step": 32710 + }, + { + "epoch": 1.95, + "learning_rate": 1.6419349075396256e-06, + "logits/chosen": -2.474684238433838, + "logits/rejected": -1.7782037258148193, + "logps/chosen": -817.6812744140625, + "logps/rejected": -2182.968505859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4982171058654785, + "rewards/margins": 13.899930953979492, + "rewards/rejected": -21.39814567565918, + "step": 32720 + }, + { + "epoch": 1.95, + "learning_rate": 1.6403059098263003e-06, + "logits/chosen": -2.41338849067688, + "logits/rejected": -1.543988585472107, + "logps/chosen": -830.3816528320312, + "logps/rejected": -2245.02197265625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651853084564209, + "rewards/margins": 14.35450267791748, + "rewards/rejected": -22.006357192993164, + "step": 32730 + }, + { + "epoch": 1.95, + "learning_rate": 1.6386773259697e-06, + "logits/chosen": -2.475435733795166, + "logits/rejected": -1.611682653427124, + "logps/chosen": -822.2548828125, + "logps/rejected": -2213.848876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.505457401275635, + "rewards/margins": 14.194453239440918, + "rewards/rejected": -21.699909210205078, + "step": 32740 + }, + { + "epoch": 1.95, + "learning_rate": 1.637049156753823e-06, + "logits/chosen": -2.4885244369506836, + "logits/rejected": -1.7263309955596924, + "logps/chosen": -856.2490234375, + "logps/rejected": -2307.5908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.852208614349365, + "rewards/margins": 14.77551555633545, + "rewards/rejected": -22.62772560119629, + "step": 32750 + }, + { + "epoch": 1.95, + "learning_rate": 1.6354214029624719e-06, + "logits/chosen": -2.503196954727173, + "logits/rejected": -1.7975873947143555, + "logps/chosen": -855.0782470703125, + "logps/rejected": -2229.72412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.803177833557129, + "rewards/margins": 14.059305191040039, + "rewards/rejected": -21.862483978271484, + "step": 32760 + }, + { + "epoch": 1.95, + "learning_rate": 1.6337940653792453e-06, + "logits/chosen": -2.4755940437316895, + "logits/rejected": -1.5864580869674683, + "logps/chosen": -842.8184814453125, + "logps/rejected": -2173.258056640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.696218967437744, + "rewards/margins": 13.592948913574219, + "rewards/rejected": -21.289169311523438, + "step": 32770 + }, + { + "epoch": 1.95, + "learning_rate": 1.6321671447875457e-06, + "logits/chosen": -2.5576515197753906, + "logits/rejected": -1.6722275018692017, + "logps/chosen": -810.0064086914062, + "logps/rejected": -2199.50341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.393337249755859, + "rewards/margins": 14.147448539733887, + "rewards/rejected": -21.540786743164062, + "step": 32780 + }, + { + "epoch": 1.96, + "learning_rate": 1.6305406419705704e-06, + "logits/chosen": -2.4672584533691406, + "logits/rejected": -1.6834590435028076, + "logps/chosen": -820.8098754882812, + "logps/rejected": -2147.927734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.554598331451416, + "rewards/margins": 13.480618476867676, + "rewards/rejected": -21.03521728515625, + "step": 32790 + }, + { + "epoch": 1.96, + "learning_rate": 1.6289145577113202e-06, + "logits/chosen": -2.49239444732666, + "logits/rejected": -1.6275615692138672, + "logps/chosen": -846.1553955078125, + "logps/rejected": -2190.255859375, + "loss": 0.0006, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.759880065917969, + "rewards/margins": 13.70372200012207, + "rewards/rejected": -21.46360206604004, + "step": 32800 + }, + { + "epoch": 1.96, + "learning_rate": 1.6272888927925906e-06, + "logits/chosen": -2.5159404277801514, + "logits/rejected": -1.6923046112060547, + "logps/chosen": -827.9220581054688, + "logps/rejected": -2189.346435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5849738121032715, + "rewards/margins": 13.870832443237305, + "rewards/rejected": -21.455806732177734, + "step": 32810 + }, + { + "epoch": 1.96, + "learning_rate": 1.6256636479969757e-06, + "logits/chosen": -2.4815495014190674, + "logits/rejected": -1.7334344387054443, + "logps/chosen": -846.0925903320312, + "logps/rejected": -2206.338623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.751832485198975, + "rewards/margins": 13.86897087097168, + "rewards/rejected": -21.620805740356445, + "step": 32820 + }, + { + "epoch": 1.96, + "learning_rate": 1.624038824106871e-06, + "logits/chosen": -2.4810712337493896, + "logits/rejected": -1.8205972909927368, + "logps/chosen": -860.0430908203125, + "logps/rejected": -2240.38232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.890679836273193, + "rewards/margins": 14.0693941116333, + "rewards/rejected": -21.960073471069336, + "step": 32830 + }, + { + "epoch": 1.96, + "learning_rate": 1.6224144219044637e-06, + "logits/chosen": -2.487806797027588, + "logits/rejected": -1.7294352054595947, + "logps/chosen": -805.0546264648438, + "logps/rejected": -2179.93603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.397875785827637, + "rewards/margins": 13.967000007629395, + "rewards/rejected": -21.36487579345703, + "step": 32840 + }, + { + "epoch": 1.96, + "learning_rate": 1.6207904421717438e-06, + "logits/chosen": -2.468306064605713, + "logits/rejected": -1.6308501958847046, + "logps/chosen": -885.3369140625, + "logps/rejected": -2235.701171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.12895393371582, + "rewards/margins": 13.782766342163086, + "rewards/rejected": -21.91172218322754, + "step": 32850 + }, + { + "epoch": 1.96, + "learning_rate": 1.619166885690493e-06, + "logits/chosen": -2.443571090698242, + "logits/rejected": -1.73911452293396, + "logps/chosen": -864.8828125, + "logps/rejected": -2235.691650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.960904598236084, + "rewards/margins": 13.95105266571045, + "rewards/rejected": -21.91196060180664, + "step": 32860 + }, + { + "epoch": 1.96, + "learning_rate": 1.6175437532422927e-06, + "logits/chosen": -2.4763436317443848, + "logits/rejected": -1.6849759817123413, + "logps/chosen": -856.0335693359375, + "logps/rejected": -2200.0419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.866452217102051, + "rewards/margins": 13.701830863952637, + "rewards/rejected": -21.568283081054688, + "step": 32870 + }, + { + "epoch": 1.96, + "learning_rate": 1.6159210456085179e-06, + "logits/chosen": -2.5309054851531982, + "logits/rejected": -1.808117151260376, + "logps/chosen": -829.58837890625, + "logps/rejected": -2250.8203125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.55907678604126, + "rewards/margins": 14.507261276245117, + "rewards/rejected": -22.06633949279785, + "step": 32880 + }, + { + "epoch": 1.96, + "learning_rate": 1.6142987635703412e-06, + "logits/chosen": -2.5183465480804443, + "logits/rejected": -1.7463527917861938, + "logps/chosen": -853.0228271484375, + "logps/rejected": -2191.03076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.789284706115723, + "rewards/margins": 13.666389465332031, + "rewards/rejected": -21.45567512512207, + "step": 32890 + }, + { + "epoch": 1.96, + "learning_rate": 1.6126769079087279e-06, + "logits/chosen": -2.531557083129883, + "logits/rejected": -1.7681291103363037, + "logps/chosen": -847.1630859375, + "logps/rejected": -2290.70068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.823219299316406, + "rewards/margins": 14.644482612609863, + "rewards/rejected": -22.467700958251953, + "step": 32900 + }, + { + "epoch": 1.96, + "learning_rate": 1.6110554794044397e-06, + "logits/chosen": -2.4921305179595947, + "logits/rejected": -1.7219607830047607, + "logps/chosen": -881.4403076171875, + "logps/rejected": -2190.071044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.118696212768555, + "rewards/margins": 13.337114334106445, + "rewards/rejected": -21.455808639526367, + "step": 32910 + }, + { + "epoch": 1.96, + "learning_rate": 1.6094344788380323e-06, + "logits/chosen": -2.445429801940918, + "logits/rejected": -1.7212835550308228, + "logps/chosen": -834.8736572265625, + "logps/rejected": -2208.44384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.687745571136475, + "rewards/margins": 13.959932327270508, + "rewards/rejected": -21.647676467895508, + "step": 32920 + }, + { + "epoch": 1.96, + "learning_rate": 1.6078139069898551e-06, + "logits/chosen": -2.478377342224121, + "logits/rejected": -1.7120929956436157, + "logps/chosen": -834.8919677734375, + "logps/rejected": -2211.203857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.627894401550293, + "rewards/margins": 14.037195205688477, + "rewards/rejected": -21.665090560913086, + "step": 32930 + }, + { + "epoch": 1.96, + "learning_rate": 1.6061937646400526e-06, + "logits/chosen": -2.5135016441345215, + "logits/rejected": -1.7833999395370483, + "logps/chosen": -863.5135498046875, + "logps/rejected": -2316.59716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.96190881729126, + "rewards/margins": 14.756093978881836, + "rewards/rejected": -22.718002319335938, + "step": 32940 + }, + { + "epoch": 1.96, + "learning_rate": 1.6045740525685598e-06, + "logits/chosen": -2.501699924468994, + "logits/rejected": -1.771604299545288, + "logps/chosen": -846.7653198242188, + "logps/rejected": -2284.610595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7213287353515625, + "rewards/margins": 14.68309211730957, + "rewards/rejected": -22.404422760009766, + "step": 32950 + }, + { + "epoch": 1.97, + "learning_rate": 1.6029547715551076e-06, + "logits/chosen": -2.4994301795959473, + "logits/rejected": -1.6924724578857422, + "logps/chosen": -836.4156494140625, + "logps/rejected": -2290.81103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.605437278747559, + "rewards/margins": 14.853631973266602, + "rewards/rejected": -22.459068298339844, + "step": 32960 + }, + { + "epoch": 1.97, + "learning_rate": 1.6013359223792155e-06, + "logits/chosen": -2.4670932292938232, + "logits/rejected": -1.6849855184555054, + "logps/chosen": -839.0960693359375, + "logps/rejected": -2201.21728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.701320648193359, + "rewards/margins": 13.88427448272705, + "rewards/rejected": -21.58559799194336, + "step": 32970 + }, + { + "epoch": 1.97, + "learning_rate": 1.5997175058202002e-06, + "logits/chosen": -2.463294744491577, + "logits/rejected": -1.6102797985076904, + "logps/chosen": -856.1262817382812, + "logps/rejected": -2234.57861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.879752159118652, + "rewards/margins": 14.016569137573242, + "rewards/rejected": -21.89632225036621, + "step": 32980 + }, + { + "epoch": 1.97, + "learning_rate": 1.598099522657166e-06, + "logits/chosen": -2.492424964904785, + "logits/rejected": -1.7285442352294922, + "logps/chosen": -860.3225708007812, + "logps/rejected": -2188.586669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.911373138427734, + "rewards/margins": 13.533836364746094, + "rewards/rejected": -21.445209503173828, + "step": 32990 + }, + { + "epoch": 1.97, + "learning_rate": 1.596481973669009e-06, + "logits/chosen": -2.4897193908691406, + "logits/rejected": -1.626315712928772, + "logps/chosen": -846.9279174804688, + "logps/rejected": -2190.913330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802467346191406, + "rewards/margins": 13.675878524780273, + "rewards/rejected": -21.478343963623047, + "step": 33000 + }, + { + "epoch": 1.97, + "learning_rate": 1.5948648596344197e-06, + "logits/chosen": -2.4986109733581543, + "logits/rejected": -1.7830806970596313, + "logps/chosen": -836.2205810546875, + "logps/rejected": -2177.408447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.728165626525879, + "rewards/margins": 13.602752685546875, + "rewards/rejected": -21.330917358398438, + "step": 33010 + }, + { + "epoch": 1.97, + "learning_rate": 1.5932481813318743e-06, + "logits/chosen": -2.507458448410034, + "logits/rejected": -1.7931257486343384, + "logps/chosen": -849.2708740234375, + "logps/rejected": -2275.298095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.769097805023193, + "rewards/margins": 14.533966064453125, + "rewards/rejected": -22.303064346313477, + "step": 33020 + }, + { + "epoch": 1.97, + "learning_rate": 1.591631939539644e-06, + "logits/chosen": -2.5012741088867188, + "logits/rejected": -1.7303282022476196, + "logps/chosen": -829.2552490234375, + "logps/rejected": -2228.61767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.550637245178223, + "rewards/margins": 14.292140007019043, + "rewards/rejected": -21.8427791595459, + "step": 33030 + }, + { + "epoch": 1.97, + "learning_rate": 1.590016135035785e-06, + "logits/chosen": -2.480905055999756, + "logits/rejected": -1.8041093349456787, + "logps/chosen": -856.8323364257812, + "logps/rejected": -2292.076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.830621242523193, + "rewards/margins": 14.65306282043457, + "rewards/rejected": -22.48368263244629, + "step": 33040 + }, + { + "epoch": 1.97, + "learning_rate": 1.5884007685981483e-06, + "logits/chosen": -2.4734625816345215, + "logits/rejected": -1.6740481853485107, + "logps/chosen": -834.1712036132812, + "logps/rejected": -2296.654541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.623634338378906, + "rewards/margins": 14.888997077941895, + "rewards/rejected": -22.51262855529785, + "step": 33050 + }, + { + "epoch": 1.97, + "learning_rate": 1.5867858410043688e-06, + "logits/chosen": -2.511021852493286, + "logits/rejected": -1.7229677438735962, + "logps/chosen": -856.3283081054688, + "logps/rejected": -2285.615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.840958595275879, + "rewards/margins": 14.562414169311523, + "rewards/rejected": -22.403371810913086, + "step": 33060 + }, + { + "epoch": 1.97, + "learning_rate": 1.585171353031875e-06, + "logits/chosen": -2.488758087158203, + "logits/rejected": -1.7863399982452393, + "logps/chosen": -851.6165161132812, + "logps/rejected": -2232.60595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7982587814331055, + "rewards/margins": 14.094551086425781, + "rewards/rejected": -21.892810821533203, + "step": 33070 + }, + { + "epoch": 1.97, + "learning_rate": 1.5835573054578802e-06, + "logits/chosen": -2.437058448791504, + "logits/rejected": -1.632925271987915, + "logps/chosen": -859.9302978515625, + "logps/rejected": -2117.8515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.002450942993164, + "rewards/margins": 12.730690002441406, + "rewards/rejected": -20.733139038085938, + "step": 33080 + }, + { + "epoch": 1.97, + "learning_rate": 1.5819436990593855e-06, + "logits/chosen": -2.492260456085205, + "logits/rejected": -1.7782138586044312, + "logps/chosen": -857.4235229492188, + "logps/rejected": -2179.16259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.9137864112854, + "rewards/margins": 13.435707092285156, + "rewards/rejected": -21.34949493408203, + "step": 33090 + }, + { + "epoch": 1.97, + "learning_rate": 1.5803305346131836e-06, + "logits/chosen": -2.461008310317993, + "logits/rejected": -1.7177118062973022, + "logps/chosen": -848.8341674804688, + "logps/rejected": -2205.09521484375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.820589542388916, + "rewards/margins": 13.777460098266602, + "rewards/rejected": -21.59804916381836, + "step": 33100 + }, + { + "epoch": 1.97, + "learning_rate": 1.5787178128958496e-06, + "logits/chosen": -2.5247511863708496, + "logits/rejected": -1.8334146738052368, + "logps/chosen": -853.5789184570312, + "logps/rejected": -2213.726806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.844850063323975, + "rewards/margins": 13.856643676757812, + "rewards/rejected": -21.701494216918945, + "step": 33110 + }, + { + "epoch": 1.97, + "learning_rate": 1.5771055346837498e-06, + "logits/chosen": -2.5290682315826416, + "logits/rejected": -1.790077567100525, + "logps/chosen": -856.1888427734375, + "logps/rejected": -2251.328857421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.835489749908447, + "rewards/margins": 14.24022102355957, + "rewards/rejected": -22.07571029663086, + "step": 33120 + }, + { + "epoch": 1.98, + "learning_rate": 1.5754937007530326e-06, + "logits/chosen": -2.494307041168213, + "logits/rejected": -1.6741249561309814, + "logps/chosen": -873.1160888671875, + "logps/rejected": -2260.82373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.981169700622559, + "rewards/margins": 14.182888984680176, + "rewards/rejected": -22.164060592651367, + "step": 33130 + }, + { + "epoch": 1.98, + "learning_rate": 1.5738823118796371e-06, + "logits/chosen": -2.503133773803711, + "logits/rejected": -1.7745110988616943, + "logps/chosen": -856.6940307617188, + "logps/rejected": -2299.4296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.831610679626465, + "rewards/margins": 14.710966110229492, + "rewards/rejected": -22.54258155822754, + "step": 33140 + }, + { + "epoch": 1.98, + "learning_rate": 1.5722713688392844e-06, + "logits/chosen": -2.530111789703369, + "logits/rejected": -1.7630035877227783, + "logps/chosen": -848.1506958007812, + "logps/rejected": -2186.4443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.802618980407715, + "rewards/margins": 13.623613357543945, + "rewards/rejected": -21.426231384277344, + "step": 33150 + }, + { + "epoch": 1.98, + "learning_rate": 1.570660872407483e-06, + "logits/chosen": -2.5439140796661377, + "logits/rejected": -1.7893701791763306, + "logps/chosen": -838.0963134765625, + "logps/rejected": -2290.854248046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6995062828063965, + "rewards/margins": 14.769248962402344, + "rewards/rejected": -22.46875762939453, + "step": 33160 + }, + { + "epoch": 1.98, + "learning_rate": 1.569050823359526e-06, + "logits/chosen": -2.487417697906494, + "logits/rejected": -1.6886987686157227, + "logps/chosen": -801.7160034179688, + "logps/rejected": -2227.923095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.289459228515625, + "rewards/margins": 14.536616325378418, + "rewards/rejected": -21.82607650756836, + "step": 33170 + }, + { + "epoch": 1.98, + "learning_rate": 1.5674412224704902e-06, + "logits/chosen": -2.493717670440674, + "logits/rejected": -1.7967045307159424, + "logps/chosen": -850.3826904296875, + "logps/rejected": -2167.593994140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.7360944747924805, + "rewards/margins": 13.504312515258789, + "rewards/rejected": -21.240406036376953, + "step": 33180 + }, + { + "epoch": 1.98, + "learning_rate": 1.565832070515239e-06, + "logits/chosen": -2.4662563800811768, + "logits/rejected": -1.7109405994415283, + "logps/chosen": -852.3167724609375, + "logps/rejected": -2215.387939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.797574043273926, + "rewards/margins": 13.91753101348877, + "rewards/rejected": -21.715103149414062, + "step": 33190 + }, + { + "epoch": 1.98, + "learning_rate": 1.5642233682684157e-06, + "logits/chosen": -2.456761360168457, + "logits/rejected": -1.7339986562728882, + "logps/chosen": -849.0524291992188, + "logps/rejected": -2304.490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.781613349914551, + "rewards/margins": 14.816922187805176, + "rewards/rejected": -22.598535537719727, + "step": 33200 + }, + { + "epoch": 1.98, + "learning_rate": 1.5626151165044522e-06, + "logits/chosen": -2.473609447479248, + "logits/rejected": -1.7876532077789307, + "logps/chosen": -847.2556762695312, + "logps/rejected": -2335.231689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.746962547302246, + "rewards/margins": 15.163223266601562, + "rewards/rejected": -22.91018295288086, + "step": 33210 + }, + { + "epoch": 1.98, + "learning_rate": 1.5610073159975589e-06, + "logits/chosen": -2.499821186065674, + "logits/rejected": -1.7704699039459229, + "logps/chosen": -879.4815673828125, + "logps/rejected": -2304.296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.12637996673584, + "rewards/margins": 14.477409362792969, + "rewards/rejected": -22.603790283203125, + "step": 33220 + }, + { + "epoch": 1.98, + "learning_rate": 1.5593999675217325e-06, + "logits/chosen": -2.533313274383545, + "logits/rejected": -1.7308590412139893, + "logps/chosen": -839.4559326171875, + "logps/rejected": -2283.151611328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.686106204986572, + "rewards/margins": 14.702384948730469, + "rewards/rejected": -22.388492584228516, + "step": 33230 + }, + { + "epoch": 1.98, + "learning_rate": 1.557793071850749e-06, + "logits/chosen": -2.521751880645752, + "logits/rejected": -1.8032829761505127, + "logps/chosen": -825.3387451171875, + "logps/rejected": -2246.769775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.547046661376953, + "rewards/margins": 14.474912643432617, + "rewards/rejected": -22.02195930480957, + "step": 33240 + }, + { + "epoch": 1.98, + "learning_rate": 1.5561866297581694e-06, + "logits/chosen": -2.493381977081299, + "logits/rejected": -1.644051194190979, + "logps/chosen": -865.1885986328125, + "logps/rejected": -2270.20068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.972692966461182, + "rewards/margins": 14.288286209106445, + "rewards/rejected": -22.2609806060791, + "step": 33250 + }, + { + "epoch": 1.98, + "learning_rate": 1.5545806420173343e-06, + "logits/chosen": -2.4725167751312256, + "logits/rejected": -1.6194912195205688, + "logps/chosen": -865.6843872070312, + "logps/rejected": -2208.86181640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.980930328369141, + "rewards/margins": 13.679491996765137, + "rewards/rejected": -21.66042137145996, + "step": 33260 + }, + { + "epoch": 1.98, + "learning_rate": 1.552975109401365e-06, + "logits/chosen": -2.5082335472106934, + "logits/rejected": -1.82488214969635, + "logps/chosen": -808.81103515625, + "logps/rejected": -2253.782470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.403273582458496, + "rewards/margins": 14.691081047058105, + "rewards/rejected": -22.0943546295166, + "step": 33270 + }, + { + "epoch": 1.98, + "learning_rate": 1.551370032683167e-06, + "logits/chosen": -2.502988815307617, + "logits/rejected": -1.7267593145370483, + "logps/chosen": -832.1467895507812, + "logps/rejected": -2307.47607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.576571464538574, + "rewards/margins": 15.054408073425293, + "rewards/rejected": -22.6309814453125, + "step": 33280 + }, + { + "epoch": 1.99, + "learning_rate": 1.5497654126354222e-06, + "logits/chosen": -2.5491652488708496, + "logits/rejected": -1.7174313068389893, + "logps/chosen": -880.9259643554688, + "logps/rejected": -2241.3388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.14500904083252, + "rewards/margins": 13.831293106079102, + "rewards/rejected": -21.976301193237305, + "step": 33290 + }, + { + "epoch": 1.99, + "learning_rate": 1.5481612500305964e-06, + "logits/chosen": -2.5194363594055176, + "logits/rejected": -1.7175605297088623, + "logps/chosen": -839.8053588867188, + "logps/rejected": -2264.228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.678961277008057, + "rewards/margins": 14.506695747375488, + "rewards/rejected": -22.18565559387207, + "step": 33300 + }, + { + "epoch": 1.99, + "learning_rate": 1.546557545640932e-06, + "logits/chosen": -2.5392279624938965, + "logits/rejected": -1.8226649761199951, + "logps/chosen": -862.4558715820312, + "logps/rejected": -2187.59716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.907596588134766, + "rewards/margins": 13.530746459960938, + "rewards/rejected": -21.438344955444336, + "step": 33310 + }, + { + "epoch": 1.99, + "learning_rate": 1.544954300238454e-06, + "logits/chosen": -2.5276119709014893, + "logits/rejected": -1.8196818828582764, + "logps/chosen": -851.39697265625, + "logps/rejected": -2191.097900390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.854038238525391, + "rewards/margins": 13.6220064163208, + "rewards/rejected": -21.47604751586914, + "step": 33320 + }, + { + "epoch": 1.99, + "learning_rate": 1.5433515145949636e-06, + "logits/chosen": -2.5370707511901855, + "logits/rejected": -1.7680763006210327, + "logps/chosen": -831.056640625, + "logps/rejected": -2278.626708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.620954990386963, + "rewards/margins": 14.71727180480957, + "rewards/rejected": -22.33822250366211, + "step": 33330 + }, + { + "epoch": 1.99, + "learning_rate": 1.5417491894820408e-06, + "logits/chosen": -2.50211238861084, + "logits/rejected": -1.7309255599975586, + "logps/chosen": -873.2752685546875, + "logps/rejected": -2266.504638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -8.034585952758789, + "rewards/margins": 14.176742553710938, + "rewards/rejected": -22.211328506469727, + "step": 33340 + }, + { + "epoch": 1.99, + "learning_rate": 1.5401473256710463e-06, + "logits/chosen": -2.5403523445129395, + "logits/rejected": -1.8176568746566772, + "logps/chosen": -835.76171875, + "logps/rejected": -2207.23876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.673165798187256, + "rewards/margins": 13.969035148620605, + "rewards/rejected": -21.642200469970703, + "step": 33350 + }, + { + "epoch": 1.99, + "learning_rate": 1.5385459239331173e-06, + "logits/chosen": -2.4542250633239746, + "logits/rejected": -1.6343265771865845, + "logps/chosen": -853.7556762695312, + "logps/rejected": -2208.961669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.771788597106934, + "rewards/margins": 13.866848945617676, + "rewards/rejected": -21.63863754272461, + "step": 33360 + }, + { + "epoch": 1.99, + "learning_rate": 1.5369449850391682e-06, + "logits/chosen": -2.5120930671691895, + "logits/rejected": -1.7708418369293213, + "logps/chosen": -844.2501220703125, + "logps/rejected": -2230.01513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.767291069030762, + "rewards/margins": 14.092620849609375, + "rewards/rejected": -21.85991096496582, + "step": 33370 + }, + { + "epoch": 1.99, + "learning_rate": 1.5353445097598912e-06, + "logits/chosen": -2.5541927814483643, + "logits/rejected": -1.8095779418945312, + "logps/chosen": -844.59228515625, + "logps/rejected": -2149.7177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.661242485046387, + "rewards/margins": 13.381805419921875, + "rewards/rejected": -21.043048858642578, + "step": 33380 + }, + { + "epoch": 1.99, + "learning_rate": 1.5337444988657546e-06, + "logits/chosen": -2.538386344909668, + "logits/rejected": -1.759080171585083, + "logps/chosen": -836.4430541992188, + "logps/rejected": -2161.994384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.651192665100098, + "rewards/margins": 13.536412239074707, + "rewards/rejected": -21.187602996826172, + "step": 33390 + }, + { + "epoch": 1.99, + "learning_rate": 1.532144953127004e-06, + "logits/chosen": -2.443730115890503, + "logits/rejected": -1.7711683511734009, + "logps/chosen": -868.0143432617188, + "logps/rejected": -2236.15966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.957781791687012, + "rewards/margins": 13.956108093261719, + "rewards/rejected": -21.913890838623047, + "step": 33400 + }, + { + "epoch": 1.99, + "learning_rate": 1.5305458733136608e-06, + "logits/chosen": -2.443990468978882, + "logits/rejected": -1.6992244720458984, + "logps/chosen": -844.0784912109375, + "logps/rejected": -2174.27880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.773224830627441, + "rewards/margins": 13.53022575378418, + "rewards/rejected": -21.303449630737305, + "step": 33410 + }, + { + "epoch": 1.99, + "learning_rate": 1.5289472601955219e-06, + "logits/chosen": -2.5222878456115723, + "logits/rejected": -1.8030275106430054, + "logps/chosen": -821.6402587890625, + "logps/rejected": -2268.353271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.474295139312744, + "rewards/margins": 14.778103828430176, + "rewards/rejected": -22.252397537231445, + "step": 33420 + }, + { + "epoch": 1.99, + "learning_rate": 1.527349114542159e-06, + "logits/chosen": -2.503178358078003, + "logits/rejected": -1.7834854125976562, + "logps/chosen": -815.2890014648438, + "logps/rejected": -2225.91552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.441809177398682, + "rewards/margins": 14.380434036254883, + "rewards/rejected": -21.822240829467773, + "step": 33430 + }, + { + "epoch": 1.99, + "learning_rate": 1.525751437122921e-06, + "logits/chosen": -2.51002836227417, + "logits/rejected": -1.6888240575790405, + "logps/chosen": -817.4244384765625, + "logps/rejected": -2194.149169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.44698429107666, + "rewards/margins": 14.057207107543945, + "rewards/rejected": -21.50419044494629, + "step": 33440 + }, + { + "epoch": 1.99, + "learning_rate": 1.5241542287069273e-06, + "logits/chosen": -2.426516056060791, + "logits/rejected": -1.6531133651733398, + "logps/chosen": -863.9744873046875, + "logps/rejected": -2197.700439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.857166290283203, + "rewards/margins": 13.684282302856445, + "rewards/rejected": -21.541446685791016, + "step": 33450 + }, + { + "epoch": 2.0, + "learning_rate": 1.5225574900630768e-06, + "logits/chosen": -2.4678795337677, + "logits/rejected": -1.7763763666152954, + "logps/chosen": -824.7625122070312, + "logps/rejected": -2200.204833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.561892032623291, + "rewards/margins": 13.99452018737793, + "rewards/rejected": -21.556413650512695, + "step": 33460 + }, + { + "epoch": 2.0, + "learning_rate": 1.5209612219600366e-06, + "logits/chosen": -2.4843380451202393, + "logits/rejected": -1.7145531177520752, + "logps/chosen": -829.0211791992188, + "logps/rejected": -2192.800048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.558680057525635, + "rewards/margins": 13.941370964050293, + "rewards/rejected": -21.500049591064453, + "step": 33470 + }, + { + "epoch": 2.0, + "learning_rate": 1.5193654251662531e-06, + "logits/chosen": -2.418146848678589, + "logits/rejected": -1.6816599369049072, + "logps/chosen": -835.8590087890625, + "logps/rejected": -2275.331298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.6183929443359375, + "rewards/margins": 14.689976692199707, + "rewards/rejected": -22.308368682861328, + "step": 33480 + }, + { + "epoch": 2.0, + "learning_rate": 1.5177701004499406e-06, + "logits/chosen": -2.482309103012085, + "logits/rejected": -1.633080244064331, + "logps/chosen": -816.392578125, + "logps/rejected": -2219.3046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4780168533325195, + "rewards/margins": 14.265625, + "rewards/rejected": -21.743642807006836, + "step": 33490 + }, + { + "epoch": 2.0, + "learning_rate": 1.5161752485790902e-06, + "logits/chosen": -2.460669755935669, + "logits/rejected": -1.7023576498031616, + "logps/chosen": -861.0277099609375, + "logps/rejected": -2257.14013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.8818535804748535, + "rewards/margins": 14.248967170715332, + "rewards/rejected": -22.130821228027344, + "step": 33500 + }, + { + "epoch": 2.0, + "learning_rate": 1.514580870321462e-06, + "logits/chosen": -2.4999241828918457, + "logits/rejected": -1.6424472332000732, + "logps/chosen": -828.4031372070312, + "logps/rejected": -2228.143798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.553446292877197, + "rewards/margins": 14.289329528808594, + "rewards/rejected": -21.842777252197266, + "step": 33510 + }, + { + "epoch": 2.0, + "learning_rate": 1.5129869664445901e-06, + "logits/chosen": -2.518287181854248, + "logits/rejected": -1.7683494091033936, + "logps/chosen": -834.5494384765625, + "logps/rejected": -2313.61669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.625245094299316, + "rewards/margins": 15.05589485168457, + "rewards/rejected": -22.681140899658203, + "step": 33520 + }, + { + "epoch": 2.0, + "learning_rate": 1.5113935377157803e-06, + "logits/chosen": -2.535391330718994, + "logits/rejected": -1.7124515771865845, + "logps/chosen": -818.9515991210938, + "logps/rejected": -2217.400634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.446432590484619, + "rewards/margins": 14.290956497192383, + "rewards/rejected": -21.737388610839844, + "step": 33530 + }, + { + "epoch": 2.0, + "learning_rate": 1.509800584902108e-06, + "logits/chosen": -2.483447313308716, + "logits/rejected": -1.7377487421035767, + "logps/chosen": -828.7464599609375, + "logps/rejected": -2315.857666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.514716148376465, + "rewards/margins": 15.188504219055176, + "rewards/rejected": -22.703218460083008, + "step": 33540 + }, + { + "epoch": 2.0, + "learning_rate": 1.5082081087704226e-06, + "logits/chosen": -2.4659831523895264, + "logits/rejected": -1.7727457284927368, + "logps/chosen": -793.9671630859375, + "logps/rejected": -2140.21240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.281069278717041, + "rewards/margins": 13.685575485229492, + "rewards/rejected": -20.966646194458008, + "step": 33550 + }, + { + "epoch": 2.0, + "learning_rate": 1.50661611008734e-06, + "logits/chosen": -2.5120015144348145, + "logits/rejected": -1.7616174221038818, + "logps/chosen": -814.3489379882812, + "logps/rejected": -2217.27783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.513110160827637, + "rewards/margins": 14.222860336303711, + "rewards/rejected": -21.735971450805664, + "step": 33560 + }, + { + "epoch": 2.0, + "learning_rate": 1.5050245896192503e-06, + "logits/chosen": -2.495100975036621, + "logits/rejected": -1.6308307647705078, + "logps/chosen": -798.6842041015625, + "logps/rejected": -2265.315185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.307834625244141, + "rewards/margins": 14.897178649902344, + "rewards/rejected": -22.205013275146484, + "step": 33570 + }, + { + "epoch": 2.0, + "learning_rate": 1.5034335481323095e-06, + "logits/chosen": -2.5445504188537598, + "logits/rejected": -1.790350317955017, + "logps/chosen": -834.1920776367188, + "logps/rejected": -2259.364013671875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5844316482543945, + "rewards/margins": 14.564271926879883, + "rewards/rejected": -22.148704528808594, + "step": 33580 + }, + { + "epoch": 2.0, + "learning_rate": 1.501842986392447e-06, + "logits/chosen": -2.515228748321533, + "logits/rejected": -1.8214280605316162, + "logps/chosen": -822.0877685546875, + "logps/rejected": -2262.303955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.536437034606934, + "rewards/margins": 14.649014472961426, + "rewards/rejected": -22.18545150756836, + "step": 33590 + }, + { + "epoch": 2.0, + "learning_rate": 1.5002529051653576e-06, + "logits/chosen": -2.457879066467285, + "logits/rejected": -1.6738977432250977, + "logps/chosen": -829.7557373046875, + "logps/rejected": -2167.21484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.563883304595947, + "rewards/margins": 13.654609680175781, + "rewards/rejected": -21.21849250793457, + "step": 33600 + }, + { + "epoch": 2.0, + "learning_rate": 1.4986633052165066e-06, + "logits/chosen": -2.5030531883239746, + "logits/rejected": -1.8013007640838623, + "logps/chosen": -832.1778564453125, + "logps/rejected": -2257.845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.623265743255615, + "rewards/margins": 14.5217866897583, + "rewards/rejected": -22.14505386352539, + "step": 33610 + }, + { + "epoch": 2.0, + "learning_rate": 1.497074187311128e-06, + "logits/chosen": -2.5257697105407715, + "logits/rejected": -1.7938207387924194, + "logps/chosen": -826.2330322265625, + "logps/rejected": -2245.955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.592460632324219, + "rewards/margins": 14.4349946975708, + "rewards/rejected": -22.027454376220703, + "step": 33620 + }, + { + "epoch": 2.01, + "learning_rate": 1.4954855522142225e-06, + "logits/chosen": -2.5148394107818604, + "logits/rejected": -1.8305637836456299, + "logps/chosen": -830.9190673828125, + "logps/rejected": -2230.630615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.620333671569824, + "rewards/margins": 14.25010871887207, + "rewards/rejected": -21.870441436767578, + "step": 33630 + }, + { + "epoch": 2.01, + "learning_rate": 1.4938974006905598e-06, + "logits/chosen": -2.492222309112549, + "logits/rejected": -1.704440712928772, + "logps/chosen": -806.4608154296875, + "logps/rejected": -2200.287353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.373170375823975, + "rewards/margins": 14.185598373413086, + "rewards/rejected": -21.558773040771484, + "step": 33640 + }, + { + "epoch": 2.01, + "learning_rate": 1.4923097335046755e-06, + "logits/chosen": -2.503605365753174, + "logits/rejected": -1.8557937145233154, + "logps/chosen": -829.8333129882812, + "logps/rejected": -2145.41748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5730743408203125, + "rewards/margins": 13.440347671508789, + "rewards/rejected": -21.01342010498047, + "step": 33650 + }, + { + "epoch": 2.01, + "learning_rate": 1.4907225514208724e-06, + "logits/chosen": -2.492401123046875, + "logits/rejected": -1.7569392919540405, + "logps/chosen": -846.4881591796875, + "logps/rejected": -2238.087158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.762886047363281, + "rewards/margins": 14.163113594055176, + "rewards/rejected": -21.92599868774414, + "step": 33660 + }, + { + "epoch": 2.01, + "learning_rate": 1.4891358552032204e-06, + "logits/chosen": -2.4779632091522217, + "logits/rejected": -1.7924779653549194, + "logps/chosen": -818.7111206054688, + "logps/rejected": -2198.20458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4635329246521, + "rewards/margins": 14.088811874389648, + "rewards/rejected": -21.55234718322754, + "step": 33670 + }, + { + "epoch": 2.01, + "learning_rate": 1.4875496456155547e-06, + "logits/chosen": -2.529367685317993, + "logits/rejected": -1.696367621421814, + "logps/chosen": -806.3927612304688, + "logps/rejected": -2233.51318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.241968631744385, + "rewards/margins": 14.660585403442383, + "rewards/rejected": -21.90255355834961, + "step": 33680 + }, + { + "epoch": 2.01, + "learning_rate": 1.4859639234214774e-06, + "logits/chosen": -2.520516872406006, + "logits/rejected": -1.8665649890899658, + "logps/chosen": -818.2225341796875, + "logps/rejected": -2212.50146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5042405128479, + "rewards/margins": 14.171867370605469, + "rewards/rejected": -21.676105499267578, + "step": 33690 + }, + { + "epoch": 2.01, + "learning_rate": 1.484378689384353e-06, + "logits/chosen": -2.5092313289642334, + "logits/rejected": -1.8200931549072266, + "logps/chosen": -846.9058837890625, + "logps/rejected": -2198.3828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.764723300933838, + "rewards/margins": 13.778033256530762, + "rewards/rejected": -21.54275894165039, + "step": 33700 + }, + { + "epoch": 2.01, + "learning_rate": 1.4827939442673158e-06, + "logits/chosen": -2.476611375808716, + "logits/rejected": -1.7027738094329834, + "logps/chosen": -826.6685791015625, + "logps/rejected": -2181.86669921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.583849906921387, + "rewards/margins": 13.7979097366333, + "rewards/rejected": -21.381759643554688, + "step": 33710 + }, + { + "epoch": 2.01, + "learning_rate": 1.48120968883326e-06, + "logits/chosen": -2.5191650390625, + "logits/rejected": -1.8151267766952515, + "logps/chosen": -808.339599609375, + "logps/rejected": -2320.525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.420987606048584, + "rewards/margins": 15.3374662399292, + "rewards/rejected": -22.758455276489258, + "step": 33720 + }, + { + "epoch": 2.01, + "learning_rate": 1.479625923844848e-06, + "logits/chosen": -2.5084187984466553, + "logits/rejected": -1.7817739248275757, + "logps/chosen": -800.5814819335938, + "logps/rejected": -2228.41357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.334344387054443, + "rewards/margins": 14.493046760559082, + "rewards/rejected": -21.827390670776367, + "step": 33730 + }, + { + "epoch": 2.01, + "learning_rate": 1.478042650064502e-06, + "logits/chosen": -2.493483066558838, + "logits/rejected": -1.680929183959961, + "logps/chosen": -833.2234497070312, + "logps/rejected": -2210.66943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5648088455200195, + "rewards/margins": 14.10792064666748, + "rewards/rejected": -21.6727294921875, + "step": 33740 + }, + { + "epoch": 2.01, + "learning_rate": 1.4764598682544124e-06, + "logits/chosen": -2.4670307636260986, + "logits/rejected": -1.603276014328003, + "logps/chosen": -860.0578002929688, + "logps/rejected": -2316.669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.911660194396973, + "rewards/margins": 14.811184883117676, + "rewards/rejected": -22.722843170166016, + "step": 33750 + }, + { + "epoch": 2.01, + "learning_rate": 1.4748775791765282e-06, + "logits/chosen": -2.5108821392059326, + "logits/rejected": -1.7470277547836304, + "logps/chosen": -826.0265502929688, + "logps/rejected": -2211.7177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.54456090927124, + "rewards/margins": 14.14348316192627, + "rewards/rejected": -21.68804359436035, + "step": 33760 + }, + { + "epoch": 2.01, + "learning_rate": 1.4732957835925654e-06, + "logits/chosen": -2.4952120780944824, + "logits/rejected": -1.8307205438613892, + "logps/chosen": -844.0558471679688, + "logps/rejected": -2247.91943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.710041046142578, + "rewards/margins": 14.334424018859863, + "rewards/rejected": -22.044464111328125, + "step": 33770 + }, + { + "epoch": 2.01, + "learning_rate": 1.4717144822639988e-06, + "logits/chosen": -2.4849021434783936, + "logits/rejected": -1.6890252828598022, + "logps/chosen": -807.8711547851562, + "logps/rejected": -2152.804443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3336181640625, + "rewards/margins": 13.743354797363281, + "rewards/rejected": -21.07697296142578, + "step": 33780 + }, + { + "epoch": 2.01, + "learning_rate": 1.4701336759520667e-06, + "logits/chosen": -2.535773992538452, + "logits/rejected": -1.8757350444793701, + "logps/chosen": -833.75830078125, + "logps/rejected": -2150.12646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.612715244293213, + "rewards/margins": 13.450881958007812, + "rewards/rejected": -21.063596725463867, + "step": 33790 + }, + { + "epoch": 2.02, + "learning_rate": 1.4685533654177708e-06, + "logits/chosen": -2.5311756134033203, + "logits/rejected": -1.878584623336792, + "logps/chosen": -845.1170043945312, + "logps/rejected": -2253.260986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.776968479156494, + "rewards/margins": 14.324270248413086, + "rewards/rejected": -22.101238250732422, + "step": 33800 + }, + { + "epoch": 2.02, + "learning_rate": 1.4669735514218709e-06, + "logits/chosen": -2.522918224334717, + "logits/rejected": -1.7990665435791016, + "logps/chosen": -813.9261474609375, + "logps/rejected": -2180.4326171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.391079902648926, + "rewards/margins": 13.971168518066406, + "rewards/rejected": -21.362247467041016, + "step": 33810 + }, + { + "epoch": 2.02, + "learning_rate": 1.4653942347248908e-06, + "logits/chosen": -2.5092873573303223, + "logits/rejected": -1.8102060556411743, + "logps/chosen": -816.0382080078125, + "logps/rejected": -2222.0234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4549431800842285, + "rewards/margins": 14.321139335632324, + "rewards/rejected": -21.776084899902344, + "step": 33820 + }, + { + "epoch": 2.02, + "learning_rate": 1.4638154160871121e-06, + "logits/chosen": -2.528470277786255, + "logits/rejected": -1.6754118204116821, + "logps/chosen": -819.8615112304688, + "logps/rejected": -2187.593994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.517744541168213, + "rewards/margins": 13.908607482910156, + "rewards/rejected": -21.426353454589844, + "step": 33830 + }, + { + "epoch": 2.02, + "learning_rate": 1.46223709626858e-06, + "logits/chosen": -2.498284339904785, + "logits/rejected": -1.7785112857818604, + "logps/chosen": -869.0535888671875, + "logps/rejected": -2221.000732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.991789817810059, + "rewards/margins": 13.776341438293457, + "rewards/rejected": -21.76813316345215, + "step": 33840 + }, + { + "epoch": 2.02, + "learning_rate": 1.4606592760290953e-06, + "logits/chosen": -2.4885973930358887, + "logits/rejected": -1.8206313848495483, + "logps/chosen": -820.5280151367188, + "logps/rejected": -2160.041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.503076076507568, + "rewards/margins": 13.663281440734863, + "rewards/rejected": -21.16635513305664, + "step": 33850 + }, + { + "epoch": 2.02, + "learning_rate": 1.459081956128223e-06, + "logits/chosen": -2.4988820552825928, + "logits/rejected": -1.7086679935455322, + "logps/chosen": -827.0617065429688, + "logps/rejected": -2241.331298828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.570122718811035, + "rewards/margins": 14.402132034301758, + "rewards/rejected": -21.97225570678711, + "step": 33860 + }, + { + "epoch": 2.02, + "learning_rate": 1.457505137325283e-06, + "logits/chosen": -2.4908649921417236, + "logits/rejected": -1.7669140100479126, + "logps/chosen": -811.8192138671875, + "logps/rejected": -2224.60888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.349475860595703, + "rewards/margins": 14.466423034667969, + "rewards/rejected": -21.815900802612305, + "step": 33870 + }, + { + "epoch": 2.02, + "learning_rate": 1.455928820379357e-06, + "logits/chosen": -2.534273147583008, + "logits/rejected": -1.7565491199493408, + "logps/chosen": -793.9933471679688, + "logps/rejected": -2153.291748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.257293701171875, + "rewards/margins": 13.844232559204102, + "rewards/rejected": -21.101526260375977, + "step": 33880 + }, + { + "epoch": 2.02, + "learning_rate": 1.4543530060492833e-06, + "logits/chosen": -2.496473550796509, + "logits/rejected": -1.7429348230361938, + "logps/chosen": -795.1064453125, + "logps/rejected": -2226.411376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.274217128753662, + "rewards/margins": 14.553388595581055, + "rewards/rejected": -21.827608108520508, + "step": 33890 + }, + { + "epoch": 2.02, + "learning_rate": 1.452777695093659e-06, + "logits/chosen": -2.519883632659912, + "logits/rejected": -1.7837903499603271, + "logps/chosen": -812.0925903320312, + "logps/rejected": -2171.466796875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.417477607727051, + "rewards/margins": 13.862100601196289, + "rewards/rejected": -21.279577255249023, + "step": 33900 + }, + { + "epoch": 2.02, + "learning_rate": 1.4512028882708393e-06, + "logits/chosen": -2.477027654647827, + "logits/rejected": -1.7885358333587646, + "logps/chosen": -816.9631958007812, + "logps/rejected": -2194.94775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.449967384338379, + "rewards/margins": 14.056940078735352, + "rewards/rejected": -21.506908416748047, + "step": 33910 + }, + { + "epoch": 2.02, + "learning_rate": 1.4496285863389356e-06, + "logits/chosen": -2.453795909881592, + "logits/rejected": -1.7176004648208618, + "logps/chosen": -840.62109375, + "logps/rejected": -2213.78662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.675930976867676, + "rewards/margins": 14.003067016601562, + "rewards/rejected": -21.678997039794922, + "step": 33920 + }, + { + "epoch": 2.02, + "learning_rate": 1.448054790055817e-06, + "logits/chosen": -2.5384716987609863, + "logits/rejected": -1.7982845306396484, + "logps/chosen": -776.6824951171875, + "logps/rejected": -2183.340087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090447425842285, + "rewards/margins": 14.286874771118164, + "rewards/rejected": -21.377321243286133, + "step": 33930 + }, + { + "epoch": 2.02, + "learning_rate": 1.4464815001791094e-06, + "logits/chosen": -2.4880778789520264, + "logits/rejected": -1.7707399129867554, + "logps/chosen": -804.034912109375, + "logps/rejected": -2243.502685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.390605926513672, + "rewards/margins": 14.595799446105957, + "rewards/rejected": -21.986408233642578, + "step": 33940 + }, + { + "epoch": 2.02, + "learning_rate": 1.4449087174661935e-06, + "logits/chosen": -2.520165205001831, + "logits/rejected": -1.7851011753082275, + "logps/chosen": -807.7804565429688, + "logps/rejected": -2182.822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.343629360198975, + "rewards/margins": 14.049283027648926, + "rewards/rejected": -21.392913818359375, + "step": 33950 + }, + { + "epoch": 2.03, + "learning_rate": 1.443336442674208e-06, + "logits/chosen": -2.512277126312256, + "logits/rejected": -1.7969592809677124, + "logps/chosen": -809.3818359375, + "logps/rejected": -2135.913330078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.374057769775391, + "rewards/margins": 13.554300308227539, + "rewards/rejected": -20.928356170654297, + "step": 33960 + }, + { + "epoch": 2.03, + "learning_rate": 1.4417646765600458e-06, + "logits/chosen": -2.5013561248779297, + "logits/rejected": -1.6373188495635986, + "logps/chosen": -808.5926513671875, + "logps/rejected": -2245.150146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.409049987792969, + "rewards/margins": 14.612385749816895, + "rewards/rejected": -22.021434783935547, + "step": 33970 + }, + { + "epoch": 2.03, + "learning_rate": 1.4401934198803543e-06, + "logits/chosen": -2.5489532947540283, + "logits/rejected": -1.763492226600647, + "logps/chosen": -792.9866333007812, + "logps/rejected": -2208.072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2095465660095215, + "rewards/margins": 14.426358222961426, + "rewards/rejected": -21.63590431213379, + "step": 33980 + }, + { + "epoch": 2.03, + "learning_rate": 1.438622673391537e-06, + "logits/chosen": -2.488987445831299, + "logits/rejected": -1.8240163326263428, + "logps/chosen": -766.8116455078125, + "logps/rejected": -2162.716064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9517645835876465, + "rewards/margins": 14.23042106628418, + "rewards/rejected": -21.182186126708984, + "step": 33990 + }, + { + "epoch": 2.03, + "learning_rate": 1.4370524378497516e-06, + "logits/chosen": -2.480522632598877, + "logits/rejected": -1.7152312994003296, + "logps/chosen": -807.2941284179688, + "logps/rejected": -2181.47314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.291916847229004, + "rewards/margins": 14.07972526550293, + "rewards/rejected": -21.37164306640625, + "step": 34000 + }, + { + "epoch": 2.03, + "learning_rate": 1.4354827140109088e-06, + "logits/chosen": -2.5202689170837402, + "logits/rejected": -1.8007488250732422, + "logps/chosen": -815.5300903320312, + "logps/rejected": -2188.972900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.418951511383057, + "rewards/margins": 14.027300834655762, + "rewards/rejected": -21.44625473022461, + "step": 34010 + }, + { + "epoch": 2.03, + "learning_rate": 1.4339135026306738e-06, + "logits/chosen": -2.4864866733551025, + "logits/rejected": -1.7495863437652588, + "logps/chosen": -828.4036254882812, + "logps/rejected": -2237.15576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.57230281829834, + "rewards/margins": 14.347102165222168, + "rewards/rejected": -21.919404983520508, + "step": 34020 + }, + { + "epoch": 2.03, + "learning_rate": 1.4323448044644654e-06, + "logits/chosen": -2.4807345867156982, + "logits/rejected": -1.6991357803344727, + "logps/chosen": -847.7528076171875, + "logps/rejected": -2200.72412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.761141300201416, + "rewards/margins": 13.80388069152832, + "rewards/rejected": -21.56502342224121, + "step": 34030 + }, + { + "epoch": 2.03, + "learning_rate": 1.4307766202674543e-06, + "logits/chosen": -2.465510129928589, + "logits/rejected": -1.6397091150283813, + "logps/chosen": -817.8120727539062, + "logps/rejected": -2226.212158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.420828819274902, + "rewards/margins": 14.386955261230469, + "rewards/rejected": -21.807785034179688, + "step": 34040 + }, + { + "epoch": 2.03, + "learning_rate": 1.4292089507945655e-06, + "logits/chosen": -2.519322395324707, + "logits/rejected": -1.8445560932159424, + "logps/chosen": -820.5148315429688, + "logps/rejected": -2241.32373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.498929023742676, + "rewards/margins": 14.462064743041992, + "rewards/rejected": -21.96099281311035, + "step": 34050 + }, + { + "epoch": 2.03, + "learning_rate": 1.4276417968004719e-06, + "logits/chosen": -2.501847743988037, + "logits/rejected": -1.7244113683700562, + "logps/chosen": -800.4287109375, + "logps/rejected": -2214.47216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.217789649963379, + "rewards/margins": 14.485086441040039, + "rewards/rejected": -21.702877044677734, + "step": 34060 + }, + { + "epoch": 2.03, + "learning_rate": 1.426075159039606e-06, + "logits/chosen": -2.4514613151550293, + "logits/rejected": -1.6074641942977905, + "logps/chosen": -828.9700927734375, + "logps/rejected": -2188.42529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.581017971038818, + "rewards/margins": 13.865213394165039, + "rewards/rejected": -21.446231842041016, + "step": 34070 + }, + { + "epoch": 2.03, + "learning_rate": 1.424509038266143e-06, + "logits/chosen": -2.5047526359558105, + "logits/rejected": -1.766370177268982, + "logps/chosen": -786.0737915039062, + "logps/rejected": -2224.169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.146354675292969, + "rewards/margins": 14.658287048339844, + "rewards/rejected": -21.804641723632812, + "step": 34080 + }, + { + "epoch": 2.03, + "learning_rate": 1.4229434352340169e-06, + "logits/chosen": -2.5508923530578613, + "logits/rejected": -1.765088677406311, + "logps/chosen": -771.650634765625, + "logps/rejected": -2247.245361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.983827114105225, + "rewards/margins": 15.05407428741455, + "rewards/rejected": -22.037900924682617, + "step": 34090 + }, + { + "epoch": 2.03, + "learning_rate": 1.4213783506969053e-06, + "logits/chosen": -2.515076160430908, + "logits/rejected": -1.79736328125, + "logps/chosen": -805.4554443359375, + "logps/rejected": -2218.13232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.362597465515137, + "rewards/margins": 14.364858627319336, + "rewards/rejected": -21.727458953857422, + "step": 34100 + }, + { + "epoch": 2.03, + "learning_rate": 1.4198137854082443e-06, + "logits/chosen": -2.532916307449341, + "logits/rejected": -1.80194890499115, + "logps/chosen": -762.36083984375, + "logps/rejected": -2200.09423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.949564456939697, + "rewards/margins": 14.593923568725586, + "rewards/rejected": -21.543487548828125, + "step": 34110 + }, + { + "epoch": 2.03, + "learning_rate": 1.4182497401212109e-06, + "logits/chosen": -2.5005576610565186, + "logits/rejected": -1.7718505859375, + "logps/chosen": -784.3308715820312, + "logps/rejected": -2175.597412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.116877555847168, + "rewards/margins": 14.207311630249023, + "rewards/rejected": -21.32419204711914, + "step": 34120 + }, + { + "epoch": 2.04, + "learning_rate": 1.4166862155887413e-06, + "logits/chosen": -2.5031192302703857, + "logits/rejected": -1.851976752281189, + "logps/chosen": -789.4632568359375, + "logps/rejected": -2218.521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.197033882141113, + "rewards/margins": 14.52307415008545, + "rewards/rejected": -21.720108032226562, + "step": 34130 + }, + { + "epoch": 2.04, + "learning_rate": 1.4151232125635123e-06, + "logits/chosen": -2.515578031539917, + "logits/rejected": -1.7519655227661133, + "logps/chosen": -832.6257934570312, + "logps/rejected": -2208.9716796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.578873634338379, + "rewards/margins": 14.05981159210205, + "rewards/rejected": -21.638687133789062, + "step": 34140 + }, + { + "epoch": 2.04, + "learning_rate": 1.4135607317979549e-06, + "logits/chosen": -2.5049822330474854, + "logits/rejected": -1.676924467086792, + "logps/chosen": -801.8446044921875, + "logps/rejected": -2153.69140625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.335791110992432, + "rewards/margins": 13.774999618530273, + "rewards/rejected": -21.110790252685547, + "step": 34150 + }, + { + "epoch": 2.04, + "learning_rate": 1.4119987740442473e-06, + "logits/chosen": -2.5404129028320312, + "logits/rejected": -1.8777889013290405, + "logps/chosen": -814.3598022460938, + "logps/rejected": -2182.986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.475754737854004, + "rewards/margins": 13.919219970703125, + "rewards/rejected": -21.394973754882812, + "step": 34160 + }, + { + "epoch": 2.04, + "learning_rate": 1.4104373400543162e-06, + "logits/chosen": -2.4972524642944336, + "logits/rejected": -1.8018391132354736, + "logps/chosen": -849.6744995117188, + "logps/rejected": -2212.3251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.785027980804443, + "rewards/margins": 13.894268989562988, + "rewards/rejected": -21.67930030822754, + "step": 34170 + }, + { + "epoch": 2.04, + "learning_rate": 1.4088764305798358e-06, + "logits/chosen": -2.4835879802703857, + "logits/rejected": -1.7230949401855469, + "logps/chosen": -807.40087890625, + "logps/rejected": -2156.290771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.353536128997803, + "rewards/margins": 13.779680252075195, + "rewards/rejected": -21.133216857910156, + "step": 34180 + }, + { + "epoch": 2.04, + "learning_rate": 1.4073160463722274e-06, + "logits/chosen": -2.466811418533325, + "logits/rejected": -1.8107086420059204, + "logps/chosen": -812.6070556640625, + "logps/rejected": -2224.072509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.412637233734131, + "rewards/margins": 14.373525619506836, + "rewards/rejected": -21.78616714477539, + "step": 34190 + }, + { + "epoch": 2.04, + "learning_rate": 1.405756188182661e-06, + "logits/chosen": -2.5183205604553223, + "logits/rejected": -1.757310152053833, + "logps/chosen": -798.6788940429688, + "logps/rejected": -2247.516357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.332934379577637, + "rewards/margins": 14.702199935913086, + "rewards/rejected": -22.035133361816406, + "step": 34200 + }, + { + "epoch": 2.04, + "learning_rate": 1.4041968567620508e-06, + "logits/chosen": -2.5232670307159424, + "logits/rejected": -1.8189222812652588, + "logps/chosen": -830.1964111328125, + "logps/rejected": -2236.4140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.589951992034912, + "rewards/margins": 14.340237617492676, + "rewards/rejected": -21.930187225341797, + "step": 34210 + }, + { + "epoch": 2.04, + "learning_rate": 1.4026380528610605e-06, + "logits/chosen": -2.4609978199005127, + "logits/rejected": -1.716183066368103, + "logps/chosen": -816.3709716796875, + "logps/rejected": -2278.06689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.481976509094238, + "rewards/margins": 14.842763900756836, + "rewards/rejected": -22.32474136352539, + "step": 34220 + }, + { + "epoch": 2.04, + "learning_rate": 1.4010797772300972e-06, + "logits/chosen": -2.499457359313965, + "logits/rejected": -1.7338424921035767, + "logps/chosen": -830.6826171875, + "logps/rejected": -2250.523193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.538830757141113, + "rewards/margins": 14.521125793457031, + "rewards/rejected": -22.05995750427246, + "step": 34230 + }, + { + "epoch": 2.04, + "learning_rate": 1.3995220306193153e-06, + "logits/chosen": -2.5387425422668457, + "logits/rejected": -1.6714270114898682, + "logps/chosen": -823.1107177734375, + "logps/rejected": -2236.374267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.526161193847656, + "rewards/margins": 14.38208293914795, + "rewards/rejected": -21.90824317932129, + "step": 34240 + }, + { + "epoch": 2.04, + "learning_rate": 1.3979648137786136e-06, + "logits/chosen": -2.516902446746826, + "logits/rejected": -1.7098089456558228, + "logps/chosen": -810.763671875, + "logps/rejected": -2257.50830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.394152641296387, + "rewards/margins": 14.7387113571167, + "rewards/rejected": -22.132863998413086, + "step": 34250 + }, + { + "epoch": 2.04, + "learning_rate": 1.396408127457637e-06, + "logits/chosen": -2.5138823986053467, + "logits/rejected": -1.7918081283569336, + "logps/chosen": -787.5375366210938, + "logps/rejected": -2152.002685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.200006008148193, + "rewards/margins": 13.876543045043945, + "rewards/rejected": -21.076547622680664, + "step": 34260 + }, + { + "epoch": 2.04, + "learning_rate": 1.394851972405773e-06, + "logits/chosen": -2.444857120513916, + "logits/rejected": -1.7116763591766357, + "logps/chosen": -796.8357543945312, + "logps/rejected": -2238.62939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.292193412780762, + "rewards/margins": 14.66880989074707, + "rewards/rejected": -21.96100616455078, + "step": 34270 + }, + { + "epoch": 2.04, + "learning_rate": 1.393296349372156e-06, + "logits/chosen": -2.5300135612487793, + "logits/rejected": -1.7771097421646118, + "logps/chosen": -807.907958984375, + "logps/rejected": -2277.6298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3668999671936035, + "rewards/margins": 14.967839241027832, + "rewards/rejected": -22.33473777770996, + "step": 34280 + }, + { + "epoch": 2.04, + "learning_rate": 1.3917412591056623e-06, + "logits/chosen": -2.4969825744628906, + "logits/rejected": -1.7854877710342407, + "logps/chosen": -809.2081298828125, + "logps/rejected": -2166.295654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.422126770019531, + "rewards/margins": 13.797212600708008, + "rewards/rejected": -21.219337463378906, + "step": 34290 + }, + { + "epoch": 2.05, + "learning_rate": 1.390186702354912e-06, + "logits/chosen": -2.5350446701049805, + "logits/rejected": -1.77541983127594, + "logps/chosen": -818.230712890625, + "logps/rejected": -2199.02099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.518530368804932, + "rewards/margins": 14.027021408081055, + "rewards/rejected": -21.545551300048828, + "step": 34300 + }, + { + "epoch": 2.05, + "learning_rate": 1.3886326798682691e-06, + "logits/chosen": -2.5678534507751465, + "logits/rejected": -1.8574835062026978, + "logps/chosen": -841.4056396484375, + "logps/rejected": -2403.12548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.649685859680176, + "rewards/margins": 15.936233520507812, + "rewards/rejected": -23.585918426513672, + "step": 34310 + }, + { + "epoch": 2.05, + "learning_rate": 1.3870791923938408e-06, + "logits/chosen": -2.4721696376800537, + "logits/rejected": -1.721997857093811, + "logps/chosen": -810.27392578125, + "logps/rejected": -2251.363525390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.374458312988281, + "rewards/margins": 14.69279956817627, + "rewards/rejected": -22.067256927490234, + "step": 34320 + }, + { + "epoch": 2.05, + "learning_rate": 1.3855262406794733e-06, + "logits/chosen": -2.5438640117645264, + "logits/rejected": -1.6497876644134521, + "logps/chosen": -798.5442504882812, + "logps/rejected": -2210.00927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.259193420410156, + "rewards/margins": 14.402918815612793, + "rewards/rejected": -21.662113189697266, + "step": 34330 + }, + { + "epoch": 2.05, + "learning_rate": 1.3839738254727617e-06, + "logits/chosen": -2.4921631813049316, + "logits/rejected": -1.7729063034057617, + "logps/chosen": -788.6676025390625, + "logps/rejected": -2239.535400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.2452592849731445, + "rewards/margins": 14.702966690063477, + "rewards/rejected": -21.948225021362305, + "step": 34340 + }, + { + "epoch": 2.05, + "learning_rate": 1.3824219475210337e-06, + "logits/chosen": -2.5219860076904297, + "logits/rejected": -1.7862632274627686, + "logps/chosen": -812.9715576171875, + "logps/rejected": -2197.457763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.391839504241943, + "rewards/margins": 14.139782905578613, + "rewards/rejected": -21.53162384033203, + "step": 34350 + }, + { + "epoch": 2.05, + "learning_rate": 1.3808706075713682e-06, + "logits/chosen": -2.537139892578125, + "logits/rejected": -1.7249910831451416, + "logps/chosen": -799.6058959960938, + "logps/rejected": -2139.88720703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3189697265625, + "rewards/margins": 13.642303466796875, + "rewards/rejected": -20.961273193359375, + "step": 34360 + }, + { + "epoch": 2.05, + "learning_rate": 1.379319806370576e-06, + "logits/chosen": -2.5144615173339844, + "logits/rejected": -1.7678959369659424, + "logps/chosen": -792.0695190429688, + "logps/rejected": -2204.08740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.270102500915527, + "rewards/margins": 14.322677612304688, + "rewards/rejected": -21.592782974243164, + "step": 34370 + }, + { + "epoch": 2.05, + "learning_rate": 1.3777695446652167e-06, + "logits/chosen": -2.523193120956421, + "logits/rejected": -1.7774581909179688, + "logps/chosen": -795.4185791015625, + "logps/rejected": -2240.257568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.240029811859131, + "rewards/margins": 14.727131843566895, + "rewards/rejected": -21.967161178588867, + "step": 34380 + }, + { + "epoch": 2.05, + "learning_rate": 1.3762198232015823e-06, + "logits/chosen": -2.5283429622650146, + "logits/rejected": -1.8008285760879517, + "logps/chosen": -819.12548828125, + "logps/rejected": -2197.19921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5232343673706055, + "rewards/margins": 14.010421752929688, + "rewards/rejected": -21.53365707397461, + "step": 34390 + }, + { + "epoch": 2.05, + "learning_rate": 1.3746706427257122e-06, + "logits/chosen": -2.530510425567627, + "logits/rejected": -1.787541151046753, + "logps/chosen": -787.8037719726562, + "logps/rejected": -2173.997802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1503801345825195, + "rewards/margins": 14.16205883026123, + "rewards/rejected": -21.31243896484375, + "step": 34400 + }, + { + "epoch": 2.05, + "learning_rate": 1.3731220039833798e-06, + "logits/chosen": -2.498396396636963, + "logits/rejected": -1.7950083017349243, + "logps/chosen": -818.5872192382812, + "logps/rejected": -2197.937744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4527387619018555, + "rewards/margins": 14.073068618774414, + "rewards/rejected": -21.525806427001953, + "step": 34410 + }, + { + "epoch": 2.05, + "learning_rate": 1.3715739077201002e-06, + "logits/chosen": -2.52899169921875, + "logits/rejected": -1.7399189472198486, + "logps/chosen": -808.1429443359375, + "logps/rejected": -2222.594970703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.380139350891113, + "rewards/margins": 14.411417961120605, + "rewards/rejected": -21.79155921936035, + "step": 34420 + }, + { + "epoch": 2.05, + "learning_rate": 1.370026354681127e-06, + "logits/chosen": -2.4842047691345215, + "logits/rejected": -1.7452627420425415, + "logps/chosen": -794.3866577148438, + "logps/rejected": -2157.595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.234076023101807, + "rewards/margins": 13.893983840942383, + "rewards/rejected": -21.128063201904297, + "step": 34430 + }, + { + "epoch": 2.05, + "learning_rate": 1.3684793456114526e-06, + "logits/chosen": -2.4574973583221436, + "logits/rejected": -1.7642682790756226, + "logps/chosen": -843.6238403320312, + "logps/rejected": -2201.60498046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.712979316711426, + "rewards/margins": 13.862848281860352, + "rewards/rejected": -21.575824737548828, + "step": 34440 + }, + { + "epoch": 2.05, + "learning_rate": 1.3669328812558064e-06, + "logits/chosen": -2.493861675262451, + "logits/rejected": -1.685726523399353, + "logps/chosen": -785.5330810546875, + "logps/rejected": -2230.067138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.070505619049072, + "rewards/margins": 14.790141105651855, + "rewards/rejected": -21.860645294189453, + "step": 34450 + }, + { + "epoch": 2.05, + "learning_rate": 1.3653869623586568e-06, + "logits/chosen": -2.527322769165039, + "logits/rejected": -1.7490707635879517, + "logps/chosen": -818.6444702148438, + "logps/rejected": -2189.93505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.480867862701416, + "rewards/margins": 13.982742309570312, + "rewards/rejected": -21.46360969543457, + "step": 34460 + }, + { + "epoch": 2.06, + "learning_rate": 1.3638415896642093e-06, + "logits/chosen": -2.5469272136688232, + "logits/rejected": -1.8571422100067139, + "logps/chosen": -818.6285400390625, + "logps/rejected": -2292.29736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.426120758056641, + "rewards/margins": 15.05920696258545, + "rewards/rejected": -22.485328674316406, + "step": 34470 + }, + { + "epoch": 2.06, + "learning_rate": 1.3622967639164059e-06, + "logits/chosen": -2.5160446166992188, + "logits/rejected": -1.7880566120147705, + "logps/chosen": -821.7969970703125, + "logps/rejected": -2302.359619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5180511474609375, + "rewards/margins": 15.0651216506958, + "rewards/rejected": -22.583171844482422, + "step": 34480 + }, + { + "epoch": 2.06, + "learning_rate": 1.3607524858589256e-06, + "logits/chosen": -2.5739071369171143, + "logits/rejected": -1.8381913900375366, + "logps/chosen": -816.77294921875, + "logps/rejected": -2261.61669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.43662166595459, + "rewards/margins": 14.74498462677002, + "rewards/rejected": -22.18160629272461, + "step": 34490 + }, + { + "epoch": 2.06, + "learning_rate": 1.359208756235184e-06, + "logits/chosen": -2.5172760486602783, + "logits/rejected": -1.6628952026367188, + "logps/chosen": -805.3494262695312, + "logps/rejected": -2127.262451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.331387519836426, + "rewards/margins": 13.508112907409668, + "rewards/rejected": -20.83949851989746, + "step": 34500 + }, + { + "epoch": 2.06, + "learning_rate": 1.357665575788333e-06, + "logits/chosen": -2.489934206008911, + "logits/rejected": -1.8193798065185547, + "logps/chosen": -778.5245971679688, + "logps/rejected": -2164.257080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0729522705078125, + "rewards/margins": 14.13226318359375, + "rewards/rejected": -21.205217361450195, + "step": 34510 + }, + { + "epoch": 2.06, + "learning_rate": 1.356122945261259e-06, + "logits/chosen": -2.5119714736938477, + "logits/rejected": -1.757227897644043, + "logps/chosen": -807.6915283203125, + "logps/rejected": -2178.265869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.384145259857178, + "rewards/margins": 13.971354484558105, + "rewards/rejected": -21.355499267578125, + "step": 34520 + }, + { + "epoch": 2.06, + "learning_rate": 1.3545808653965847e-06, + "logits/chosen": -2.493941307067871, + "logits/rejected": -1.7679609060287476, + "logps/chosen": -809.5314331054688, + "logps/rejected": -2158.583740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3864593505859375, + "rewards/margins": 13.74388313293457, + "rewards/rejected": -21.130342483520508, + "step": 34530 + }, + { + "epoch": 2.06, + "learning_rate": 1.3530393369366674e-06, + "logits/chosen": -2.5002918243408203, + "logits/rejected": -1.7042357921600342, + "logps/chosen": -796.5634155273438, + "logps/rejected": -2280.86669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.267889499664307, + "rewards/margins": 15.094965934753418, + "rewards/rejected": -22.362857818603516, + "step": 34540 + }, + { + "epoch": 2.06, + "learning_rate": 1.3514983606235982e-06, + "logits/chosen": -2.517615556716919, + "logits/rejected": -1.804373025894165, + "logps/chosen": -797.4029541015625, + "logps/rejected": -2267.802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.262181282043457, + "rewards/margins": 14.970476150512695, + "rewards/rejected": -22.232656478881836, + "step": 34550 + }, + { + "epoch": 2.06, + "learning_rate": 1.349957937199204e-06, + "logits/chosen": -2.5277302265167236, + "logits/rejected": -1.75689697265625, + "logps/chosen": -817.4351196289062, + "logps/rejected": -2197.221923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4109296798706055, + "rewards/margins": 14.11932373046875, + "rewards/rejected": -21.53025245666504, + "step": 34560 + }, + { + "epoch": 2.06, + "learning_rate": 1.348418067405044e-06, + "logits/chosen": -2.5092737674713135, + "logits/rejected": -1.8004671335220337, + "logps/chosen": -798.6683349609375, + "logps/rejected": -2149.906494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.330264091491699, + "rewards/margins": 13.71953296661377, + "rewards/rejected": -21.0497989654541, + "step": 34570 + }, + { + "epoch": 2.06, + "learning_rate": 1.3468787519824118e-06, + "logits/chosen": -2.486771583557129, + "logits/rejected": -1.8487085103988647, + "logps/chosen": -825.2203979492188, + "logps/rejected": -2213.345947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.4788970947265625, + "rewards/margins": 14.199323654174805, + "rewards/rejected": -21.678220748901367, + "step": 34580 + }, + { + "epoch": 2.06, + "learning_rate": 1.3453399916723343e-06, + "logits/chosen": -2.528424024581909, + "logits/rejected": -1.8214601278305054, + "logps/chosen": -761.6395263671875, + "logps/rejected": -2262.77099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.875138759613037, + "rewards/margins": 15.31170654296875, + "rewards/rejected": -22.186843872070312, + "step": 34590 + }, + { + "epoch": 2.06, + "learning_rate": 1.3438017872155684e-06, + "logits/chosen": -2.457334041595459, + "logits/rejected": -1.7455918788909912, + "logps/chosen": -795.9315795898438, + "logps/rejected": -2170.0537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.238150596618652, + "rewards/margins": 14.015439987182617, + "rewards/rejected": -21.253591537475586, + "step": 34600 + }, + { + "epoch": 2.06, + "learning_rate": 1.3422641393526093e-06, + "logits/chosen": -2.4972217082977295, + "logits/rejected": -1.795231819152832, + "logps/chosen": -802.5797729492188, + "logps/rejected": -2282.41015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.296549320220947, + "rewards/margins": 15.076533317565918, + "rewards/rejected": -22.37308120727539, + "step": 34610 + }, + { + "epoch": 2.06, + "learning_rate": 1.3407270488236769e-06, + "logits/chosen": -2.47100830078125, + "logits/rejected": -1.6924337148666382, + "logps/chosen": -826.7725830078125, + "logps/rejected": -2219.13427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.553122043609619, + "rewards/margins": 14.19572925567627, + "rewards/rejected": -21.748851776123047, + "step": 34620 + }, + { + "epoch": 2.06, + "learning_rate": 1.3391905163687299e-06, + "logits/chosen": -2.495325803756714, + "logits/rejected": -1.7847445011138916, + "logps/chosen": -820.1849365234375, + "logps/rejected": -2168.59423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.483916282653809, + "rewards/margins": 13.776254653930664, + "rewards/rejected": -21.260168075561523, + "step": 34630 + }, + { + "epoch": 2.07, + "learning_rate": 1.3376545427274518e-06, + "logits/chosen": -2.455998182296753, + "logits/rejected": -1.7321354150772095, + "logps/chosen": -802.4401245117188, + "logps/rejected": -2202.46484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.318572998046875, + "rewards/margins": 14.26146411895752, + "rewards/rejected": -21.580039978027344, + "step": 34640 + }, + { + "epoch": 2.07, + "learning_rate": 1.3361191286392644e-06, + "logits/chosen": -2.5318989753723145, + "logits/rejected": -1.8519957065582275, + "logps/chosen": -795.5228881835938, + "logps/rejected": -2268.722900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.240797519683838, + "rewards/margins": 15.004323959350586, + "rewards/rejected": -22.245121002197266, + "step": 34650 + }, + { + "epoch": 2.07, + "learning_rate": 1.3345842748433113e-06, + "logits/chosen": -2.51600980758667, + "logits/rejected": -1.6499464511871338, + "logps/chosen": -810.2525634765625, + "logps/rejected": -2281.2197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.423464775085449, + "rewards/margins": 14.938644409179688, + "rewards/rejected": -22.36210823059082, + "step": 34660 + }, + { + "epoch": 2.07, + "learning_rate": 1.3330499820784753e-06, + "logits/chosen": -2.509093999862671, + "logits/rejected": -1.7927805185317993, + "logps/chosen": -810.0060424804688, + "logps/rejected": -2214.026611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.38205099105835, + "rewards/margins": 14.318690299987793, + "rewards/rejected": -21.700740814208984, + "step": 34670 + }, + { + "epoch": 2.07, + "learning_rate": 1.3315162510833623e-06, + "logits/chosen": -2.513042449951172, + "logits/rejected": -1.7473480701446533, + "logps/chosen": -790.429931640625, + "logps/rejected": -2152.13232421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.236876487731934, + "rewards/margins": 13.848329544067383, + "rewards/rejected": -21.085205078125, + "step": 34680 + }, + { + "epoch": 2.07, + "learning_rate": 1.3299830825963108e-06, + "logits/chosen": -2.504176139831543, + "logits/rejected": -1.809409737586975, + "logps/chosen": -802.7638549804688, + "logps/rejected": -2189.394775390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.342413425445557, + "rewards/margins": 14.107324600219727, + "rewards/rejected": -21.449737548828125, + "step": 34690 + }, + { + "epoch": 2.07, + "learning_rate": 1.3284504773553889e-06, + "logits/chosen": -2.4617526531219482, + "logits/rejected": -1.7847099304199219, + "logps/chosen": -819.0577392578125, + "logps/rejected": -2232.755126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5128960609436035, + "rewards/margins": 14.365511894226074, + "rewards/rejected": -21.878408432006836, + "step": 34700 + }, + { + "epoch": 2.07, + "learning_rate": 1.3269184360983919e-06, + "logits/chosen": -2.4528801441192627, + "logits/rejected": -1.728219985961914, + "logps/chosen": -789.573974609375, + "logps/rejected": -2210.92724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1494879722595215, + "rewards/margins": 14.511924743652344, + "rewards/rejected": -21.66141128540039, + "step": 34710 + }, + { + "epoch": 2.07, + "learning_rate": 1.325386959562845e-06, + "logits/chosen": -2.514634370803833, + "logits/rejected": -1.8637981414794922, + "logps/chosen": -782.4089965820312, + "logps/rejected": -2140.462158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.094649314880371, + "rewards/margins": 13.873323440551758, + "rewards/rejected": -20.967971801757812, + "step": 34720 + }, + { + "epoch": 2.07, + "learning_rate": 1.3238560484860008e-06, + "logits/chosen": -2.5251643657684326, + "logits/rejected": -1.8345845937728882, + "logps/chosen": -815.6251831054688, + "logps/rejected": -2320.75, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.369692325592041, + "rewards/margins": 15.39610767364502, + "rewards/rejected": -22.765798568725586, + "step": 34730 + }, + { + "epoch": 2.07, + "learning_rate": 1.3223257036048395e-06, + "logits/chosen": -2.5265631675720215, + "logits/rejected": -1.8014147281646729, + "logps/chosen": -807.2095947265625, + "logps/rejected": -2234.87255859375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.345133304595947, + "rewards/margins": 14.547691345214844, + "rewards/rejected": -21.892822265625, + "step": 34740 + }, + { + "epoch": 2.07, + "learning_rate": 1.3207959256560699e-06, + "logits/chosen": -2.5096588134765625, + "logits/rejected": -1.7773797512054443, + "logps/chosen": -818.380859375, + "logps/rejected": -2359.66796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.540708065032959, + "rewards/margins": 15.615072250366211, + "rewards/rejected": -23.155780792236328, + "step": 34750 + }, + { + "epoch": 2.07, + "learning_rate": 1.3192667153761263e-06, + "logits/chosen": -2.4820916652679443, + "logits/rejected": -1.6814100742340088, + "logps/chosen": -808.8173217773438, + "logps/rejected": -2313.9765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3797149658203125, + "rewards/margins": 15.31364631652832, + "rewards/rejected": -22.693363189697266, + "step": 34760 + }, + { + "epoch": 2.07, + "learning_rate": 1.3177380735011714e-06, + "logits/chosen": -2.517345905303955, + "logits/rejected": -1.8202388286590576, + "logps/chosen": -807.4417724609375, + "logps/rejected": -2244.39599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.350807189941406, + "rewards/margins": 14.655540466308594, + "rewards/rejected": -22.00634765625, + "step": 34770 + }, + { + "epoch": 2.07, + "learning_rate": 1.3162100007670922e-06, + "logits/chosen": -2.4541287422180176, + "logits/rejected": -1.7364451885223389, + "logps/chosen": -815.5255126953125, + "logps/rejected": -2206.645751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.380183219909668, + "rewards/margins": 14.238546371459961, + "rewards/rejected": -21.618732452392578, + "step": 34780 + }, + { + "epoch": 2.07, + "learning_rate": 1.3146824979095042e-06, + "logits/chosen": -2.518389940261841, + "logits/rejected": -1.8141193389892578, + "logps/chosen": -800.7112426757812, + "logps/rejected": -2165.831298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.245606422424316, + "rewards/margins": 13.970483779907227, + "rewards/rejected": -21.216087341308594, + "step": 34790 + }, + { + "epoch": 2.08, + "learning_rate": 1.3131555656637459e-06, + "logits/chosen": -2.5471994876861572, + "logits/rejected": -1.833653450012207, + "logps/chosen": -805.3031005859375, + "logps/rejected": -2202.95361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.359830379486084, + "rewards/margins": 14.22161865234375, + "rewards/rejected": -21.58144760131836, + "step": 34800 + }, + { + "epoch": 2.08, + "learning_rate": 1.3116292047648834e-06, + "logits/chosen": -2.5081703662872314, + "logits/rejected": -1.8227312564849854, + "logps/chosen": -821.4228515625, + "logps/rejected": -2266.66455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.5440354347229, + "rewards/margins": 14.688859939575195, + "rewards/rejected": -22.232894897460938, + "step": 34810 + }, + { + "epoch": 2.08, + "learning_rate": 1.3101034159477066e-06, + "logits/chosen": -2.502800226211548, + "logits/rejected": -1.7801271677017212, + "logps/chosen": -835.6481323242188, + "logps/rejected": -2298.56982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.67772912979126, + "rewards/margins": 14.8773193359375, + "rewards/rejected": -22.55504608154297, + "step": 34820 + }, + { + "epoch": 2.08, + "learning_rate": 1.3085781999467303e-06, + "logits/chosen": -2.5139145851135254, + "logits/rejected": -1.7778466939926147, + "logps/chosen": -808.8766479492188, + "logps/rejected": -2183.044677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.3918561935424805, + "rewards/margins": 14.004404067993164, + "rewards/rejected": -21.396257400512695, + "step": 34830 + }, + { + "epoch": 2.08, + "learning_rate": 1.3070535574961935e-06, + "logits/chosen": -2.5286993980407715, + "logits/rejected": -1.8556945323944092, + "logps/chosen": -791.0948486328125, + "logps/rejected": -2259.6201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.19167947769165, + "rewards/margins": 14.964681625366211, + "rewards/rejected": -22.156360626220703, + "step": 34840 + }, + { + "epoch": 2.08, + "learning_rate": 1.3055294893300584e-06, + "logits/chosen": -2.5054328441619873, + "logits/rejected": -1.7828699350357056, + "logps/chosen": -809.8742065429688, + "logps/rejected": -2161.008056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.390506744384766, + "rewards/margins": 13.787820816040039, + "rewards/rejected": -21.178329467773438, + "step": 34850 + }, + { + "epoch": 2.08, + "learning_rate": 1.3040059961820135e-06, + "logits/chosen": -2.5405287742614746, + "logits/rejected": -1.7897207736968994, + "logps/chosen": -758.90673828125, + "logps/rejected": -2076.332763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.896737575531006, + "rewards/margins": 13.419672012329102, + "rewards/rejected": -20.316410064697266, + "step": 34860 + }, + { + "epoch": 2.08, + "learning_rate": 1.3024830787854643e-06, + "logits/chosen": -2.5315659046173096, + "logits/rejected": -1.798990249633789, + "logps/chosen": -812.3602905273438, + "logps/rejected": -2209.906005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.405311584472656, + "rewards/margins": 14.243309020996094, + "rewards/rejected": -21.64862060546875, + "step": 34870 + }, + { + "epoch": 2.08, + "learning_rate": 1.3009607378735477e-06, + "logits/chosen": -2.515860080718994, + "logits/rejected": -1.7381019592285156, + "logps/chosen": -808.0928955078125, + "logps/rejected": -2142.47509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.39577579498291, + "rewards/margins": 13.59552001953125, + "rewards/rejected": -20.99129295349121, + "step": 34880 + }, + { + "epoch": 2.08, + "learning_rate": 1.2994389741791152e-06, + "logits/chosen": -2.483663320541382, + "logits/rejected": -1.6500955820083618, + "logps/chosen": -797.860595703125, + "logps/rejected": -2200.30126953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.311213493347168, + "rewards/margins": 14.250442504882812, + "rewards/rejected": -21.561656951904297, + "step": 34890 + }, + { + "epoch": 2.08, + "learning_rate": 1.2979177884347467e-06, + "logits/chosen": -2.5160491466522217, + "logits/rejected": -1.8222248554229736, + "logps/chosen": -782.2860107421875, + "logps/rejected": -2172.06005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1596174240112305, + "rewards/margins": 14.111261367797852, + "rewards/rejected": -21.2708797454834, + "step": 34900 + }, + { + "epoch": 2.08, + "learning_rate": 1.2963971813727377e-06, + "logits/chosen": -2.5597829818725586, + "logits/rejected": -1.7712081670761108, + "logps/chosen": -767.5783081054688, + "logps/rejected": -2193.233154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.933169364929199, + "rewards/margins": 14.565454483032227, + "rewards/rejected": -21.49862289428711, + "step": 34910 + }, + { + "epoch": 2.08, + "learning_rate": 1.294877153725112e-06, + "logits/chosen": -2.467837333679199, + "logits/rejected": -1.6451876163482666, + "logps/chosen": -773.7822875976562, + "logps/rejected": -2076.495361328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.999284267425537, + "rewards/margins": 13.32348918914795, + "rewards/rejected": -20.32277488708496, + "step": 34920 + }, + { + "epoch": 2.08, + "learning_rate": 1.2933577062236085e-06, + "logits/chosen": -2.530395984649658, + "logits/rejected": -1.7589704990386963, + "logps/chosen": -778.6793212890625, + "logps/rejected": -2283.983642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.047213554382324, + "rewards/margins": 15.336706161499023, + "rewards/rejected": -22.383920669555664, + "step": 34930 + }, + { + "epoch": 2.08, + "learning_rate": 1.2918388395996894e-06, + "logits/chosen": -2.560093402862549, + "logits/rejected": -1.8280225992202759, + "logps/chosen": -764.8896484375, + "logps/rejected": -2175.24072265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.929444313049316, + "rewards/margins": 14.388376235961914, + "rewards/rejected": -21.317819595336914, + "step": 34940 + }, + { + "epoch": 2.08, + "learning_rate": 1.2903205545845378e-06, + "logits/chosen": -2.534644603729248, + "logits/rejected": -1.8274320363998413, + "logps/chosen": -790.5472412109375, + "logps/rejected": -2265.8359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.224372863769531, + "rewards/margins": 14.97332763671875, + "rewards/rejected": -22.19770050048828, + "step": 34950 + }, + { + "epoch": 2.08, + "learning_rate": 1.288802851909056e-06, + "logits/chosen": -2.4403793811798096, + "logits/rejected": -1.759124755859375, + "logps/chosen": -793.7973022460938, + "logps/rejected": -2137.794677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.22207498550415, + "rewards/margins": 13.722038269042969, + "rewards/rejected": -20.944110870361328, + "step": 34960 + }, + { + "epoch": 2.09, + "learning_rate": 1.2872857323038657e-06, + "logits/chosen": -2.54378342628479, + "logits/rejected": -1.7931878566741943, + "logps/chosen": -766.8672485351562, + "logps/rejected": -2201.69091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9246368408203125, + "rewards/margins": 14.654594421386719, + "rewards/rejected": -21.579233169555664, + "step": 34970 + }, + { + "epoch": 2.09, + "learning_rate": 1.285769196499308e-06, + "logits/chosen": -2.51115345954895, + "logits/rejected": -1.8159217834472656, + "logps/chosen": -761.2904052734375, + "logps/rejected": -2108.93017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.945725440979004, + "rewards/margins": 13.701403617858887, + "rewards/rejected": -20.647130966186523, + "step": 34980 + }, + { + "epoch": 2.09, + "learning_rate": 1.2842532452254442e-06, + "logits/chosen": -2.4949393272399902, + "logits/rejected": -1.7156842947006226, + "logps/chosen": -752.3436889648438, + "logps/rejected": -2179.467041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.847900390625, + "rewards/margins": 14.511503219604492, + "rewards/rejected": -21.359403610229492, + "step": 34990 + }, + { + "epoch": 2.09, + "learning_rate": 1.282737879212052e-06, + "logits/chosen": -2.51965594291687, + "logits/rejected": -1.7309939861297607, + "logps/chosen": -753.8159790039062, + "logps/rejected": -2213.89306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.782755374908447, + "rewards/margins": 14.922477722167969, + "rewards/rejected": -21.70523452758789, + "step": 35000 + }, + { + "epoch": 2.09, + "eval_logits/chosen": -2.4632515907287598, + "eval_logits/rejected": -1.9800091981887817, + "eval_logps/chosen": -817.44775390625, + "eval_logps/rejected": -2071.88623046875, + "eval_loss": 2.3736276489216834e-05, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -7.503733158111572, + "eval_rewards/margins": 12.759647369384766, + "eval_rewards/rejected": -20.263381958007812, + "eval_runtime": 3.9014, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.256, + "step": 35000 + }, + { + "epoch": 2.09, + "learning_rate": 1.28122309918863e-06, + "logits/chosen": -2.493863821029663, + "logits/rejected": -1.7761589288711548, + "logps/chosen": -779.875, + "logps/rejected": -2239.20751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.034693241119385, + "rewards/margins": 14.918821334838867, + "rewards/rejected": -21.95351791381836, + "step": 35010 + }, + { + "epoch": 2.09, + "learning_rate": 1.279708905884392e-06, + "logits/chosen": -2.4750921726226807, + "logits/rejected": -1.768174409866333, + "logps/chosen": -755.0006103515625, + "logps/rejected": -2111.52734375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.839388370513916, + "rewards/margins": 13.839503288269043, + "rewards/rejected": -20.678890228271484, + "step": 35020 + }, + { + "epoch": 2.09, + "learning_rate": 1.2781953000282712e-06, + "logits/chosen": -2.5255022048950195, + "logits/rejected": -1.8301340341567993, + "logps/chosen": -758.30126953125, + "logps/rejected": -2231.516845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.924992561340332, + "rewards/margins": 14.933561325073242, + "rewards/rejected": -21.85855484008789, + "step": 35030 + }, + { + "epoch": 2.09, + "learning_rate": 1.2766822823489175e-06, + "logits/chosen": -2.5160019397735596, + "logits/rejected": -1.7099788188934326, + "logps/chosen": -765.557373046875, + "logps/rejected": -2140.430419921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.91058349609375, + "rewards/margins": 14.044891357421875, + "rewards/rejected": -20.955474853515625, + "step": 35040 + }, + { + "epoch": 2.09, + "learning_rate": 1.2751698535746976e-06, + "logits/chosen": -2.5070643424987793, + "logits/rejected": -1.6588973999023438, + "logps/chosen": -765.4444580078125, + "logps/rejected": -2250.58984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.92401647567749, + "rewards/margins": 15.130536079406738, + "rewards/rejected": -22.054553985595703, + "step": 35050 + }, + { + "epoch": 2.09, + "learning_rate": 1.2736580144336941e-06, + "logits/chosen": -2.5255322456359863, + "logits/rejected": -1.772782564163208, + "logps/chosen": -763.9199829101562, + "logps/rejected": -2171.54150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.94464635848999, + "rewards/margins": 14.334625244140625, + "rewards/rejected": -21.279272079467773, + "step": 35060 + }, + { + "epoch": 2.09, + "learning_rate": 1.2721467656537074e-06, + "logits/chosen": -2.5325794219970703, + "logits/rejected": -1.8266637325286865, + "logps/chosen": -795.626708984375, + "logps/rejected": -2188.731689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.246699333190918, + "rewards/margins": 14.1951904296875, + "rewards/rejected": -21.441890716552734, + "step": 35070 + }, + { + "epoch": 2.09, + "learning_rate": 1.2706361079622514e-06, + "logits/chosen": -2.5467278957366943, + "logits/rejected": -1.902722716331482, + "logps/chosen": -774.6323852539062, + "logps/rejected": -2195.666748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.049210548400879, + "rewards/margins": 14.465089797973633, + "rewards/rejected": -21.514301300048828, + "step": 35080 + }, + { + "epoch": 2.09, + "learning_rate": 1.2691260420865582e-06, + "logits/chosen": -2.5613765716552734, + "logits/rejected": -1.8324003219604492, + "logps/chosen": -782.9759521484375, + "logps/rejected": -2231.0224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1499505043029785, + "rewards/margins": 14.723426818847656, + "rewards/rejected": -21.873376846313477, + "step": 35090 + }, + { + "epoch": 2.09, + "learning_rate": 1.2676165687535719e-06, + "logits/chosen": -2.54140043258667, + "logits/rejected": -1.7386831045150757, + "logps/chosen": -785.3960571289062, + "logps/rejected": -2243.388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.1618828773498535, + "rewards/margins": 14.817594528198242, + "rewards/rejected": -21.979476928710938, + "step": 35100 + }, + { + "epoch": 2.09, + "learning_rate": 1.266107688689955e-06, + "logits/chosen": -2.5072546005249023, + "logits/rejected": -1.769521713256836, + "logps/chosen": -755.4143676757812, + "logps/rejected": -2187.22314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.861536502838135, + "rewards/margins": 14.563687324523926, + "rewards/rejected": -21.42522430419922, + "step": 35110 + }, + { + "epoch": 2.09, + "learning_rate": 1.2645994026220789e-06, + "logits/chosen": -2.5386016368865967, + "logits/rejected": -1.8022708892822266, + "logps/chosen": -784.450439453125, + "logps/rejected": -2206.08154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159038543701172, + "rewards/margins": 14.456991195678711, + "rewards/rejected": -21.616031646728516, + "step": 35120 + }, + { + "epoch": 2.09, + "learning_rate": 1.2630917112760365e-06, + "logits/chosen": -2.5130481719970703, + "logits/rejected": -1.824979543685913, + "logps/chosen": -755.7005615234375, + "logps/rejected": -2190.8193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.917962074279785, + "rewards/margins": 14.550722122192383, + "rewards/rejected": -21.468685150146484, + "step": 35130 + }, + { + "epoch": 2.1, + "learning_rate": 1.261584615377627e-06, + "logits/chosen": -2.5016815662384033, + "logits/rejected": -1.7003854513168335, + "logps/chosen": -782.4766845703125, + "logps/rejected": -2170.5302734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.067788600921631, + "rewards/margins": 14.180340766906738, + "rewards/rejected": -21.248130798339844, + "step": 35140 + }, + { + "epoch": 2.1, + "learning_rate": 1.2600781156523695e-06, + "logits/chosen": -2.4971961975097656, + "logits/rejected": -1.7406575679779053, + "logps/chosen": -760.4486694335938, + "logps/rejected": -2150.88037109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.926634311676025, + "rewards/margins": 14.12077522277832, + "rewards/rejected": -21.047409057617188, + "step": 35150 + }, + { + "epoch": 2.1, + "learning_rate": 1.2585722128254896e-06, + "logits/chosen": -2.5131640434265137, + "logits/rejected": -1.7590163946151733, + "logps/chosen": -766.4987182617188, + "logps/rejected": -2173.13623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.958189487457275, + "rewards/margins": 14.322608947753906, + "rewards/rejected": -21.280799865722656, + "step": 35160 + }, + { + "epoch": 2.1, + "learning_rate": 1.2570669076219322e-06, + "logits/chosen": -2.477689027786255, + "logits/rejected": -1.6795263290405273, + "logps/chosen": -793.2244873046875, + "logps/rejected": -2157.77197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.241261959075928, + "rewards/margins": 13.906590461730957, + "rewards/rejected": -21.14785385131836, + "step": 35170 + }, + { + "epoch": 2.1, + "learning_rate": 1.2555622007663482e-06, + "logits/chosen": -2.5274648666381836, + "logits/rejected": -1.8773510456085205, + "logps/chosen": -766.1098022460938, + "logps/rejected": -2207.69970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.961302280426025, + "rewards/margins": 14.68235969543457, + "rewards/rejected": -21.643661499023438, + "step": 35180 + }, + { + "epoch": 2.1, + "learning_rate": 1.2540580929831065e-06, + "logits/chosen": -2.4492805004119873, + "logits/rejected": -1.666002631187439, + "logps/chosen": -762.7594604492188, + "logps/rejected": -2231.232666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.941233158111572, + "rewards/margins": 14.92901611328125, + "rewards/rejected": -21.870250701904297, + "step": 35190 + }, + { + "epoch": 2.1, + "learning_rate": 1.2525545849962817e-06, + "logits/chosen": -2.511404275894165, + "logits/rejected": -1.7908422946929932, + "logps/chosen": -762.40234375, + "logps/rejected": -2119.818115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.935793876647949, + "rewards/margins": 13.806129455566406, + "rewards/rejected": -20.741924285888672, + "step": 35200 + }, + { + "epoch": 2.1, + "learning_rate": 1.251051677529664e-06, + "logits/chosen": -2.5348618030548096, + "logits/rejected": -1.7890703678131104, + "logps/chosen": -767.2861328125, + "logps/rejected": -2143.676025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9003705978393555, + "rewards/margins": 14.083111763000488, + "rewards/rejected": -20.98348045349121, + "step": 35210 + }, + { + "epoch": 2.1, + "learning_rate": 1.249549371306753e-06, + "logits/chosen": -2.5344696044921875, + "logits/rejected": -1.7154639959335327, + "logps/chosen": -766.6055908203125, + "logps/rejected": -2257.061279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.940455436706543, + "rewards/margins": 15.19518756866455, + "rewards/rejected": -22.13564109802246, + "step": 35220 + }, + { + "epoch": 2.1, + "learning_rate": 1.2480476670507586e-06, + "logits/chosen": -2.4941744804382324, + "logits/rejected": -1.6548881530761719, + "logps/chosen": -791.9114379882812, + "logps/rejected": -2192.3876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.220780372619629, + "rewards/margins": 14.26945972442627, + "rewards/rejected": -21.490238189697266, + "step": 35230 + }, + { + "epoch": 2.1, + "learning_rate": 1.2465465654846012e-06, + "logits/chosen": -2.542238712310791, + "logits/rejected": -1.8667757511138916, + "logps/chosen": -753.8477783203125, + "logps/rejected": -2227.921142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8266921043396, + "rewards/margins": 15.00721263885498, + "rewards/rejected": -21.833904266357422, + "step": 35240 + }, + { + "epoch": 2.1, + "learning_rate": 1.2450460673309115e-06, + "logits/chosen": -2.5405900478363037, + "logits/rejected": -1.8778146505355835, + "logps/chosen": -764.174560546875, + "logps/rejected": -2106.381591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.993325233459473, + "rewards/margins": 13.639516830444336, + "rewards/rejected": -20.632841110229492, + "step": 35250 + }, + { + "epoch": 2.1, + "learning_rate": 1.2435461733120289e-06, + "logits/chosen": -2.499565362930298, + "logits/rejected": -1.7913424968719482, + "logps/chosen": -779.1949462890625, + "logps/rejected": -2165.77197265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.070684909820557, + "rewards/margins": 14.148025512695312, + "rewards/rejected": -21.21870994567871, + "step": 35260 + }, + { + "epoch": 2.1, + "learning_rate": 1.2420468841500023e-06, + "logits/chosen": -2.5154812335968018, + "logits/rejected": -1.7499526739120483, + "logps/chosen": -781.2235107421875, + "logps/rejected": -2190.246826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.111737251281738, + "rewards/margins": 14.335515975952148, + "rewards/rejected": -21.447250366210938, + "step": 35270 + }, + { + "epoch": 2.1, + "learning_rate": 1.2405482005665894e-06, + "logits/chosen": -2.504633903503418, + "logits/rejected": -1.839787244796753, + "logps/chosen": -773.8240356445312, + "logps/rejected": -2273.861083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.012209892272949, + "rewards/margins": 15.270624160766602, + "rewards/rejected": -22.282833099365234, + "step": 35280 + }, + { + "epoch": 2.1, + "learning_rate": 1.2390501232832569e-06, + "logits/chosen": -2.527256727218628, + "logits/rejected": -1.7486133575439453, + "logps/chosen": -756.231689453125, + "logps/rejected": -2233.87451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844770908355713, + "rewards/margins": 15.04747486114502, + "rewards/rejected": -21.89224624633789, + "step": 35290 + }, + { + "epoch": 2.1, + "learning_rate": 1.2375526530211793e-06, + "logits/chosen": -2.5326733589172363, + "logits/rejected": -1.8790134191513062, + "logps/chosen": -791.2854614257812, + "logps/rejected": -2271.482177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.240699768066406, + "rewards/margins": 15.02497386932373, + "rewards/rejected": -22.265674591064453, + "step": 35300 + }, + { + "epoch": 2.11, + "learning_rate": 1.236055790501238e-06, + "logits/chosen": -2.527858018875122, + "logits/rejected": -1.7361268997192383, + "logps/chosen": -762.3004760742188, + "logps/rejected": -2257.587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.914552211761475, + "rewards/margins": 15.223806381225586, + "rewards/rejected": -22.13835906982422, + "step": 35310 + }, + { + "epoch": 2.11, + "learning_rate": 1.2345595364440233e-06, + "logits/chosen": -2.481924533843994, + "logits/rejected": -1.847536325454712, + "logps/chosen": -776.4842529296875, + "logps/rejected": -2183.201904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0170793533325195, + "rewards/margins": 14.36125659942627, + "rewards/rejected": -21.37833595275879, + "step": 35320 + }, + { + "epoch": 2.11, + "learning_rate": 1.2330638915698316e-06, + "logits/chosen": -2.535855531692505, + "logits/rejected": -1.763109564781189, + "logps/chosen": -765.4485473632812, + "logps/rejected": -2252.76904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.89141845703125, + "rewards/margins": 15.184221267700195, + "rewards/rejected": -22.075639724731445, + "step": 35330 + }, + { + "epoch": 2.11, + "learning_rate": 1.231568856598666e-06, + "logits/chosen": -2.5181872844696045, + "logits/rejected": -1.8237661123275757, + "logps/chosen": -763.3728637695312, + "logps/rejected": -2210.21630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.914176940917969, + "rewards/margins": 14.73481559753418, + "rewards/rejected": -21.648990631103516, + "step": 35340 + }, + { + "epoch": 2.11, + "learning_rate": 1.230074432250237e-06, + "logits/chosen": -2.5184953212738037, + "logits/rejected": -1.7959696054458618, + "logps/chosen": -757.4229736328125, + "logps/rejected": -2156.96142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.895613193511963, + "rewards/margins": 14.239511489868164, + "rewards/rejected": -21.13512420654297, + "step": 35350 + }, + { + "epoch": 2.11, + "learning_rate": 1.22858061924396e-06, + "logits/chosen": -2.4597039222717285, + "logits/rejected": -1.8264068365097046, + "logps/chosen": -806.5313110351562, + "logps/rejected": -2259.10888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.319169521331787, + "rewards/margins": 14.833189010620117, + "rewards/rejected": -22.152360916137695, + "step": 35360 + }, + { + "epoch": 2.11, + "learning_rate": 1.2270874182989566e-06, + "logits/chosen": -2.519334554672241, + "logits/rejected": -1.9138520956039429, + "logps/chosen": -759.2161254882812, + "logps/rejected": -2174.520263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.901740074157715, + "rewards/margins": 14.393510818481445, + "rewards/rejected": -21.295249938964844, + "step": 35370 + }, + { + "epoch": 2.11, + "learning_rate": 1.225594830134055e-06, + "logits/chosen": -2.5551857948303223, + "logits/rejected": -1.8547738790512085, + "logps/chosen": -763.0902099609375, + "logps/rejected": -2115.551513671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.898102760314941, + "rewards/margins": 13.836024284362793, + "rewards/rejected": -20.734127044677734, + "step": 35380 + }, + { + "epoch": 2.11, + "learning_rate": 1.2241028554677838e-06, + "logits/chosen": -2.5506930351257324, + "logits/rejected": -1.8079029321670532, + "logps/chosen": -762.3350830078125, + "logps/rejected": -2228.41357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.896676063537598, + "rewards/margins": 14.938359260559082, + "rewards/rejected": -21.835033416748047, + "step": 35390 + }, + { + "epoch": 2.11, + "learning_rate": 1.2226114950183836e-06, + "logits/chosen": -2.480607509613037, + "logits/rejected": -1.779334306716919, + "logps/chosen": -784.0593872070312, + "logps/rejected": -2193.01904296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.146989345550537, + "rewards/margins": 14.338401794433594, + "rewards/rejected": -21.485393524169922, + "step": 35400 + }, + { + "epoch": 2.11, + "learning_rate": 1.221120749503792e-06, + "logits/chosen": -2.5450439453125, + "logits/rejected": -1.8652617931365967, + "logps/chosen": -752.1537475585938, + "logps/rejected": -2142.37060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.823051452636719, + "rewards/margins": 14.172945976257324, + "rewards/rejected": -20.995996475219727, + "step": 35410 + }, + { + "epoch": 2.11, + "learning_rate": 1.2196306196416566e-06, + "logits/chosen": -2.513885259628296, + "logits/rejected": -1.8046365976333618, + "logps/chosen": -782.6736450195312, + "logps/rejected": -2109.344482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.119285583496094, + "rewards/margins": 13.530838012695312, + "rewards/rejected": -20.650121688842773, + "step": 35420 + }, + { + "epoch": 2.11, + "learning_rate": 1.2181411061493229e-06, + "logits/chosen": -2.5208191871643066, + "logits/rejected": -1.7490818500518799, + "logps/chosen": -754.7550659179688, + "logps/rejected": -2193.770751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.830178737640381, + "rewards/margins": 14.663790702819824, + "rewards/rejected": -21.493968963623047, + "step": 35430 + }, + { + "epoch": 2.11, + "learning_rate": 1.2166522097438462e-06, + "logits/chosen": -2.5072011947631836, + "logits/rejected": -1.7749497890472412, + "logps/chosen": -719.0769653320312, + "logps/rejected": -2072.44970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.555998802185059, + "rewards/margins": 13.731892585754395, + "rewards/rejected": -20.287891387939453, + "step": 35440 + }, + { + "epoch": 2.11, + "learning_rate": 1.2151639311419775e-06, + "logits/chosen": -2.5199027061462402, + "logits/rejected": -1.8160051107406616, + "logps/chosen": -757.8133544921875, + "logps/rejected": -2159.763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.859648704528809, + "rewards/margins": 14.301446914672852, + "rewards/rejected": -21.16109275817871, + "step": 35450 + }, + { + "epoch": 2.11, + "learning_rate": 1.213676271060178e-06, + "logits/chosen": -2.5655527114868164, + "logits/rejected": -1.8296695947647095, + "logps/chosen": -762.249267578125, + "logps/rejected": -2218.29443359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.936408042907715, + "rewards/margins": 14.819585800170898, + "rewards/rejected": -21.755992889404297, + "step": 35460 + }, + { + "epoch": 2.12, + "learning_rate": 1.212189230214604e-06, + "logits/chosen": -2.5286924839019775, + "logits/rejected": -1.705267310142517, + "logps/chosen": -761.1336669921875, + "logps/rejected": -2226.680908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.833254814147949, + "rewards/margins": 14.976516723632812, + "rewards/rejected": -21.809772491455078, + "step": 35470 + }, + { + "epoch": 2.12, + "learning_rate": 1.2107028093211183e-06, + "logits/chosen": -2.528210163116455, + "logits/rejected": -1.6627299785614014, + "logps/chosen": -761.9856567382812, + "logps/rejected": -2104.13427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.884787559509277, + "rewards/margins": 13.70622730255127, + "rewards/rejected": -20.591014862060547, + "step": 35480 + }, + { + "epoch": 2.12, + "learning_rate": 1.2092170090952838e-06, + "logits/chosen": -2.5042948722839355, + "logits/rejected": -1.74440598487854, + "logps/chosen": -775.145751953125, + "logps/rejected": -2164.60400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.051469326019287, + "rewards/margins": 14.155248641967773, + "rewards/rejected": -21.20671844482422, + "step": 35490 + }, + { + "epoch": 2.12, + "learning_rate": 1.2077318302523643e-06, + "logits/chosen": -2.539139747619629, + "logits/rejected": -1.8452341556549072, + "logps/chosen": -777.6885986328125, + "logps/rejected": -2195.36279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.013825416564941, + "rewards/margins": 14.498037338256836, + "rewards/rejected": -21.51186752319336, + "step": 35500 + }, + { + "epoch": 2.12, + "learning_rate": 1.2062472735073258e-06, + "logits/chosen": -2.5144972801208496, + "logits/rejected": -1.773206353187561, + "logps/chosen": -779.6707763671875, + "logps/rejected": -2087.9990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.159323215484619, + "rewards/margins": 13.276895523071289, + "rewards/rejected": -20.436222076416016, + "step": 35510 + }, + { + "epoch": 2.12, + "learning_rate": 1.204763339574833e-06, + "logits/chosen": -2.4852659702301025, + "logits/rejected": -1.819026231765747, + "logps/chosen": -770.87451171875, + "logps/rejected": -2136.97265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.0007123947143555, + "rewards/margins": 13.932759284973145, + "rewards/rejected": -20.9334716796875, + "step": 35520 + }, + { + "epoch": 2.12, + "learning_rate": 1.203280029169252e-06, + "logits/chosen": -2.4880270957946777, + "logits/rejected": -1.7189327478408813, + "logps/chosen": -757.5884399414062, + "logps/rejected": -2144.89990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.783506870269775, + "rewards/margins": 14.231244087219238, + "rewards/rejected": -21.014751434326172, + "step": 35530 + }, + { + "epoch": 2.12, + "learning_rate": 1.2017973430046475e-06, + "logits/chosen": -2.54349946975708, + "logits/rejected": -1.843909502029419, + "logps/chosen": -738.9613037109375, + "logps/rejected": -2215.08349609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.701947212219238, + "rewards/margins": 15.004844665527344, + "rewards/rejected": -21.7067928314209, + "step": 35540 + }, + { + "epoch": 2.12, + "learning_rate": 1.2003152817947878e-06, + "logits/chosen": -2.5145351886749268, + "logits/rejected": -1.7624866962432861, + "logps/chosen": -764.8316040039062, + "logps/rejected": -2134.0302734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.951152801513672, + "rewards/margins": 13.945849418640137, + "rewards/rejected": -20.897005081176758, + "step": 35550 + }, + { + "epoch": 2.12, + "learning_rate": 1.1988338462531335e-06, + "logits/chosen": -2.493203639984131, + "logits/rejected": -1.8175649642944336, + "logps/chosen": -779.061767578125, + "logps/rejected": -2217.333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.057988166809082, + "rewards/margins": 14.6707763671875, + "rewards/rejected": -21.728763580322266, + "step": 35560 + }, + { + "epoch": 2.12, + "learning_rate": 1.197353037092849e-06, + "logits/chosen": -2.4978957176208496, + "logits/rejected": -1.7381460666656494, + "logps/chosen": -774.1071166992188, + "logps/rejected": -2130.068115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.981685638427734, + "rewards/margins": 13.876266479492188, + "rewards/rejected": -20.857954025268555, + "step": 35570 + }, + { + "epoch": 2.12, + "learning_rate": 1.1958728550267958e-06, + "logits/chosen": -2.515733480453491, + "logits/rejected": -1.8209270238876343, + "logps/chosen": -776.6763916015625, + "logps/rejected": -2253.37255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.107924461364746, + "rewards/margins": 14.980929374694824, + "rewards/rejected": -22.08885383605957, + "step": 35580 + }, + { + "epoch": 2.12, + "learning_rate": 1.194393300767534e-06, + "logits/chosen": -2.517120838165283, + "logits/rejected": -1.8077774047851562, + "logps/chosen": -767.2945556640625, + "logps/rejected": -2212.826904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.982731819152832, + "rewards/margins": 14.700889587402344, + "rewards/rejected": -21.68362045288086, + "step": 35590 + }, + { + "epoch": 2.12, + "learning_rate": 1.19291437502732e-06, + "logits/chosen": -2.4962525367736816, + "logits/rejected": -1.732994794845581, + "logps/chosen": -757.9554443359375, + "logps/rejected": -2228.772705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.860767364501953, + "rewards/margins": 14.988809585571289, + "rewards/rejected": -21.84957504272461, + "step": 35600 + }, + { + "epoch": 2.12, + "learning_rate": 1.1914360785181099e-06, + "logits/chosen": -2.5409770011901855, + "logits/rejected": -1.7637784481048584, + "logps/chosen": -764.4785766601562, + "logps/rejected": -2230.077880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9566450119018555, + "rewards/margins": 14.908312797546387, + "rewards/rejected": -21.864959716796875, + "step": 35610 + }, + { + "epoch": 2.12, + "learning_rate": 1.1899584119515541e-06, + "logits/chosen": -2.537996768951416, + "logits/rejected": -1.8800480365753174, + "logps/chosen": -745.6024169921875, + "logps/rejected": -2209.75537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743767738342285, + "rewards/margins": 14.92174243927002, + "rewards/rejected": -21.665508270263672, + "step": 35620 + }, + { + "epoch": 2.12, + "learning_rate": 1.1884813760390026e-06, + "logits/chosen": -2.5446434020996094, + "logits/rejected": -1.9018738269805908, + "logps/chosen": -737.2198486328125, + "logps/rejected": -2248.9912109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6692938804626465, + "rewards/margins": 15.378705978393555, + "rewards/rejected": -22.04800033569336, + "step": 35630 + }, + { + "epoch": 2.13, + "learning_rate": 1.1870049714915e-06, + "logits/chosen": -2.544764995574951, + "logits/rejected": -1.8091055154800415, + "logps/chosen": -746.7921142578125, + "logps/rejected": -2255.990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.717887878417969, + "rewards/margins": 15.405370712280273, + "rewards/rejected": -22.123258590698242, + "step": 35640 + }, + { + "epoch": 2.13, + "learning_rate": 1.1855291990197878e-06, + "logits/chosen": -2.4776535034179688, + "logits/rejected": -1.7586002349853516, + "logps/chosen": -750.110107421875, + "logps/rejected": -2224.25341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8436689376831055, + "rewards/margins": 14.973515510559082, + "rewards/rejected": -21.817184448242188, + "step": 35650 + }, + { + "epoch": 2.13, + "learning_rate": 1.1840540593343012e-06, + "logits/chosen": -2.5017237663269043, + "logits/rejected": -1.7569961547851562, + "logps/chosen": -768.2918090820312, + "logps/rejected": -2142.35986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.007707118988037, + "rewards/margins": 13.982221603393555, + "rewards/rejected": -20.98992919921875, + "step": 35660 + }, + { + "epoch": 2.13, + "learning_rate": 1.182579553145175e-06, + "logits/chosen": -2.5282676219940186, + "logits/rejected": -1.7605063915252686, + "logps/chosen": -753.0733032226562, + "logps/rejected": -2197.18408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.880921363830566, + "rewards/margins": 14.655904769897461, + "rewards/rejected": -21.53682518005371, + "step": 35670 + }, + { + "epoch": 2.13, + "learning_rate": 1.1811056811622335e-06, + "logits/chosen": -2.514751434326172, + "logits/rejected": -1.8359451293945312, + "logps/chosen": -767.3258666992188, + "logps/rejected": -2190.841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9829864501953125, + "rewards/margins": 14.494099617004395, + "rewards/rejected": -21.47708511352539, + "step": 35680 + }, + { + "epoch": 2.13, + "learning_rate": 1.1796324440950022e-06, + "logits/chosen": -2.504743814468384, + "logits/rejected": -1.7842490673065186, + "logps/chosen": -734.0618896484375, + "logps/rejected": -2149.21630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6676130294799805, + "rewards/margins": 14.37205696105957, + "rewards/rejected": -21.0396728515625, + "step": 35690 + }, + { + "epoch": 2.13, + "learning_rate": 1.1781598426526935e-06, + "logits/chosen": -2.4700024127960205, + "logits/rejected": -1.9012635946273804, + "logps/chosen": -745.00927734375, + "logps/rejected": -2145.51025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.734686851501465, + "rewards/margins": 14.2791109085083, + "rewards/rejected": -21.013797760009766, + "step": 35700 + }, + { + "epoch": 2.13, + "learning_rate": 1.1766878775442217e-06, + "logits/chosen": -2.496178150177002, + "logits/rejected": -1.7768771648406982, + "logps/chosen": -730.37353515625, + "logps/rejected": -2151.24365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.599545478820801, + "rewards/margins": 14.482586860656738, + "rewards/rejected": -21.082134246826172, + "step": 35710 + }, + { + "epoch": 2.13, + "learning_rate": 1.1752165494781867e-06, + "logits/chosen": -2.4877963066101074, + "logits/rejected": -1.7051067352294922, + "logps/chosen": -724.90234375, + "logps/rejected": -2192.704345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.547292232513428, + "rewards/margins": 14.932449340820312, + "rewards/rejected": -21.479740142822266, + "step": 35720 + }, + { + "epoch": 2.13, + "learning_rate": 1.1737458591628898e-06, + "logits/chosen": -2.475821018218994, + "logits/rejected": -1.7455928325653076, + "logps/chosen": -740.7293701171875, + "logps/rejected": -2171.160400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.712590217590332, + "rewards/margins": 14.556386947631836, + "rewards/rejected": -21.26897430419922, + "step": 35730 + }, + { + "epoch": 2.13, + "learning_rate": 1.1722758073063184e-06, + "logits/chosen": -2.5279381275177, + "logits/rejected": -1.8597373962402344, + "logps/chosen": -746.1775512695312, + "logps/rejected": -2182.692138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.785155296325684, + "rewards/margins": 14.604316711425781, + "rewards/rejected": -21.389474868774414, + "step": 35740 + }, + { + "epoch": 2.13, + "learning_rate": 1.1708063946161557e-06, + "logits/chosen": -2.5289268493652344, + "logits/rejected": -1.7989673614501953, + "logps/chosen": -761.3115234375, + "logps/rejected": -2208.991455078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.960137367248535, + "rewards/margins": 14.687795639038086, + "rewards/rejected": -21.647933959960938, + "step": 35750 + }, + { + "epoch": 2.13, + "learning_rate": 1.1693376217997795e-06, + "logits/chosen": -2.542133331298828, + "logits/rejected": -1.8015763759613037, + "logps/chosen": -727.239013671875, + "logps/rejected": -2113.4140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6396660804748535, + "rewards/margins": 14.057214736938477, + "rewards/rejected": -20.696880340576172, + "step": 35760 + }, + { + "epoch": 2.13, + "learning_rate": 1.1678694895642534e-06, + "logits/chosen": -2.565883159637451, + "logits/rejected": -1.8954356908798218, + "logps/chosen": -757.8001098632812, + "logps/rejected": -2223.015869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.873465061187744, + "rewards/margins": 14.90948486328125, + "rewards/rejected": -21.782949447631836, + "step": 35770 + }, + { + "epoch": 2.13, + "learning_rate": 1.1664019986163397e-06, + "logits/chosen": -2.477648973464966, + "logits/rejected": -1.6972516775131226, + "logps/chosen": -759.6632080078125, + "logps/rejected": -2125.4375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844779968261719, + "rewards/margins": 13.965911865234375, + "rewards/rejected": -20.810691833496094, + "step": 35780 + }, + { + "epoch": 2.13, + "learning_rate": 1.164935149662485e-06, + "logits/chosen": -2.480358600616455, + "logits/rejected": -1.7526578903198242, + "logps/chosen": -753.9169311523438, + "logps/rejected": -2055.201416015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.892584323883057, + "rewards/margins": 13.224644660949707, + "rewards/rejected": -20.117229461669922, + "step": 35790 + }, + { + "epoch": 2.13, + "learning_rate": 1.1634689434088343e-06, + "logits/chosen": -2.494393825531006, + "logits/rejected": -1.8745445013046265, + "logps/chosen": -760.4014892578125, + "logps/rejected": -2315.51416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8350629806518555, + "rewards/margins": 15.868589401245117, + "rewards/rejected": -22.70365333557129, + "step": 35800 + }, + { + "epoch": 2.14, + "learning_rate": 1.1620033805612153e-06, + "logits/chosen": -2.5407891273498535, + "logits/rejected": -1.8092048168182373, + "logps/chosen": -753.5479736328125, + "logps/rejected": -2152.63037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.819807529449463, + "rewards/margins": 14.255261421203613, + "rewards/rejected": -21.0750675201416, + "step": 35810 + }, + { + "epoch": 2.14, + "learning_rate": 1.1605384618251533e-06, + "logits/chosen": -2.535147190093994, + "logits/rejected": -1.8346465826034546, + "logps/chosen": -726.1865234375, + "logps/rejected": -2221.88720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5054168701171875, + "rewards/margins": 15.277097702026367, + "rewards/rejected": -21.782512664794922, + "step": 35820 + }, + { + "epoch": 2.14, + "learning_rate": 1.1590741879058582e-06, + "logits/chosen": -2.5370211601257324, + "logits/rejected": -1.8507074117660522, + "logps/chosen": -775.9602661132812, + "logps/rejected": -2249.76416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.090360164642334, + "rewards/margins": 14.972384452819824, + "rewards/rejected": -22.062742233276367, + "step": 35830 + }, + { + "epoch": 2.14, + "learning_rate": 1.1576105595082319e-06, + "logits/chosen": -2.492814302444458, + "logits/rejected": -1.7816028594970703, + "logps/chosen": -756.2520751953125, + "logps/rejected": -2185.86181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.870455265045166, + "rewards/margins": 14.541524887084961, + "rewards/rejected": -21.4119815826416, + "step": 35840 + }, + { + "epoch": 2.14, + "learning_rate": 1.156147577336865e-06, + "logits/chosen": -2.514139413833618, + "logits/rejected": -1.811819076538086, + "logps/chosen": -749.4071044921875, + "logps/rejected": -2181.6875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.782573699951172, + "rewards/margins": 14.586074829101562, + "rewards/rejected": -21.368648529052734, + "step": 35850 + }, + { + "epoch": 2.14, + "learning_rate": 1.1546852420960372e-06, + "logits/chosen": -2.4707236289978027, + "logits/rejected": -1.7054551839828491, + "logps/chosen": -738.0206298828125, + "logps/rejected": -2252.49951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6606903076171875, + "rewards/margins": 15.419893264770508, + "rewards/rejected": -22.080585479736328, + "step": 35860 + }, + { + "epoch": 2.14, + "learning_rate": 1.1532235544897169e-06, + "logits/chosen": -2.5055553913116455, + "logits/rejected": -1.8260772228240967, + "logps/chosen": -780.2334594726562, + "logps/rejected": -2170.908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.096200466156006, + "rewards/margins": 14.168869018554688, + "rewards/rejected": -21.26506996154785, + "step": 35870 + }, + { + "epoch": 2.14, + "learning_rate": 1.1517625152215603e-06, + "logits/chosen": -2.526804208755493, + "logits/rejected": -1.7922340631484985, + "logps/chosen": -749.2906494140625, + "logps/rejected": -2182.97265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.820781707763672, + "rewards/margins": 14.56261157989502, + "rewards/rejected": -21.383392333984375, + "step": 35880 + }, + { + "epoch": 2.14, + "learning_rate": 1.1503021249949115e-06, + "logits/chosen": -2.5200672149658203, + "logits/rejected": -1.9075943231582642, + "logps/chosen": -763.5596923828125, + "logps/rejected": -2206.37060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8944878578186035, + "rewards/margins": 14.733146667480469, + "rewards/rejected": -21.627635955810547, + "step": 35890 + }, + { + "epoch": 2.14, + "learning_rate": 1.1488423845128024e-06, + "logits/chosen": -2.5323266983032227, + "logits/rejected": -1.7435439825057983, + "logps/chosen": -750.6034545898438, + "logps/rejected": -2156.688232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.743447780609131, + "rewards/margins": 14.379191398620605, + "rewards/rejected": -21.122638702392578, + "step": 35900 + }, + { + "epoch": 2.14, + "learning_rate": 1.1473832944779525e-06, + "logits/chosen": -2.562857151031494, + "logits/rejected": -1.8644033670425415, + "logps/chosen": -747.5382690429688, + "logps/rejected": -2202.771240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.769830226898193, + "rewards/margins": 14.809762954711914, + "rewards/rejected": -21.579593658447266, + "step": 35910 + }, + { + "epoch": 2.14, + "learning_rate": 1.1459248555927682e-06, + "logits/chosen": -2.4508674144744873, + "logits/rejected": -1.717153549194336, + "logps/chosen": -763.8430786132812, + "logps/rejected": -2155.39453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.960906028747559, + "rewards/margins": 14.147028923034668, + "rewards/rejected": -21.107934951782227, + "step": 35920 + }, + { + "epoch": 2.14, + "learning_rate": 1.1444670685593392e-06, + "logits/chosen": -2.5180561542510986, + "logits/rejected": -1.8427133560180664, + "logps/chosen": -744.1820678710938, + "logps/rejected": -2186.14501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.70874547958374, + "rewards/margins": 14.70733642578125, + "rewards/rejected": -21.416080474853516, + "step": 35930 + }, + { + "epoch": 2.14, + "learning_rate": 1.1430099340794482e-06, + "logits/chosen": -2.5219502449035645, + "logits/rejected": -1.791283369064331, + "logps/chosen": -790.2024536132812, + "logps/rejected": -2176.63623046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.232775688171387, + "rewards/margins": 14.099794387817383, + "rewards/rejected": -21.332569122314453, + "step": 35940 + }, + { + "epoch": 2.14, + "learning_rate": 1.1415534528545556e-06, + "logits/chosen": -2.5074615478515625, + "logits/rejected": -1.8409404754638672, + "logps/chosen": -754.4575805664062, + "logps/rejected": -2138.861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8069305419921875, + "rewards/margins": 14.129270553588867, + "rewards/rejected": -20.936199188232422, + "step": 35950 + }, + { + "epoch": 2.14, + "learning_rate": 1.1400976255858155e-06, + "logits/chosen": -2.49489164352417, + "logits/rejected": -1.7633451223373413, + "logps/chosen": -738.8604736328125, + "logps/rejected": -2217.88525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.67383337020874, + "rewards/margins": 15.061668395996094, + "rewards/rejected": -21.735504150390625, + "step": 35960 + }, + { + "epoch": 2.14, + "learning_rate": 1.138642452974059e-06, + "logits/chosen": -2.501620054244995, + "logits/rejected": -1.8117144107818604, + "logps/chosen": -730.2259521484375, + "logps/rejected": -2177.91650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.597209930419922, + "rewards/margins": 14.74298095703125, + "rewards/rejected": -21.340190887451172, + "step": 35970 + }, + { + "epoch": 2.15, + "learning_rate": 1.1371879357198093e-06, + "logits/chosen": -2.523405075073242, + "logits/rejected": -1.8224945068359375, + "logps/chosen": -750.9777221679688, + "logps/rejected": -2186.89306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.831197261810303, + "rewards/margins": 14.593994140625, + "rewards/rejected": -21.425188064575195, + "step": 35980 + }, + { + "epoch": 2.15, + "learning_rate": 1.1357340745232698e-06, + "logits/chosen": -2.4813313484191895, + "logits/rejected": -1.8075981140136719, + "logps/chosen": -758.008056640625, + "logps/rejected": -2126.77978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.871662139892578, + "rewards/margins": 13.962496757507324, + "rewards/rejected": -20.83415985107422, + "step": 35990 + }, + { + "epoch": 2.15, + "learning_rate": 1.1342808700843297e-06, + "logits/chosen": -2.5128588676452637, + "logits/rejected": -1.7729383707046509, + "logps/chosen": -752.0088500976562, + "logps/rejected": -2205.031005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.803342342376709, + "rewards/margins": 14.801420211791992, + "rewards/rejected": -21.604761123657227, + "step": 36000 + }, + { + "epoch": 2.15, + "learning_rate": 1.1328283231025623e-06, + "logits/chosen": -2.5655581951141357, + "logits/rejected": -1.8405323028564453, + "logps/chosen": -744.388427734375, + "logps/rejected": -2141.725341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.713602542877197, + "rewards/margins": 14.268285751342773, + "rewards/rejected": -20.981889724731445, + "step": 36010 + }, + { + "epoch": 2.15, + "learning_rate": 1.1313764342772205e-06, + "logits/chosen": -2.4882702827453613, + "logits/rejected": -1.7330381870269775, + "logps/chosen": -726.984375, + "logps/rejected": -2249.17333984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.594526767730713, + "rewards/margins": 15.454843521118164, + "rewards/rejected": -22.04936981201172, + "step": 36020 + }, + { + "epoch": 2.15, + "learning_rate": 1.1299252043072478e-06, + "logits/chosen": -2.512143611907959, + "logits/rejected": -1.7493377923965454, + "logps/chosen": -746.5374145507812, + "logps/rejected": -2199.236572265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.765392303466797, + "rewards/margins": 14.789543151855469, + "rewards/rejected": -21.554935455322266, + "step": 36030 + }, + { + "epoch": 2.15, + "learning_rate": 1.1284746338912625e-06, + "logits/chosen": -2.5037171840667725, + "logits/rejected": -1.8359928131103516, + "logps/chosen": -749.2532348632812, + "logps/rejected": -2178.41357421875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.775437355041504, + "rewards/margins": 14.569314956665039, + "rewards/rejected": -21.344751358032227, + "step": 36040 + }, + { + "epoch": 2.15, + "learning_rate": 1.1270247237275727e-06, + "logits/chosen": -2.51896333694458, + "logits/rejected": -1.6695019006729126, + "logps/chosen": -718.0870971679688, + "logps/rejected": -2257.984130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.497839450836182, + "rewards/margins": 15.626510620117188, + "rewards/rejected": -22.12434959411621, + "step": 36050 + }, + { + "epoch": 2.15, + "learning_rate": 1.1255754745141617e-06, + "logits/chosen": -2.5530059337615967, + "logits/rejected": -1.8276456594467163, + "logps/chosen": -721.3043212890625, + "logps/rejected": -2192.46435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4886369705200195, + "rewards/margins": 14.992042541503906, + "rewards/rejected": -21.48067855834961, + "step": 36060 + }, + { + "epoch": 2.15, + "learning_rate": 1.1241268869487015e-06, + "logits/chosen": -2.5106923580169678, + "logits/rejected": -1.8719699382781982, + "logps/chosen": -729.6141357421875, + "logps/rejected": -2141.23291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.546237945556641, + "rewards/margins": 14.431805610656738, + "rewards/rejected": -20.978046417236328, + "step": 36070 + }, + { + "epoch": 2.15, + "learning_rate": 1.1226789617285386e-06, + "logits/chosen": -2.509291172027588, + "logits/rejected": -1.789780616760254, + "logps/chosen": -729.4879150390625, + "logps/rejected": -2214.42919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.614711761474609, + "rewards/margins": 15.078893661499023, + "rewards/rejected": -21.693607330322266, + "step": 36080 + }, + { + "epoch": 2.15, + "learning_rate": 1.1212316995507079e-06, + "logits/chosen": -2.499502658843994, + "logits/rejected": -1.8272678852081299, + "logps/chosen": -711.18701171875, + "logps/rejected": -2116.894775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.400221824645996, + "rewards/margins": 14.331494331359863, + "rewards/rejected": -20.73171615600586, + "step": 36090 + }, + { + "epoch": 2.15, + "learning_rate": 1.1197851011119182e-06, + "logits/chosen": -2.5311429500579834, + "logits/rejected": -1.7827308177947998, + "logps/chosen": -763.2019653320312, + "logps/rejected": -2117.2109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.938861846923828, + "rewards/margins": 13.811742782592773, + "rewards/rejected": -20.75060272216797, + "step": 36100 + }, + { + "epoch": 2.15, + "learning_rate": 1.1183391671085636e-06, + "logits/chosen": -2.532536029815674, + "logits/rejected": -1.794154405593872, + "logps/chosen": -733.3709106445312, + "logps/rejected": -2271.83984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.645447731018066, + "rewards/margins": 15.627924919128418, + "rewards/rejected": -22.273372650146484, + "step": 36110 + }, + { + "epoch": 2.15, + "learning_rate": 1.1168938982367162e-06, + "logits/chosen": -2.527935743331909, + "logits/rejected": -1.7952884435653687, + "logps/chosen": -714.9805908203125, + "logps/rejected": -2171.212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.453343391418457, + "rewards/margins": 14.80895709991455, + "rewards/rejected": -21.26230239868164, + "step": 36120 + }, + { + "epoch": 2.15, + "learning_rate": 1.1154492951921284e-06, + "logits/chosen": -2.523693084716797, + "logits/rejected": -1.8753879070281982, + "logps/chosen": -718.6148681640625, + "logps/rejected": -2163.47607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.398961544036865, + "rewards/margins": 14.7914457321167, + "rewards/rejected": -21.19040870666504, + "step": 36130 + }, + { + "epoch": 2.16, + "learning_rate": 1.1140053586702324e-06, + "logits/chosen": -2.4959495067596436, + "logits/rejected": -1.790357232093811, + "logps/chosen": -725.5186767578125, + "logps/rejected": -2185.529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.553740501403809, + "rewards/margins": 14.857671737670898, + "rewards/rejected": -21.41141128540039, + "step": 36140 + }, + { + "epoch": 2.16, + "learning_rate": 1.112562089366139e-06, + "logits/chosen": -2.5232596397399902, + "logits/rejected": -1.7872425317764282, + "logps/chosen": -720.401123046875, + "logps/rejected": -2162.041748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50452184677124, + "rewards/margins": 14.660244941711426, + "rewards/rejected": -21.164766311645508, + "step": 36150 + }, + { + "epoch": 2.16, + "learning_rate": 1.1111194879746381e-06, + "logits/chosen": -2.505572557449341, + "logits/rejected": -1.791094183921814, + "logps/chosen": -727.8565673828125, + "logps/rejected": -2159.02490234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561145782470703, + "rewards/margins": 14.584619522094727, + "rewards/rejected": -21.145767211914062, + "step": 36160 + }, + { + "epoch": 2.16, + "learning_rate": 1.1096775551901978e-06, + "logits/chosen": -2.523850679397583, + "logits/rejected": -1.7592811584472656, + "logps/chosen": -732.0433349609375, + "logps/rejected": -2230.423095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.575064659118652, + "rewards/margins": 15.292948722839355, + "rewards/rejected": -21.868011474609375, + "step": 36170 + }, + { + "epoch": 2.16, + "learning_rate": 1.108236291706965e-06, + "logits/chosen": -2.5047314167022705, + "logits/rejected": -1.8241684436798096, + "logps/chosen": -739.26904296875, + "logps/rejected": -2117.244384765625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.615367889404297, + "rewards/margins": 14.11192512512207, + "rewards/rejected": -20.727296829223633, + "step": 36180 + }, + { + "epoch": 2.16, + "learning_rate": 1.1067956982187636e-06, + "logits/chosen": -2.5366203784942627, + "logits/rejected": -1.7459303140640259, + "logps/chosen": -731.1875610351562, + "logps/rejected": -2156.1181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.577692985534668, + "rewards/margins": 14.532930374145508, + "rewards/rejected": -21.11062240600586, + "step": 36190 + }, + { + "epoch": 2.16, + "learning_rate": 1.1053557754190952e-06, + "logits/chosen": -2.5290346145629883, + "logits/rejected": -1.8400533199310303, + "logps/chosen": -734.2506103515625, + "logps/rejected": -2127.99560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.642634391784668, + "rewards/margins": 14.189180374145508, + "rewards/rejected": -20.83181381225586, + "step": 36200 + }, + { + "epoch": 2.16, + "learning_rate": 1.1039165240011388e-06, + "logits/chosen": -2.5290074348449707, + "logits/rejected": -1.8413642644882202, + "logps/chosen": -719.2359619140625, + "logps/rejected": -2163.906982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535046577453613, + "rewards/margins": 14.675775527954102, + "rewards/rejected": -21.210819244384766, + "step": 36210 + }, + { + "epoch": 2.16, + "learning_rate": 1.10247794465775e-06, + "logits/chosen": -2.5281660556793213, + "logits/rejected": -1.8346529006958008, + "logps/chosen": -714.928955078125, + "logps/rejected": -2199.6806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.430540561676025, + "rewards/margins": 15.11846923828125, + "rewards/rejected": -21.54901123046875, + "step": 36220 + }, + { + "epoch": 2.16, + "learning_rate": 1.1010400380814607e-06, + "logits/chosen": -2.5562186241149902, + "logits/rejected": -1.7767174243927002, + "logps/chosen": -732.8300170898438, + "logps/rejected": -2128.19580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.617484092712402, + "rewards/margins": 14.21997356414795, + "rewards/rejected": -20.837459564208984, + "step": 36230 + }, + { + "epoch": 2.16, + "learning_rate": 1.0996028049644792e-06, + "logits/chosen": -2.502934694290161, + "logits/rejected": -1.8133357763290405, + "logps/chosen": -759.2138061523438, + "logps/rejected": -2272.986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.850099086761475, + "rewards/margins": 15.439371109008789, + "rewards/rejected": -22.28946876525879, + "step": 36240 + }, + { + "epoch": 2.16, + "learning_rate": 1.0981662459986895e-06, + "logits/chosen": -2.488891124725342, + "logits/rejected": -1.8030894994735718, + "logps/chosen": -732.1536865234375, + "logps/rejected": -2198.95654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638877868652344, + "rewards/margins": 14.881752014160156, + "rewards/rejected": -21.520631790161133, + "step": 36250 + }, + { + "epoch": 2.16, + "learning_rate": 1.0967303618756512e-06, + "logits/chosen": -2.5590500831604004, + "logits/rejected": -1.8616644144058228, + "logps/chosen": -747.121826171875, + "logps/rejected": -2176.91845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.739178657531738, + "rewards/margins": 14.57976245880127, + "rewards/rejected": -21.318941116333008, + "step": 36260 + }, + { + "epoch": 2.16, + "learning_rate": 1.095295153286599e-06, + "logits/chosen": -2.4937260150909424, + "logits/rejected": -1.7350845336914062, + "logps/chosen": -739.5205688476562, + "logps/rejected": -2101.44677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.665534973144531, + "rewards/margins": 13.910329818725586, + "rewards/rejected": -20.575864791870117, + "step": 36270 + }, + { + "epoch": 2.16, + "learning_rate": 1.0938606209224425e-06, + "logits/chosen": -2.5155320167541504, + "logits/rejected": -1.7996906042099, + "logps/chosen": -711.7093505859375, + "logps/rejected": -2206.581298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.400063514709473, + "rewards/margins": 15.221026420593262, + "rewards/rejected": -21.621089935302734, + "step": 36280 + }, + { + "epoch": 2.16, + "learning_rate": 1.0924267654737636e-06, + "logits/chosen": -2.486814498901367, + "logits/rejected": -1.8321349620819092, + "logps/chosen": -736.4801025390625, + "logps/rejected": -2140.1591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.686756134033203, + "rewards/margins": 14.2708158493042, + "rewards/rejected": -20.95757484436035, + "step": 36290 + }, + { + "epoch": 2.16, + "learning_rate": 1.090993587630824e-06, + "logits/chosen": -2.442216396331787, + "logits/rejected": -1.744870901107788, + "logps/chosen": -754.5819091796875, + "logps/rejected": -2164.66357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.844460964202881, + "rewards/margins": 14.366351127624512, + "rewards/rejected": -21.2108097076416, + "step": 36300 + }, + { + "epoch": 2.17, + "learning_rate": 1.0895610880835511e-06, + "logits/chosen": -2.526796817779541, + "logits/rejected": -1.7969520092010498, + "logps/chosen": -742.7174072265625, + "logps/rejected": -2219.247314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.675379753112793, + "rewards/margins": 15.060157775878906, + "rewards/rejected": -21.735538482666016, + "step": 36310 + }, + { + "epoch": 2.17, + "learning_rate": 1.0881292675215544e-06, + "logits/chosen": -2.5277259349823, + "logits/rejected": -1.8747230768203735, + "logps/chosen": -724.2097778320312, + "logps/rejected": -2209.703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5545196533203125, + "rewards/margins": 15.111303329467773, + "rewards/rejected": -21.665822982788086, + "step": 36320 + }, + { + "epoch": 2.17, + "learning_rate": 1.0866981266341084e-06, + "logits/chosen": -2.589630603790283, + "logits/rejected": -1.8972713947296143, + "logps/chosen": -710.7987060546875, + "logps/rejected": -2139.30810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.421146392822266, + "rewards/margins": 14.524656295776367, + "rewards/rejected": -20.945804595947266, + "step": 36330 + }, + { + "epoch": 2.17, + "learning_rate": 1.0852676661101679e-06, + "logits/chosen": -2.550546884536743, + "logits/rejected": -1.8258686065673828, + "logps/chosen": -717.8485107421875, + "logps/rejected": -2157.16259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.47092342376709, + "rewards/margins": 14.660677909851074, + "rewards/rejected": -21.131601333618164, + "step": 36340 + }, + { + "epoch": 2.17, + "learning_rate": 1.0838378866383534e-06, + "logits/chosen": -2.5230724811553955, + "logits/rejected": -1.8628267049789429, + "logps/chosen": -741.9857788085938, + "logps/rejected": -2128.340087890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.721230506896973, + "rewards/margins": 14.12157917022705, + "rewards/rejected": -20.842809677124023, + "step": 36350 + }, + { + "epoch": 2.17, + "learning_rate": 1.082408788906964e-06, + "logits/chosen": -2.5619773864746094, + "logits/rejected": -1.7888305187225342, + "logps/chosen": -713.9691162109375, + "logps/rejected": -2102.66064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.448087215423584, + "rewards/margins": 14.157445907592773, + "rewards/rejected": -20.605533599853516, + "step": 36360 + }, + { + "epoch": 2.17, + "learning_rate": 1.0809803736039645e-06, + "logits/chosen": -2.504878282546997, + "logits/rejected": -1.8260726928710938, + "logps/chosen": -692.4595947265625, + "logps/rejected": -2091.551025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.19794225692749, + "rewards/margins": 14.267499923706055, + "rewards/rejected": -20.46544075012207, + "step": 36370 + }, + { + "epoch": 2.17, + "learning_rate": 1.0795526414169952e-06, + "logits/chosen": -2.498077869415283, + "logits/rejected": -1.8455225229263306, + "logps/chosen": -760.8302612304688, + "logps/rejected": -2164.08642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.856328010559082, + "rewards/margins": 14.343358039855957, + "rewards/rejected": -21.199684143066406, + "step": 36380 + }, + { + "epoch": 2.17, + "learning_rate": 1.078125593033366e-06, + "logits/chosen": -2.512528657913208, + "logits/rejected": -1.8444980382919312, + "logps/chosen": -729.1696166992188, + "logps/rejected": -2151.215576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.583991050720215, + "rewards/margins": 14.486822128295898, + "rewards/rejected": -21.070812225341797, + "step": 36390 + }, + { + "epoch": 2.17, + "learning_rate": 1.0766992291400582e-06, + "logits/chosen": -2.5593647956848145, + "logits/rejected": -1.8844821453094482, + "logps/chosen": -712.0161743164062, + "logps/rejected": -2158.42578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.36964750289917, + "rewards/margins": 14.772405624389648, + "rewards/rejected": -21.142053604125977, + "step": 36400 + }, + { + "epoch": 2.17, + "learning_rate": 1.0752735504237237e-06, + "logits/chosen": -2.536837339401245, + "logits/rejected": -1.8664642572402954, + "logps/chosen": -698.7872314453125, + "logps/rejected": -2204.94384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.284140110015869, + "rewards/margins": 15.316250801086426, + "rewards/rejected": -21.600391387939453, + "step": 36410 + }, + { + "epoch": 2.17, + "learning_rate": 1.0738485575706834e-06, + "logits/chosen": -2.5345351696014404, + "logits/rejected": -1.892656922340393, + "logps/chosen": -705.0363159179688, + "logps/rejected": -2230.321044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.288783073425293, + "rewards/margins": 15.568819999694824, + "rewards/rejected": -21.857601165771484, + "step": 36420 + }, + { + "epoch": 2.17, + "learning_rate": 1.0724242512669294e-06, + "logits/chosen": -2.5781478881835938, + "logits/rejected": -1.7743892669677734, + "logps/chosen": -697.6058959960938, + "logps/rejected": -2171.008056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.300671577453613, + "rewards/margins": 14.9656400680542, + "rewards/rejected": -21.266311645507812, + "step": 36430 + }, + { + "epoch": 2.17, + "learning_rate": 1.0710006321981229e-06, + "logits/chosen": -2.516024589538574, + "logits/rejected": -1.72567617893219, + "logps/chosen": -718.8870849609375, + "logps/rejected": -2201.8603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.367763519287109, + "rewards/margins": 15.208346366882324, + "rewards/rejected": -21.576114654541016, + "step": 36440 + }, + { + "epoch": 2.17, + "learning_rate": 1.0695777010495936e-06, + "logits/chosen": -2.5344974994659424, + "logits/rejected": -1.8126938343048096, + "logps/chosen": -738.824462890625, + "logps/rejected": -2142.70703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.724618434906006, + "rewards/margins": 14.258285522460938, + "rewards/rejected": -20.982906341552734, + "step": 36450 + }, + { + "epoch": 2.17, + "learning_rate": 1.0681554585063408e-06, + "logits/chosen": -2.5319840908050537, + "logits/rejected": -1.8042590618133545, + "logps/chosen": -746.2347412109375, + "logps/rejected": -2172.558349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.624146461486816, + "rewards/margins": 14.663780212402344, + "rewards/rejected": -21.287927627563477, + "step": 36460 + }, + { + "epoch": 2.17, + "learning_rate": 1.066733905253032e-06, + "logits/chosen": -2.4820611476898193, + "logits/rejected": -1.7552067041397095, + "logps/chosen": -754.16748046875, + "logps/rejected": -2147.27685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.799371242523193, + "rewards/margins": 14.23486042022705, + "rewards/rejected": -21.034231185913086, + "step": 36470 + }, + { + "epoch": 2.18, + "learning_rate": 1.065313041974003e-06, + "logits/chosen": -2.4702517986297607, + "logits/rejected": -1.75014328956604, + "logps/chosen": -732.713623046875, + "logps/rejected": -2199.035888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.650580406188965, + "rewards/margins": 14.899978637695312, + "rewards/rejected": -21.550559997558594, + "step": 36480 + }, + { + "epoch": 2.18, + "learning_rate": 1.0638928693532574e-06, + "logits/chosen": -2.5655646324157715, + "logits/rejected": -1.859185814857483, + "logps/chosen": -757.024169921875, + "logps/rejected": -2190.68212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.847219944000244, + "rewards/margins": 14.621421813964844, + "rewards/rejected": -21.46864128112793, + "step": 36490 + }, + { + "epoch": 2.18, + "learning_rate": 1.0624733880744659e-06, + "logits/chosen": -2.560088872909546, + "logits/rejected": -1.7700904607772827, + "logps/chosen": -741.1049194335938, + "logps/rejected": -2189.1904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.693215370178223, + "rewards/margins": 14.767939567565918, + "rewards/rejected": -21.461153030395508, + "step": 36500 + }, + { + "epoch": 2.18, + "learning_rate": 1.0610545988209671e-06, + "logits/chosen": -2.5083673000335693, + "logits/rejected": -1.8541768789291382, + "logps/chosen": -737.521728515625, + "logps/rejected": -2118.48291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.703909873962402, + "rewards/margins": 14.029184341430664, + "rewards/rejected": -20.733095169067383, + "step": 36510 + }, + { + "epoch": 2.18, + "learning_rate": 1.0596365022757664e-06, + "logits/chosen": -2.47698712348938, + "logits/rejected": -1.7216129302978516, + "logps/chosen": -743.0454711914062, + "logps/rejected": -2250.38916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7103986740112305, + "rewards/margins": 15.350184440612793, + "rewards/rejected": -22.060583114624023, + "step": 36520 + }, + { + "epoch": 2.18, + "learning_rate": 1.0582190991215357e-06, + "logits/chosen": -2.5181918144226074, + "logits/rejected": -1.7517248392105103, + "logps/chosen": -725.8543090820312, + "logps/rejected": -2155.36669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.588305473327637, + "rewards/margins": 14.521505355834961, + "rewards/rejected": -21.109811782836914, + "step": 36530 + }, + { + "epoch": 2.18, + "learning_rate": 1.0568023900406108e-06, + "logits/chosen": -2.5063109397888184, + "logits/rejected": -1.8018999099731445, + "logps/chosen": -783.28125, + "logps/rejected": -2161.99560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.128782749176025, + "rewards/margins": 14.033720016479492, + "rewards/rejected": -21.16250228881836, + "step": 36540 + }, + { + "epoch": 2.18, + "learning_rate": 1.0553863757149984e-06, + "logits/chosen": -2.4993412494659424, + "logits/rejected": -1.7842519283294678, + "logps/chosen": -735.8226318359375, + "logps/rejected": -2067.892822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651754856109619, + "rewards/margins": 13.593009948730469, + "rewards/rejected": -20.24476432800293, + "step": 36550 + }, + { + "epoch": 2.18, + "learning_rate": 1.0539710568263647e-06, + "logits/chosen": -2.541208028793335, + "logits/rejected": -1.8383013010025024, + "logps/chosen": -761.8404541015625, + "logps/rejected": -2226.71826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.875504493713379, + "rewards/margins": 14.940996170043945, + "rewards/rejected": -21.816499710083008, + "step": 36560 + }, + { + "epoch": 2.18, + "learning_rate": 1.0525564340560476e-06, + "logits/chosen": -2.4630165100097656, + "logits/rejected": -1.7955677509307861, + "logps/chosen": -747.6807861328125, + "logps/rejected": -2203.288818359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.736358642578125, + "rewards/margins": 14.856539726257324, + "rewards/rejected": -21.592899322509766, + "step": 36570 + }, + { + "epoch": 2.18, + "learning_rate": 1.051142508085043e-06, + "logits/chosen": -2.492215633392334, + "logits/rejected": -1.76254403591156, + "logps/chosen": -749.2369384765625, + "logps/rejected": -2316.7802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.751173496246338, + "rewards/margins": 15.978642463684082, + "rewards/rejected": -22.729816436767578, + "step": 36580 + }, + { + "epoch": 2.18, + "learning_rate": 1.0497292795940182e-06, + "logits/chosen": -2.49731707572937, + "logits/rejected": -1.796403169631958, + "logps/chosen": -729.5859985351562, + "logps/rejected": -2124.103759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.605279445648193, + "rewards/margins": 14.20037841796875, + "rewards/rejected": -20.8056583404541, + "step": 36590 + }, + { + "epoch": 2.18, + "learning_rate": 1.048316749263298e-06, + "logits/chosen": -2.454397439956665, + "logits/rejected": -1.6947482824325562, + "logps/chosen": -763.2264404296875, + "logps/rejected": -2216.10888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.928356170654297, + "rewards/margins": 14.78064250946045, + "rewards/rejected": -21.708995819091797, + "step": 36600 + }, + { + "epoch": 2.18, + "learning_rate": 1.046904917772878e-06, + "logits/chosen": -2.584670305252075, + "logits/rejected": -1.761897325515747, + "logps/chosen": -726.1922607421875, + "logps/rejected": -2310.031982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535420894622803, + "rewards/margins": 16.118749618530273, + "rewards/rejected": -22.654170989990234, + "step": 36610 + }, + { + "epoch": 2.18, + "learning_rate": 1.0454937858024108e-06, + "logits/chosen": -2.5366344451904297, + "logits/rejected": -1.811802625656128, + "logps/chosen": -739.3796997070312, + "logps/rejected": -2178.094970703125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.619696617126465, + "rewards/margins": 14.699485778808594, + "rewards/rejected": -21.319181442260742, + "step": 36620 + }, + { + "epoch": 2.18, + "learning_rate": 1.044083354031217e-06, + "logits/chosen": -2.5031096935272217, + "logits/rejected": -1.8429725170135498, + "logps/chosen": -708.31787109375, + "logps/rejected": -2167.069580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309622764587402, + "rewards/margins": 14.924028396606445, + "rewards/rejected": -21.233652114868164, + "step": 36630 + }, + { + "epoch": 2.18, + "learning_rate": 1.0426736231382778e-06, + "logits/chosen": -2.5231590270996094, + "logits/rejected": -1.7628676891326904, + "logps/chosen": -728.9053344726562, + "logps/rejected": -2238.862548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5983567237854, + "rewards/margins": 15.357172012329102, + "rewards/rejected": -21.955528259277344, + "step": 36640 + }, + { + "epoch": 2.19, + "learning_rate": 1.041264593802238e-06, + "logits/chosen": -2.5866270065307617, + "logits/rejected": -1.8505408763885498, + "logps/chosen": -714.36328125, + "logps/rejected": -2133.4541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.472143650054932, + "rewards/margins": 14.423007011413574, + "rewards/rejected": -20.89514923095703, + "step": 36650 + }, + { + "epoch": 2.19, + "learning_rate": 1.039856266701404e-06, + "logits/chosen": -2.541597604751587, + "logits/rejected": -1.8380954265594482, + "logps/chosen": -719.2694702148438, + "logps/rejected": -2186.046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.477649688720703, + "rewards/margins": 14.930821418762207, + "rewards/rejected": -21.408472061157227, + "step": 36660 + }, + { + "epoch": 2.19, + "learning_rate": 1.0384486425137447e-06, + "logits/chosen": -2.5130257606506348, + "logits/rejected": -1.7656543254852295, + "logps/chosen": -717.1725463867188, + "logps/rejected": -2180.60205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.454419136047363, + "rewards/margins": 14.909246444702148, + "rewards/rejected": -21.363666534423828, + "step": 36670 + }, + { + "epoch": 2.19, + "learning_rate": 1.03704172191689e-06, + "logits/chosen": -2.5087268352508545, + "logits/rejected": -1.8187839984893799, + "logps/chosen": -717.0073852539062, + "logps/rejected": -2175.52587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.417993068695068, + "rewards/margins": 14.900487899780273, + "rewards/rejected": -21.3184814453125, + "step": 36680 + }, + { + "epoch": 2.19, + "learning_rate": 1.035635505588132e-06, + "logits/chosen": -2.5520949363708496, + "logits/rejected": -1.8380552530288696, + "logps/chosen": -729.7551879882812, + "logps/rejected": -2209.485595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.602476596832275, + "rewards/margins": 15.050453186035156, + "rewards/rejected": -21.652929306030273, + "step": 36690 + }, + { + "epoch": 2.19, + "learning_rate": 1.0342299942044229e-06, + "logits/chosen": -2.5768351554870605, + "logits/rejected": -1.7872085571289062, + "logps/chosen": -715.4131469726562, + "logps/rejected": -2198.910400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.466471195220947, + "rewards/margins": 15.087717056274414, + "rewards/rejected": -21.554187774658203, + "step": 36700 + }, + { + "epoch": 2.19, + "learning_rate": 1.0328251884423756e-06, + "logits/chosen": -2.4958841800689697, + "logits/rejected": -1.7039854526519775, + "logps/chosen": -702.8746337890625, + "logps/rejected": -2239.34619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329391002655029, + "rewards/margins": 15.617230415344238, + "rewards/rejected": -21.94662094116211, + "step": 36710 + }, + { + "epoch": 2.19, + "learning_rate": 1.0314210889782642e-06, + "logits/chosen": -2.541149854660034, + "logits/rejected": -1.797663927078247, + "logps/chosen": -728.7770385742188, + "logps/rejected": -2223.39794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.543600559234619, + "rewards/margins": 15.24903678894043, + "rewards/rejected": -21.79263687133789, + "step": 36720 + }, + { + "epoch": 2.19, + "learning_rate": 1.0300176964880218e-06, + "logits/chosen": -2.545048475265503, + "logits/rejected": -1.8242181539535522, + "logps/chosen": -741.51416015625, + "logps/rejected": -2145.239501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.655267238616943, + "rewards/margins": 14.353022575378418, + "rewards/rejected": -21.008289337158203, + "step": 36730 + }, + { + "epoch": 2.19, + "learning_rate": 1.0286150116472415e-06, + "logits/chosen": -2.569582462310791, + "logits/rejected": -1.8633997440338135, + "logps/chosen": -739.1622314453125, + "logps/rejected": -2282.970947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.727301597595215, + "rewards/margins": 15.675868034362793, + "rewards/rejected": -22.403169631958008, + "step": 36740 + }, + { + "epoch": 2.19, + "learning_rate": 1.0272130351311758e-06, + "logits/chosen": -2.509294033050537, + "logits/rejected": -1.7839972972869873, + "logps/chosen": -709.1687622070312, + "logps/rejected": -2092.900146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.464056491851807, + "rewards/margins": 14.02314567565918, + "rewards/rejected": -20.487201690673828, + "step": 36750 + }, + { + "epoch": 2.19, + "learning_rate": 1.025811767614736e-06, + "logits/chosen": -2.5650649070739746, + "logits/rejected": -1.8761848211288452, + "logps/chosen": -744.645263671875, + "logps/rejected": -2200.99853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.802866458892822, + "rewards/margins": 14.766688346862793, + "rewards/rejected": -21.56955337524414, + "step": 36760 + }, + { + "epoch": 2.19, + "learning_rate": 1.0244112097724928e-06, + "logits/chosen": -2.5467400550842285, + "logits/rejected": -1.786940336227417, + "logps/chosen": -734.6610107421875, + "logps/rejected": -2138.096923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.601435661315918, + "rewards/margins": 14.33655834197998, + "rewards/rejected": -20.937992095947266, + "step": 36770 + }, + { + "epoch": 2.19, + "learning_rate": 1.0230113622786744e-06, + "logits/chosen": -2.535945415496826, + "logits/rejected": -1.8873176574707031, + "logps/chosen": -722.7086791992188, + "logps/rejected": -2228.093505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.518153190612793, + "rewards/margins": 15.313631057739258, + "rewards/rejected": -21.831783294677734, + "step": 36780 + }, + { + "epoch": 2.19, + "learning_rate": 1.0216122258071672e-06, + "logits/chosen": -2.5222151279449463, + "logits/rejected": -1.8744617700576782, + "logps/chosen": -753.8499755859375, + "logps/rejected": -2203.38525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.8390631675720215, + "rewards/margins": 14.759844779968262, + "rewards/rejected": -21.59891128540039, + "step": 36790 + }, + { + "epoch": 2.19, + "learning_rate": 1.0202138010315168e-06, + "logits/chosen": -2.5376386642456055, + "logits/rejected": -1.8813979625701904, + "logps/chosen": -746.8878173828125, + "logps/rejected": -2211.29541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.768685340881348, + "rewards/margins": 14.914019584655762, + "rewards/rejected": -21.68270492553711, + "step": 36800 + }, + { + "epoch": 2.19, + "learning_rate": 1.0188160886249219e-06, + "logits/chosen": -2.5489864349365234, + "logits/rejected": -1.8704414367675781, + "logps/chosen": -739.5614013671875, + "logps/rejected": -2152.38525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6560258865356445, + "rewards/margins": 14.435022354125977, + "rewards/rejected": -21.091049194335938, + "step": 36810 + }, + { + "epoch": 2.2, + "learning_rate": 1.0174190892602446e-06, + "logits/chosen": -2.4833860397338867, + "logits/rejected": -1.733485221862793, + "logps/chosen": -727.641845703125, + "logps/rejected": -2193.568115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5258612632751465, + "rewards/margins": 14.9666748046875, + "rewards/rejected": -21.492534637451172, + "step": 36820 + }, + { + "epoch": 2.2, + "learning_rate": 1.016022803609997e-06, + "logits/chosen": -2.5192673206329346, + "logits/rejected": -1.8227615356445312, + "logps/chosen": -738.6990966796875, + "logps/rejected": -2200.154541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.65056848526001, + "rewards/margins": 14.912915229797363, + "rewards/rejected": -21.56348419189453, + "step": 36830 + }, + { + "epoch": 2.2, + "learning_rate": 1.0146272323463548e-06, + "logits/chosen": -2.5229952335357666, + "logits/rejected": -1.701653242111206, + "logps/chosen": -702.1336669921875, + "logps/rejected": -2126.913818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.340004920959473, + "rewards/margins": 14.490522384643555, + "rewards/rejected": -20.83052635192871, + "step": 36840 + }, + { + "epoch": 2.2, + "learning_rate": 1.0132323761411422e-06, + "logits/chosen": -2.496959686279297, + "logits/rejected": -1.7579078674316406, + "logps/chosen": -724.1912841796875, + "logps/rejected": -2147.33447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5885419845581055, + "rewards/margins": 14.456153869628906, + "rewards/rejected": -21.044696807861328, + "step": 36850 + }, + { + "epoch": 2.2, + "learning_rate": 1.0118382356658466e-06, + "logits/chosen": -2.485285997390747, + "logits/rejected": -1.727863073348999, + "logps/chosen": -749.4027099609375, + "logps/rejected": -2178.2958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.745793342590332, + "rewards/margins": 14.579751968383789, + "rewards/rejected": -21.325542449951172, + "step": 36860 + }, + { + "epoch": 2.2, + "learning_rate": 1.0104448115916035e-06, + "logits/chosen": -2.489489793777466, + "logits/rejected": -1.8615272045135498, + "logps/chosen": -723.3147583007812, + "logps/rejected": -2204.505615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561132907867432, + "rewards/margins": 15.041049003601074, + "rewards/rejected": -21.602182388305664, + "step": 36870 + }, + { + "epoch": 2.2, + "learning_rate": 1.0090521045892106e-06, + "logits/chosen": -2.5291640758514404, + "logits/rejected": -1.8643735647201538, + "logps/chosen": -717.6648559570312, + "logps/rejected": -2278.57177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.491267204284668, + "rewards/margins": 15.85992431640625, + "rewards/rejected": -22.3511905670166, + "step": 36880 + }, + { + "epoch": 2.2, + "learning_rate": 1.0076601153291152e-06, + "logits/chosen": -2.558851480484009, + "logits/rejected": -1.7491302490234375, + "logps/chosen": -714.673583984375, + "logps/rejected": -2176.64501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.443614959716797, + "rewards/margins": 14.876358032226562, + "rewards/rejected": -21.31997299194336, + "step": 36890 + }, + { + "epoch": 2.2, + "learning_rate": 1.0062688444814208e-06, + "logits/chosen": -2.480299472808838, + "logits/rejected": -1.791094183921814, + "logps/chosen": -717.7147827148438, + "logps/rejected": -2154.343017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.474604606628418, + "rewards/margins": 14.637158393859863, + "rewards/rejected": -21.11176109313965, + "step": 36900 + }, + { + "epoch": 2.2, + "learning_rate": 1.004878292715886e-06, + "logits/chosen": -2.50447154045105, + "logits/rejected": -1.7802801132202148, + "logps/chosen": -745.1177368164062, + "logps/rejected": -2152.146240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.747804164886475, + "rewards/margins": 14.335653305053711, + "rewards/rejected": -21.083457946777344, + "step": 36910 + }, + { + "epoch": 2.2, + "learning_rate": 1.0034884607019219e-06, + "logits/chosen": -2.51664137840271, + "logits/rejected": -1.7594772577285767, + "logps/chosen": -785.1319580078125, + "logps/rejected": -2234.65771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -7.179473876953125, + "rewards/margins": 14.717710494995117, + "rewards/rejected": -21.89718246459961, + "step": 36920 + }, + { + "epoch": 2.2, + "learning_rate": 1.0020993491085936e-06, + "logits/chosen": -2.5437874794006348, + "logits/rejected": -1.7854273319244385, + "logps/chosen": -713.9067993164062, + "logps/rejected": -2218.236083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.416539669036865, + "rewards/margins": 15.332757949829102, + "rewards/rejected": -21.749296188354492, + "step": 36930 + }, + { + "epoch": 2.2, + "learning_rate": 1.0007109586046197e-06, + "logits/chosen": -2.465613842010498, + "logits/rejected": -1.8159453868865967, + "logps/chosen": -717.2306518554688, + "logps/rejected": -2108.49560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.482666969299316, + "rewards/margins": 14.164182662963867, + "rewards/rejected": -20.646848678588867, + "step": 36940 + }, + { + "epoch": 2.2, + "learning_rate": 9.993232898583711e-07, + "logits/chosen": -2.57904314994812, + "logits/rejected": -2.0394768714904785, + "logps/chosen": -724.726318359375, + "logps/rejected": -2116.163330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.580362796783447, + "rewards/margins": 14.148134231567383, + "rewards/rejected": -20.728496551513672, + "step": 36950 + }, + { + "epoch": 2.2, + "learning_rate": 9.979363435378717e-07, + "logits/chosen": -2.527340888977051, + "logits/rejected": -1.8013060092926025, + "logps/chosen": -720.9981689453125, + "logps/rejected": -2193.909423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4821624755859375, + "rewards/margins": 15.015612602233887, + "rewards/rejected": -21.497774124145508, + "step": 36960 + }, + { + "epoch": 2.2, + "learning_rate": 9.965501203107972e-07, + "logits/chosen": -2.491657018661499, + "logits/rejected": -1.8237301111221313, + "logps/chosen": -763.5576782226562, + "logps/rejected": -2211.464111328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.906500339508057, + "rewards/margins": 14.77155876159668, + "rewards/rejected": -21.678058624267578, + "step": 36970 + }, + { + "epoch": 2.21, + "learning_rate": 9.951646208444758e-07, + "logits/chosen": -2.5537045001983643, + "logits/rejected": -1.783572793006897, + "logps/chosen": -709.1526489257812, + "logps/rejected": -2236.946533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.399862766265869, + "rewards/margins": 15.520655632019043, + "rewards/rejected": -21.920515060424805, + "step": 36980 + }, + { + "epoch": 2.21, + "learning_rate": 9.937798458058864e-07, + "logits/chosen": -2.514359951019287, + "logits/rejected": -1.8078114986419678, + "logps/chosen": -724.0885009765625, + "logps/rejected": -2126.5048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.538583278656006, + "rewards/margins": 14.28539752960205, + "rewards/rejected": -20.8239803314209, + "step": 36990 + }, + { + "epoch": 2.21, + "learning_rate": 9.923957958616603e-07, + "logits/chosen": -2.4595868587493896, + "logits/rejected": -1.7489608526229858, + "logps/chosen": -733.9801635742188, + "logps/rejected": -2173.64208984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.643853187561035, + "rewards/margins": 14.647054672241211, + "rewards/rejected": -21.290908813476562, + "step": 37000 + }, + { + "epoch": 2.21, + "learning_rate": 9.910124716780788e-07, + "logits/chosen": -2.5538763999938965, + "logits/rejected": -1.8914226293563843, + "logps/chosen": -758.2467041015625, + "logps/rejected": -2249.85546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.865198612213135, + "rewards/margins": 15.194976806640625, + "rewards/rejected": -22.060176849365234, + "step": 37010 + }, + { + "epoch": 2.21, + "learning_rate": 9.896298739210745e-07, + "logits/chosen": -2.5249216556549072, + "logits/rejected": -1.7146451473236084, + "logps/chosen": -710.31005859375, + "logps/rejected": -2073.05224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4243597984313965, + "rewards/margins": 13.853034973144531, + "rewards/rejected": -20.27739715576172, + "step": 37020 + }, + { + "epoch": 2.21, + "learning_rate": 9.882480032562292e-07, + "logits/chosen": -2.5033164024353027, + "logits/rejected": -1.8142162561416626, + "logps/chosen": -731.788330078125, + "logps/rejected": -2090.44580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.547917366027832, + "rewards/margins": 13.930273056030273, + "rewards/rejected": -20.47818946838379, + "step": 37030 + }, + { + "epoch": 2.21, + "learning_rate": 9.86866860348776e-07, + "logits/chosen": -2.475759983062744, + "logits/rejected": -1.6334812641143799, + "logps/chosen": -750.9702758789062, + "logps/rejected": -2210.494384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.775208473205566, + "rewards/margins": 14.900533676147461, + "rewards/rejected": -21.67574119567871, + "step": 37040 + }, + { + "epoch": 2.21, + "learning_rate": 9.85486445863597e-07, + "logits/chosen": -2.549285411834717, + "logits/rejected": -1.7397191524505615, + "logps/chosen": -726.17822265625, + "logps/rejected": -2138.470458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.557021141052246, + "rewards/margins": 14.389154434204102, + "rewards/rejected": -20.946176528930664, + "step": 37050 + }, + { + "epoch": 2.21, + "learning_rate": 9.841067604652237e-07, + "logits/chosen": -2.5189547538757324, + "logits/rejected": -1.73305344581604, + "logps/chosen": -722.5772705078125, + "logps/rejected": -2153.828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.576226234436035, + "rewards/margins": 14.518521308898926, + "rewards/rejected": -21.094745635986328, + "step": 37060 + }, + { + "epoch": 2.21, + "learning_rate": 9.827278048178374e-07, + "logits/chosen": -2.5206241607666016, + "logits/rejected": -1.8536443710327148, + "logps/chosen": -721.1928100585938, + "logps/rejected": -2181.8203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.531512260437012, + "rewards/margins": 14.8552827835083, + "rewards/rejected": -21.38679313659668, + "step": 37070 + }, + { + "epoch": 2.21, + "learning_rate": 9.813495795852646e-07, + "logits/chosen": -2.4999592304229736, + "logits/rejected": -1.848402976989746, + "logps/chosen": -735.90625, + "logps/rejected": -2185.599365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.706446647644043, + "rewards/margins": 14.704872131347656, + "rewards/rejected": -21.411319732666016, + "step": 37080 + }, + { + "epoch": 2.21, + "learning_rate": 9.799720854309869e-07, + "logits/chosen": -2.518843173980713, + "logits/rejected": -1.793246865272522, + "logps/chosen": -731.7537841796875, + "logps/rejected": -2191.78857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638385772705078, + "rewards/margins": 14.81788444519043, + "rewards/rejected": -21.45627212524414, + "step": 37090 + }, + { + "epoch": 2.21, + "learning_rate": 9.785953230181258e-07, + "logits/chosen": -2.4927191734313965, + "logits/rejected": -1.7175744771957397, + "logps/chosen": -714.6959228515625, + "logps/rejected": -2161.76416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411746025085449, + "rewards/margins": 14.775347709655762, + "rewards/rejected": -21.187095642089844, + "step": 37100 + }, + { + "epoch": 2.21, + "learning_rate": 9.772192930094588e-07, + "logits/chosen": -2.4628329277038574, + "logits/rejected": -1.831107497215271, + "logps/chosen": -710.5115966796875, + "logps/rejected": -2237.694091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.446225643157959, + "rewards/margins": 15.479876518249512, + "rewards/rejected": -21.926101684570312, + "step": 37110 + }, + { + "epoch": 2.21, + "learning_rate": 9.758439960674026e-07, + "logits/chosen": -2.555724620819092, + "logits/rejected": -1.9525611400604248, + "logps/chosen": -712.9761962890625, + "logps/rejected": -2132.307861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.49197244644165, + "rewards/margins": 14.386472702026367, + "rewards/rejected": -20.87844467163086, + "step": 37120 + }, + { + "epoch": 2.21, + "learning_rate": 9.744694328540295e-07, + "logits/chosen": -2.5333619117736816, + "logits/rejected": -1.8362451791763306, + "logps/chosen": -710.841064453125, + "logps/rejected": -2127.50341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.448956489562988, + "rewards/margins": 14.39777946472168, + "rewards/rejected": -20.846736907958984, + "step": 37130 + }, + { + "epoch": 2.21, + "learning_rate": 9.730956040310499e-07, + "logits/chosen": -2.4811201095581055, + "logits/rejected": -1.7741552591323853, + "logps/chosen": -737.2874755859375, + "logps/rejected": -2185.23681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.651360511779785, + "rewards/margins": 14.750816345214844, + "rewards/rejected": -21.402175903320312, + "step": 37140 + }, + { + "epoch": 2.22, + "learning_rate": 9.717225102598291e-07, + "logits/chosen": -2.516012668609619, + "logits/rejected": -1.8677221536636353, + "logps/chosen": -750.3572998046875, + "logps/rejected": -2167.0478515625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.800392150878906, + "rewards/margins": 14.4296236038208, + "rewards/rejected": -21.23001480102539, + "step": 37150 + }, + { + "epoch": 2.22, + "learning_rate": 9.703501522013712e-07, + "logits/chosen": -2.5134806632995605, + "logits/rejected": -1.7348552942276, + "logps/chosen": -729.21240234375, + "logps/rejected": -2198.04833984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.598785400390625, + "rewards/margins": 14.931695938110352, + "rewards/rejected": -21.530481338500977, + "step": 37160 + }, + { + "epoch": 2.22, + "learning_rate": 9.689785305163307e-07, + "logits/chosen": -2.501286506652832, + "logits/rejected": -1.8477176427841187, + "logps/chosen": -721.4176025390625, + "logps/rejected": -2186.46044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.474339485168457, + "rewards/margins": 14.946833610534668, + "rewards/rejected": -21.421171188354492, + "step": 37170 + }, + { + "epoch": 2.22, + "learning_rate": 9.67607645865006e-07, + "logits/chosen": -2.5291194915771484, + "logits/rejected": -1.8263769149780273, + "logps/chosen": -722.9398193359375, + "logps/rejected": -2207.38427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.515697479248047, + "rewards/margins": 15.118108749389648, + "rewards/rejected": -21.633808135986328, + "step": 37180 + }, + { + "epoch": 2.22, + "learning_rate": 9.662374989073409e-07, + "logits/chosen": -2.523622512817383, + "logits/rejected": -1.8323004245758057, + "logps/chosen": -722.154052734375, + "logps/rejected": -2122.134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.579982757568359, + "rewards/margins": 14.202960014343262, + "rewards/rejected": -20.782941818237305, + "step": 37190 + }, + { + "epoch": 2.22, + "learning_rate": 9.648680903029245e-07, + "logits/chosen": -2.4926857948303223, + "logits/rejected": -1.7834224700927734, + "logps/chosen": -721.4876708984375, + "logps/rejected": -2235.083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4594597816467285, + "rewards/margins": 15.455322265625, + "rewards/rejected": -21.914783477783203, + "step": 37200 + }, + { + "epoch": 2.22, + "learning_rate": 9.6349942071099e-07, + "logits/chosen": -2.5664234161376953, + "logits/rejected": -1.775864839553833, + "logps/chosen": -722.0391845703125, + "logps/rejected": -2283.701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.475475311279297, + "rewards/margins": 15.922231674194336, + "rewards/rejected": -22.3977108001709, + "step": 37210 + }, + { + "epoch": 2.22, + "learning_rate": 9.621314907904145e-07, + "logits/chosen": -2.4971978664398193, + "logits/rejected": -1.820417046546936, + "logps/chosen": -724.303955078125, + "logps/rejected": -2158.4140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.63388204574585, + "rewards/margins": 14.510696411132812, + "rewards/rejected": -21.144580841064453, + "step": 37220 + }, + { + "epoch": 2.22, + "learning_rate": 9.607643011997195e-07, + "logits/chosen": -2.5167465209960938, + "logits/rejected": -1.8075621128082275, + "logps/chosen": -726.7286987304688, + "logps/rejected": -2187.90576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.580092430114746, + "rewards/margins": 14.866010665893555, + "rewards/rejected": -21.446102142333984, + "step": 37230 + }, + { + "epoch": 2.22, + "learning_rate": 9.593978525970707e-07, + "logits/chosen": -2.503941774368286, + "logits/rejected": -1.753464937210083, + "logps/chosen": -732.6949462890625, + "logps/rejected": -2138.792724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.657739162445068, + "rewards/margins": 14.281723022460938, + "rewards/rejected": -20.93946075439453, + "step": 37240 + }, + { + "epoch": 2.22, + "learning_rate": 9.580321456402758e-07, + "logits/chosen": -2.5166068077087402, + "logits/rejected": -1.7995517253875732, + "logps/chosen": -743.64599609375, + "logps/rejected": -2192.7802734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.717177391052246, + "rewards/margins": 14.769670486450195, + "rewards/rejected": -21.486846923828125, + "step": 37250 + }, + { + "epoch": 2.22, + "learning_rate": 9.566671809867864e-07, + "logits/chosen": -2.5195021629333496, + "logits/rejected": -1.803846001625061, + "logps/chosen": -732.976806640625, + "logps/rejected": -2209.7578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.61208963394165, + "rewards/margins": 15.044748306274414, + "rewards/rejected": -21.65683937072754, + "step": 37260 + }, + { + "epoch": 2.22, + "learning_rate": 9.553029592936964e-07, + "logits/chosen": -2.4739372730255127, + "logits/rejected": -1.7894399166107178, + "logps/chosen": -710.3533935546875, + "logps/rejected": -2187.5654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.427159786224365, + "rewards/margins": 15.005681037902832, + "rewards/rejected": -21.43284034729004, + "step": 37270 + }, + { + "epoch": 2.22, + "learning_rate": 9.539394812177422e-07, + "logits/chosen": -2.5614399909973145, + "logits/rejected": -1.8274646997451782, + "logps/chosen": -733.0473022460938, + "logps/rejected": -2188.62353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.571693420410156, + "rewards/margins": 14.878649711608887, + "rewards/rejected": -21.45034408569336, + "step": 37280 + }, + { + "epoch": 2.22, + "learning_rate": 9.52576747415302e-07, + "logits/chosen": -2.5078468322753906, + "logits/rejected": -1.7253755331039429, + "logps/chosen": -716.9593505859375, + "logps/rejected": -2076.28076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42177677154541, + "rewards/margins": 13.906282424926758, + "rewards/rejected": -20.328060150146484, + "step": 37290 + }, + { + "epoch": 2.22, + "learning_rate": 9.512147585423956e-07, + "logits/chosen": -2.545563220977783, + "logits/rejected": -1.7963005304336548, + "logps/chosen": -707.3179931640625, + "logps/rejected": -2096.859130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.339739799499512, + "rewards/margins": 14.169105529785156, + "rewards/rejected": -20.50884437561035, + "step": 37300 + }, + { + "epoch": 2.22, + "learning_rate": 9.498535152546847e-07, + "logits/chosen": -2.5562355518341064, + "logits/rejected": -1.8338474035263062, + "logps/chosen": -705.561767578125, + "logps/rejected": -2142.89501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.39747428894043, + "rewards/margins": 14.595327377319336, + "rewards/rejected": -20.992801666259766, + "step": 37310 + }, + { + "epoch": 2.23, + "learning_rate": 9.484930182074722e-07, + "logits/chosen": -2.4794821739196777, + "logits/rejected": -1.7905467748641968, + "logps/chosen": -726.9210205078125, + "logps/rejected": -2108.016357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.563063144683838, + "rewards/margins": 14.073437690734863, + "rewards/rejected": -20.636499404907227, + "step": 37320 + }, + { + "epoch": 2.23, + "learning_rate": 9.471332680557008e-07, + "logits/chosen": -2.5345592498779297, + "logits/rejected": -1.8700214624404907, + "logps/chosen": -726.4915161132812, + "logps/rejected": -2200.1259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.587414741516113, + "rewards/margins": 14.970046997070312, + "rewards/rejected": -21.55746078491211, + "step": 37330 + }, + { + "epoch": 2.23, + "learning_rate": 9.457742654539551e-07, + "logits/chosen": -2.5217461585998535, + "logits/rejected": -1.7363630533218384, + "logps/chosen": -734.1458740234375, + "logps/rejected": -2115.06689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.63098669052124, + "rewards/margins": 14.058308601379395, + "rewards/rejected": -20.689292907714844, + "step": 37340 + }, + { + "epoch": 2.23, + "learning_rate": 9.444160110564563e-07, + "logits/chosen": -2.5018341541290283, + "logits/rejected": -1.7703943252563477, + "logps/chosen": -738.4996337890625, + "logps/rejected": -2224.29052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6912384033203125, + "rewards/margins": 15.104955673217773, + "rewards/rejected": -21.796192169189453, + "step": 37350 + }, + { + "epoch": 2.23, + "learning_rate": 9.430585055170719e-07, + "logits/chosen": -2.535780429840088, + "logits/rejected": -1.7078866958618164, + "logps/chosen": -737.2105712890625, + "logps/rejected": -2088.76123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6750054359436035, + "rewards/margins": 13.767763137817383, + "rewards/rejected": -20.442768096923828, + "step": 37360 + }, + { + "epoch": 2.23, + "learning_rate": 9.417017494893013e-07, + "logits/chosen": -2.5118250846862793, + "logits/rejected": -1.781725287437439, + "logps/chosen": -711.6666259765625, + "logps/rejected": -2214.21044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.426690101623535, + "rewards/margins": 15.270452499389648, + "rewards/rejected": -21.6971435546875, + "step": 37370 + }, + { + "epoch": 2.23, + "learning_rate": 9.403457436262906e-07, + "logits/chosen": -2.497576951980591, + "logits/rejected": -1.733129858970642, + "logps/chosen": -723.4484252929688, + "logps/rejected": -2195.41845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.560809135437012, + "rewards/margins": 14.949193954467773, + "rewards/rejected": -21.51000213623047, + "step": 37380 + }, + { + "epoch": 2.23, + "learning_rate": 9.389904885808171e-07, + "logits/chosen": -2.5344223976135254, + "logits/rejected": -1.8429279327392578, + "logps/chosen": -732.5797729492188, + "logps/rejected": -2165.564697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.598263740539551, + "rewards/margins": 14.617698669433594, + "rewards/rejected": -21.21596336364746, + "step": 37390 + }, + { + "epoch": 2.23, + "learning_rate": 9.37635985005304e-07, + "logits/chosen": -2.4728264808654785, + "logits/rejected": -1.6917505264282227, + "logps/chosen": -710.130126953125, + "logps/rejected": -2183.414794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.379796028137207, + "rewards/margins": 15.003911972045898, + "rewards/rejected": -21.383710861206055, + "step": 37400 + }, + { + "epoch": 2.23, + "learning_rate": 9.362822335518062e-07, + "logits/chosen": -2.518205165863037, + "logits/rejected": -1.8067184686660767, + "logps/chosen": -728.3243408203125, + "logps/rejected": -2177.763671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.60095739364624, + "rewards/margins": 14.737649917602539, + "rewards/rejected": -21.338603973388672, + "step": 37410 + }, + { + "epoch": 2.23, + "learning_rate": 9.349292348720232e-07, + "logits/chosen": -2.465925455093384, + "logits/rejected": -1.691076636314392, + "logps/chosen": -722.7376708984375, + "logps/rejected": -2085.874267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.536993980407715, + "rewards/margins": 13.874361991882324, + "rewards/rejected": -20.411357879638672, + "step": 37420 + }, + { + "epoch": 2.23, + "learning_rate": 9.335769896172855e-07, + "logits/chosen": -2.5255231857299805, + "logits/rejected": -1.8543819189071655, + "logps/chosen": -725.6943359375, + "logps/rejected": -2156.857666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.540258884429932, + "rewards/margins": 14.587350845336914, + "rewards/rejected": -21.127609252929688, + "step": 37430 + }, + { + "epoch": 2.23, + "learning_rate": 9.322254984385651e-07, + "logits/chosen": -2.5251567363739014, + "logits/rejected": -1.9153945446014404, + "logps/chosen": -735.2384643554688, + "logps/rejected": -2203.117919921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.676271915435791, + "rewards/margins": 14.9146728515625, + "rewards/rejected": -21.590944290161133, + "step": 37440 + }, + { + "epoch": 2.23, + "learning_rate": 9.308747619864697e-07, + "logits/chosen": -2.516213893890381, + "logits/rejected": -1.8973493576049805, + "logps/chosen": -711.5090942382812, + "logps/rejected": -2204.69384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.383312225341797, + "rewards/margins": 15.218721389770508, + "rewards/rejected": -21.602035522460938, + "step": 37450 + }, + { + "epoch": 2.23, + "learning_rate": 9.29524780911244e-07, + "logits/chosen": -2.4813294410705566, + "logits/rejected": -1.6866264343261719, + "logps/chosen": -731.657958984375, + "logps/rejected": -2091.41015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.628629207611084, + "rewards/margins": 13.846954345703125, + "rewards/rejected": -20.475584030151367, + "step": 37460 + }, + { + "epoch": 2.23, + "learning_rate": 9.281755558627686e-07, + "logits/chosen": -2.536954402923584, + "logits/rejected": -1.8116832971572876, + "logps/chosen": -741.01708984375, + "logps/rejected": -2281.05810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.618117332458496, + "rewards/margins": 15.740900039672852, + "rewards/rejected": -22.35901641845703, + "step": 37470 + }, + { + "epoch": 2.23, + "learning_rate": 9.268270874905605e-07, + "logits/chosen": -2.5372300148010254, + "logits/rejected": -1.7942394018173218, + "logps/chosen": -731.5686645507812, + "logps/rejected": -2220.330322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.603410243988037, + "rewards/margins": 15.153742790222168, + "rewards/rejected": -21.757152557373047, + "step": 37480 + }, + { + "epoch": 2.24, + "learning_rate": 9.254793764437727e-07, + "logits/chosen": -2.5067615509033203, + "logits/rejected": -1.6499338150024414, + "logps/chosen": -735.7099609375, + "logps/rejected": -2161.46875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.626704216003418, + "rewards/margins": 14.5424222946167, + "rewards/rejected": -21.169126510620117, + "step": 37490 + }, + { + "epoch": 2.24, + "learning_rate": 9.241324233711929e-07, + "logits/chosen": -2.5178062915802, + "logits/rejected": -1.767287015914917, + "logps/chosen": -714.6408081054688, + "logps/rejected": -2156.423095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.483917236328125, + "rewards/margins": 14.644407272338867, + "rewards/rejected": -21.128324508666992, + "step": 37500 + }, + { + "epoch": 2.24, + "learning_rate": 9.227862289212441e-07, + "logits/chosen": -2.480051279067993, + "logits/rejected": -1.7861436605453491, + "logps/chosen": -741.4561767578125, + "logps/rejected": -2061.916748046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.660578727722168, + "rewards/margins": 13.524615287780762, + "rewards/rejected": -20.185195922851562, + "step": 37510 + }, + { + "epoch": 2.24, + "learning_rate": 9.21440793741985e-07, + "logits/chosen": -2.5237064361572266, + "logits/rejected": -1.8330742120742798, + "logps/chosen": -705.3407592773438, + "logps/rejected": -2167.578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.304418563842773, + "rewards/margins": 14.940930366516113, + "rewards/rejected": -21.245349884033203, + "step": 37520 + }, + { + "epoch": 2.24, + "learning_rate": 9.200961184811075e-07, + "logits/chosen": -2.565438747406006, + "logits/rejected": -1.9078487157821655, + "logps/chosen": -736.0726318359375, + "logps/rejected": -2256.141357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.677733421325684, + "rewards/margins": 15.442973136901855, + "rewards/rejected": -22.120708465576172, + "step": 37530 + }, + { + "epoch": 2.24, + "learning_rate": 9.187522037859384e-07, + "logits/chosen": -2.5281260013580322, + "logits/rejected": -1.885392427444458, + "logps/chosen": -703.791748046875, + "logps/rejected": -2235.28759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.377402305603027, + "rewards/margins": 15.522496223449707, + "rewards/rejected": -21.899898529052734, + "step": 37540 + }, + { + "epoch": 2.24, + "learning_rate": 9.174090503034383e-07, + "logits/chosen": -2.512998580932617, + "logits/rejected": -1.8281065225601196, + "logps/chosen": -725.7396240234375, + "logps/rejected": -2199.32470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.565328121185303, + "rewards/margins": 14.996953010559082, + "rewards/rejected": -21.562280654907227, + "step": 37550 + }, + { + "epoch": 2.24, + "learning_rate": 9.160666586802011e-07, + "logits/chosen": -2.5227484703063965, + "logits/rejected": -1.7662267684936523, + "logps/chosen": -706.4242553710938, + "logps/rejected": -2199.369873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.30528450012207, + "rewards/margins": 15.245796203613281, + "rewards/rejected": -21.551082611083984, + "step": 37560 + }, + { + "epoch": 2.24, + "learning_rate": 9.147250295624541e-07, + "logits/chosen": -2.5645358562469482, + "logits/rejected": -1.8765027523040771, + "logps/chosen": -733.20654296875, + "logps/rejected": -2238.46728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.653501987457275, + "rewards/margins": 15.2709379196167, + "rewards/rejected": -21.924442291259766, + "step": 37570 + }, + { + "epoch": 2.24, + "learning_rate": 9.13384163596058e-07, + "logits/chosen": -2.4690709114074707, + "logits/rejected": -1.731981635093689, + "logps/chosen": -729.3167114257812, + "logps/rejected": -2164.634521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.607320308685303, + "rewards/margins": 14.598272323608398, + "rewards/rejected": -21.20559310913086, + "step": 37580 + }, + { + "epoch": 2.24, + "learning_rate": 9.12044061426505e-07, + "logits/chosen": -2.514566659927368, + "logits/rejected": -1.7721261978149414, + "logps/chosen": -704.80322265625, + "logps/rejected": -2248.976318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.313331127166748, + "rewards/margins": 15.72264575958252, + "rewards/rejected": -22.03597640991211, + "step": 37590 + }, + { + "epoch": 2.24, + "learning_rate": 9.107047236989209e-07, + "logits/chosen": -2.571288585662842, + "logits/rejected": -1.916999101638794, + "logps/chosen": -719.41796875, + "logps/rejected": -2175.72021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.485355377197266, + "rewards/margins": 14.82409954071045, + "rewards/rejected": -21.309459686279297, + "step": 37600 + }, + { + "epoch": 2.24, + "learning_rate": 9.093661510580631e-07, + "logits/chosen": -2.4722955226898193, + "logits/rejected": -1.8300533294677734, + "logps/chosen": -741.0028076171875, + "logps/rejected": -2186.212158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.703488826751709, + "rewards/margins": 14.721982955932617, + "rewards/rejected": -21.425472259521484, + "step": 37610 + }, + { + "epoch": 2.24, + "learning_rate": 9.080283441483182e-07, + "logits/chosen": -2.540046215057373, + "logits/rejected": -1.7650047540664673, + "logps/chosen": -709.1553955078125, + "logps/rejected": -2279.093017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.374925136566162, + "rewards/margins": 15.975756645202637, + "rewards/rejected": -22.35068130493164, + "step": 37620 + }, + { + "epoch": 2.24, + "learning_rate": 9.066913036137101e-07, + "logits/chosen": -2.5188536643981934, + "logits/rejected": -1.866044282913208, + "logps/chosen": -730.8206176757812, + "logps/rejected": -2182.795166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.624661922454834, + "rewards/margins": 14.77466869354248, + "rewards/rejected": -21.399328231811523, + "step": 37630 + }, + { + "epoch": 2.24, + "learning_rate": 9.053550300978861e-07, + "logits/chosen": -2.497671604156494, + "logits/rejected": -1.7493482828140259, + "logps/chosen": -731.2322998046875, + "logps/rejected": -2152.53857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.597164154052734, + "rewards/margins": 14.479527473449707, + "rewards/rejected": -21.07669448852539, + "step": 37640 + }, + { + "epoch": 2.25, + "learning_rate": 9.040195242441322e-07, + "logits/chosen": -2.517796039581299, + "logits/rejected": -1.8691667318344116, + "logps/chosen": -735.2828369140625, + "logps/rejected": -2086.14892578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.622035026550293, + "rewards/margins": 13.801767349243164, + "rewards/rejected": -20.42380142211914, + "step": 37650 + }, + { + "epoch": 2.25, + "learning_rate": 9.026847866953572e-07, + "logits/chosen": -2.5182719230651855, + "logits/rejected": -1.8351118564605713, + "logps/chosen": -707.8956298828125, + "logps/rejected": -2168.257568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.347365379333496, + "rewards/margins": 14.88514518737793, + "rewards/rejected": -21.232511520385742, + "step": 37660 + }, + { + "epoch": 2.25, + "learning_rate": 9.013508180941072e-07, + "logits/chosen": -2.5214264392852783, + "logits/rejected": -1.773233413696289, + "logps/chosen": -734.0287475585938, + "logps/rejected": -2191.10595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.637673854827881, + "rewards/margins": 14.8290433883667, + "rewards/rejected": -21.466716766357422, + "step": 37670 + }, + { + "epoch": 2.25, + "learning_rate": 9.000176190825513e-07, + "logits/chosen": -2.5583293437957764, + "logits/rejected": -1.9252004623413086, + "logps/chosen": -696.3009033203125, + "logps/rejected": -2176.22998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.279958248138428, + "rewards/margins": 15.044161796569824, + "rewards/rejected": -21.324121475219727, + "step": 37680 + }, + { + "epoch": 2.25, + "learning_rate": 8.986851903024949e-07, + "logits/chosen": -2.513519763946533, + "logits/rejected": -1.8055671453475952, + "logps/chosen": -736.10791015625, + "logps/rejected": -2302.35546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.642978668212891, + "rewards/margins": 15.93378734588623, + "rewards/rejected": -22.576766967773438, + "step": 37690 + }, + { + "epoch": 2.25, + "learning_rate": 8.973535323953667e-07, + "logits/chosen": -2.489487648010254, + "logits/rejected": -1.839775800704956, + "logps/chosen": -708.10888671875, + "logps/rejected": -2198.703369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42333984375, + "rewards/margins": 15.121208190917969, + "rewards/rejected": -21.5445499420166, + "step": 37700 + }, + { + "epoch": 2.25, + "learning_rate": 8.960226460022272e-07, + "logits/chosen": -2.502023220062256, + "logits/rejected": -1.685426950454712, + "logps/chosen": -706.6154174804688, + "logps/rejected": -2234.636474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.372422218322754, + "rewards/margins": 15.519307136535645, + "rewards/rejected": -21.891727447509766, + "step": 37710 + }, + { + "epoch": 2.25, + "learning_rate": 8.946925317637659e-07, + "logits/chosen": -2.5849204063415527, + "logits/rejected": -1.9189453125, + "logps/chosen": -708.2418212890625, + "logps/rejected": -2164.69189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.408933162689209, + "rewards/margins": 14.794282913208008, + "rewards/rejected": -21.203218460083008, + "step": 37720 + }, + { + "epoch": 2.25, + "learning_rate": 8.933631903202991e-07, + "logits/chosen": -2.5179293155670166, + "logits/rejected": -1.852111577987671, + "logps/chosen": -744.8045654296875, + "logps/rejected": -2174.02587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7375383377075195, + "rewards/margins": 14.560159683227539, + "rewards/rejected": -21.297697067260742, + "step": 37730 + }, + { + "epoch": 2.25, + "learning_rate": 8.920346223117721e-07, + "logits/chosen": -2.536513090133667, + "logits/rejected": -1.7527393102645874, + "logps/chosen": -715.6411743164062, + "logps/rejected": -2201.666259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4575514793396, + "rewards/margins": 15.125028610229492, + "rewards/rejected": -21.58258056640625, + "step": 37740 + }, + { + "epoch": 2.25, + "learning_rate": 8.907068283777575e-07, + "logits/chosen": -2.462991237640381, + "logits/rejected": -1.7742106914520264, + "logps/chosen": -706.4168701171875, + "logps/rejected": -2221.854736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.40169620513916, + "rewards/margins": 15.377695083618164, + "rewards/rejected": -21.779388427734375, + "step": 37750 + }, + { + "epoch": 2.25, + "learning_rate": 8.893798091574551e-07, + "logits/chosen": -2.520962715148926, + "logits/rejected": -1.7158886194229126, + "logps/chosen": -728.907958984375, + "logps/rejected": -2225.0625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.547557830810547, + "rewards/margins": 15.254701614379883, + "rewards/rejected": -21.802257537841797, + "step": 37760 + }, + { + "epoch": 2.25, + "learning_rate": 8.88053565289691e-07, + "logits/chosen": -2.5110671520233154, + "logits/rejected": -1.7666542530059814, + "logps/chosen": -719.0755615234375, + "logps/rejected": -2120.802001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.503350734710693, + "rewards/margins": 14.259733200073242, + "rewards/rejected": -20.763084411621094, + "step": 37770 + }, + { + "epoch": 2.25, + "learning_rate": 8.86728097412922e-07, + "logits/chosen": -2.570711135864258, + "logits/rejected": -1.9758965969085693, + "logps/chosen": -716.4061889648438, + "logps/rejected": -2237.69091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.465239524841309, + "rewards/margins": 15.46312427520752, + "rewards/rejected": -21.928363800048828, + "step": 37780 + }, + { + "epoch": 2.25, + "learning_rate": 8.854034061652253e-07, + "logits/chosen": -2.5117740631103516, + "logits/rejected": -1.838963508605957, + "logps/chosen": -714.9739990234375, + "logps/rejected": -2108.19873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.447528839111328, + "rewards/margins": 14.208045959472656, + "rewards/rejected": -20.655574798583984, + "step": 37790 + }, + { + "epoch": 2.25, + "learning_rate": 8.840794921843085e-07, + "logits/chosen": -2.559882640838623, + "logits/rejected": -1.833974838256836, + "logps/chosen": -713.0474853515625, + "logps/rejected": -2120.540283203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.425692081451416, + "rewards/margins": 14.336298942565918, + "rewards/rejected": -20.76198959350586, + "step": 37800 + }, + { + "epoch": 2.25, + "learning_rate": 8.827563561075034e-07, + "logits/chosen": -2.5010132789611816, + "logits/rejected": -1.91107177734375, + "logps/chosen": -727.3428344726562, + "logps/rejected": -2145.694580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.557975769042969, + "rewards/margins": 14.451150894165039, + "rewards/rejected": -21.009124755859375, + "step": 37810 + }, + { + "epoch": 2.26, + "learning_rate": 8.814339985717677e-07, + "logits/chosen": -2.48629093170166, + "logits/rejected": -1.8288094997406006, + "logps/chosen": -728.4171752929688, + "logps/rejected": -2156.71533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.59328556060791, + "rewards/margins": 14.544305801391602, + "rewards/rejected": -21.137592315673828, + "step": 37820 + }, + { + "epoch": 2.26, + "learning_rate": 8.801124202136846e-07, + "logits/chosen": -2.584961175918579, + "logits/rejected": -1.8711068630218506, + "logps/chosen": -722.8931884765625, + "logps/rejected": -2092.774658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5474700927734375, + "rewards/margins": 13.931363105773926, + "rewards/rejected": -20.47883415222168, + "step": 37830 + }, + { + "epoch": 2.26, + "learning_rate": 8.78791621669462e-07, + "logits/chosen": -2.530846118927002, + "logits/rejected": -1.8458125591278076, + "logps/chosen": -699.2386474609375, + "logps/rejected": -2207.48779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.253523349761963, + "rewards/margins": 15.363009452819824, + "rewards/rejected": -21.616533279418945, + "step": 37840 + }, + { + "epoch": 2.26, + "learning_rate": 8.774716035749317e-07, + "logits/chosen": -2.528010845184326, + "logits/rejected": -1.8315051794052124, + "logps/chosen": -714.7796630859375, + "logps/rejected": -2214.796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.420474052429199, + "rewards/margins": 15.271074295043945, + "rewards/rejected": -21.69154930114746, + "step": 37850 + }, + { + "epoch": 2.26, + "learning_rate": 8.761523665655508e-07, + "logits/chosen": -2.4798450469970703, + "logits/rejected": -1.8496472835540771, + "logps/chosen": -734.022705078125, + "logps/rejected": -2198.13525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.638629913330078, + "rewards/margins": 14.898078918457031, + "rewards/rejected": -21.53670883178711, + "step": 37860 + }, + { + "epoch": 2.26, + "learning_rate": 8.748339112764001e-07, + "logits/chosen": -2.5105466842651367, + "logits/rejected": -1.663021445274353, + "logps/chosen": -697.369384765625, + "logps/rejected": -2157.58642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.263101100921631, + "rewards/margins": 14.864538192749023, + "rewards/rejected": -21.127643585205078, + "step": 37870 + }, + { + "epoch": 2.26, + "learning_rate": 8.735162383421844e-07, + "logits/chosen": -2.570192813873291, + "logits/rejected": -1.9058055877685547, + "logps/chosen": -722.0862426757812, + "logps/rejected": -2232.13330078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.490262508392334, + "rewards/margins": 15.381251335144043, + "rewards/rejected": -21.871511459350586, + "step": 37880 + }, + { + "epoch": 2.26, + "learning_rate": 8.721993483972294e-07, + "logits/chosen": -2.5024659633636475, + "logits/rejected": -1.877826452255249, + "logps/chosen": -714.5592651367188, + "logps/rejected": -2175.566162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.493513584136963, + "rewards/margins": 14.823905944824219, + "rewards/rejected": -21.31741714477539, + "step": 37890 + }, + { + "epoch": 2.26, + "learning_rate": 8.708832420754887e-07, + "logits/chosen": -2.5074691772460938, + "logits/rejected": -1.848022222518921, + "logps/chosen": -720.5335693359375, + "logps/rejected": -2232.403076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.560563087463379, + "rewards/margins": 15.323348999023438, + "rewards/rejected": -21.883914947509766, + "step": 37900 + }, + { + "epoch": 2.26, + "learning_rate": 8.695679200105331e-07, + "logits/chosen": -2.570492744445801, + "logits/rejected": -1.9025917053222656, + "logps/chosen": -704.4030151367188, + "logps/rejected": -2118.67138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.280501365661621, + "rewards/margins": 14.457748413085938, + "rewards/rejected": -20.738248825073242, + "step": 37910 + }, + { + "epoch": 2.26, + "learning_rate": 8.682533828355616e-07, + "logits/chosen": -2.5141921043395996, + "logits/rejected": -1.7317581176757812, + "logps/chosen": -716.5859375, + "logps/rejected": -2187.91162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.487110137939453, + "rewards/margins": 14.941373825073242, + "rewards/rejected": -21.428485870361328, + "step": 37920 + }, + { + "epoch": 2.26, + "learning_rate": 8.669396311833892e-07, + "logits/chosen": -2.5444583892822266, + "logits/rejected": -1.8238970041275024, + "logps/chosen": -734.4063110351562, + "logps/rejected": -2242.247314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.629935264587402, + "rewards/margins": 15.350442886352539, + "rewards/rejected": -21.98038101196289, + "step": 37930 + }, + { + "epoch": 2.26, + "learning_rate": 8.65626665686459e-07, + "logits/chosen": -2.485975980758667, + "logits/rejected": -1.8547124862670898, + "logps/chosen": -758.6243286132812, + "logps/rejected": -2227.415283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.915868282318115, + "rewards/margins": 14.908365249633789, + "rewards/rejected": -21.824230194091797, + "step": 37940 + }, + { + "epoch": 2.26, + "learning_rate": 8.643144869768294e-07, + "logits/chosen": -2.4891104698181152, + "logits/rejected": -1.682451844215393, + "logps/chosen": -747.646240234375, + "logps/rejected": -2061.5546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.822812080383301, + "rewards/margins": 13.368721008300781, + "rewards/rejected": -20.191532135009766, + "step": 37950 + }, + { + "epoch": 2.26, + "learning_rate": 8.630030956861868e-07, + "logits/chosen": -2.5432801246643066, + "logits/rejected": -1.8034225702285767, + "logps/chosen": -715.78857421875, + "logps/rejected": -2110.205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5074663162231445, + "rewards/margins": 14.173876762390137, + "rewards/rejected": -20.68134117126465, + "step": 37960 + }, + { + "epoch": 2.26, + "learning_rate": 8.616924924458322e-07, + "logits/chosen": -2.567303419113159, + "logits/rejected": -1.8337905406951904, + "logps/chosen": -708.9171142578125, + "logps/rejected": -2190.35107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.335551738739014, + "rewards/margins": 15.10987663269043, + "rewards/rejected": -21.4454288482666, + "step": 37970 + }, + { + "epoch": 2.26, + "learning_rate": 8.6038267788669e-07, + "logits/chosen": -2.5315568447113037, + "logits/rejected": -1.8628990650177002, + "logps/chosen": -732.7645874023438, + "logps/rejected": -2148.34619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.61484432220459, + "rewards/margins": 14.42023754119873, + "rewards/rejected": -21.035079956054688, + "step": 37980 + }, + { + "epoch": 2.27, + "learning_rate": 8.590736526393073e-07, + "logits/chosen": -2.5350685119628906, + "logits/rejected": -1.7850357294082642, + "logps/chosen": -724.3500366210938, + "logps/rejected": -2159.003662109375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.577616214752197, + "rewards/margins": 14.566263198852539, + "rewards/rejected": -21.143878936767578, + "step": 37990 + }, + { + "epoch": 2.27, + "learning_rate": 8.577654173338456e-07, + "logits/chosen": -2.505831480026245, + "logits/rejected": -1.7446715831756592, + "logps/chosen": -710.57861328125, + "logps/rejected": -2168.42041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.400704383850098, + "rewards/margins": 14.849187850952148, + "rewards/rejected": -21.249889373779297, + "step": 38000 + }, + { + "epoch": 2.27, + "learning_rate": 8.56457972600093e-07, + "logits/chosen": -2.5254578590393066, + "logits/rejected": -1.71369206905365, + "logps/chosen": -721.3118286132812, + "logps/rejected": -2150.519775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.519467830657959, + "rewards/margins": 14.521322250366211, + "rewards/rejected": -21.040790557861328, + "step": 38010 + }, + { + "epoch": 2.27, + "learning_rate": 8.551513190674495e-07, + "logits/chosen": -2.506855010986328, + "logits/rejected": -1.8534761667251587, + "logps/chosen": -722.5013427734375, + "logps/rejected": -2155.24609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.550764560699463, + "rewards/margins": 14.569560050964355, + "rewards/rejected": -21.120323181152344, + "step": 38020 + }, + { + "epoch": 2.27, + "learning_rate": 8.538454573649418e-07, + "logits/chosen": -2.5640835762023926, + "logits/rejected": -1.7992366552352905, + "logps/chosen": -715.6527099609375, + "logps/rejected": -2234.168212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.446054935455322, + "rewards/margins": 15.449132919311523, + "rewards/rejected": -21.895187377929688, + "step": 38030 + }, + { + "epoch": 2.27, + "learning_rate": 8.525403881212083e-07, + "logits/chosen": -2.563072681427002, + "logits/rejected": -1.7344591617584229, + "logps/chosen": -720.7810668945312, + "logps/rejected": -2314.63037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.46471643447876, + "rewards/margins": 16.23123550415039, + "rewards/rejected": -22.69594955444336, + "step": 38040 + }, + { + "epoch": 2.27, + "learning_rate": 8.512361119645126e-07, + "logits/chosen": -2.5398759841918945, + "logits/rejected": -1.7542839050292969, + "logps/chosen": -750.9197387695312, + "logps/rejected": -2173.557861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.827166557312012, + "rewards/margins": 14.460957527160645, + "rewards/rejected": -21.288122177124023, + "step": 38050 + }, + { + "epoch": 2.27, + "learning_rate": 8.499326295227306e-07, + "logits/chosen": -2.4818930625915527, + "logits/rejected": -1.8044763803482056, + "logps/chosen": -723.7305297851562, + "logps/rejected": -2099.788330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.5198869705200195, + "rewards/margins": 14.046224594116211, + "rewards/rejected": -20.566110610961914, + "step": 38060 + }, + { + "epoch": 2.27, + "learning_rate": 8.486299414233598e-07, + "logits/chosen": -2.535107135772705, + "logits/rejected": -1.938821792602539, + "logps/chosen": -760.8570556640625, + "logps/rejected": -2129.6171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.907894134521484, + "rewards/margins": 13.955995559692383, + "rewards/rejected": -20.863887786865234, + "step": 38070 + }, + { + "epoch": 2.27, + "learning_rate": 8.473280482935142e-07, + "logits/chosen": -2.543217182159424, + "logits/rejected": -1.8561338186264038, + "logps/chosen": -758.994140625, + "logps/rejected": -2159.17236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.880544185638428, + "rewards/margins": 14.258976936340332, + "rewards/rejected": -21.139522552490234, + "step": 38080 + }, + { + "epoch": 2.27, + "learning_rate": 8.46026950759925e-07, + "logits/chosen": -2.490055561065674, + "logits/rejected": -1.8170207738876343, + "logps/chosen": -749.1339111328125, + "logps/rejected": -2223.68408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.834752082824707, + "rewards/margins": 14.957258224487305, + "rewards/rejected": -21.792011260986328, + "step": 38090 + }, + { + "epoch": 2.27, + "learning_rate": 8.447266494489408e-07, + "logits/chosen": -2.4850401878356934, + "logits/rejected": -1.8005831241607666, + "logps/chosen": -762.1116333007812, + "logps/rejected": -2222.0078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.9452409744262695, + "rewards/margins": 14.831392288208008, + "rewards/rejected": -21.77663230895996, + "step": 38100 + }, + { + "epoch": 2.27, + "learning_rate": 8.434271449865264e-07, + "logits/chosen": -2.5139803886413574, + "logits/rejected": -1.869601845741272, + "logps/chosen": -752.2341918945312, + "logps/rejected": -2202.75439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.790445804595947, + "rewards/margins": 14.796142578125, + "rewards/rejected": -21.586589813232422, + "step": 38110 + }, + { + "epoch": 2.27, + "learning_rate": 8.42128437998263e-07, + "logits/chosen": -2.517202615737915, + "logits/rejected": -1.6710660457611084, + "logps/chosen": -732.4031372070312, + "logps/rejected": -2240.42919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.62490701675415, + "rewards/margins": 15.337946891784668, + "rewards/rejected": -21.962854385375977, + "step": 38120 + }, + { + "epoch": 2.27, + "learning_rate": 8.408305291093488e-07, + "logits/chosen": -2.5236904621124268, + "logits/rejected": -1.7963062524795532, + "logps/chosen": -725.6438598632812, + "logps/rejected": -2171.649658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.543889045715332, + "rewards/margins": 14.740564346313477, + "rewards/rejected": -21.28445053100586, + "step": 38130 + }, + { + "epoch": 2.27, + "learning_rate": 8.395334189445964e-07, + "logits/chosen": -2.5324926376342773, + "logits/rejected": -1.780417799949646, + "logps/chosen": -727.9900512695312, + "logps/rejected": -2045.999755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.570314884185791, + "rewards/margins": 13.454150199890137, + "rewards/rejected": -20.024465560913086, + "step": 38140 + }, + { + "epoch": 2.27, + "learning_rate": 8.38237108128436e-07, + "logits/chosen": -2.520057201385498, + "logits/rejected": -1.8217836618423462, + "logps/chosen": -723.3458251953125, + "logps/rejected": -2169.7880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.495431423187256, + "rewards/margins": 14.760843276977539, + "rewards/rejected": -21.256275177001953, + "step": 38150 + }, + { + "epoch": 2.28, + "learning_rate": 8.369415972849087e-07, + "logits/chosen": -2.506354331970215, + "logits/rejected": -1.815625548362732, + "logps/chosen": -715.04833984375, + "logps/rejected": -2231.25830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4731879234313965, + "rewards/margins": 15.399859428405762, + "rewards/rejected": -21.873046875, + "step": 38160 + }, + { + "epoch": 2.28, + "learning_rate": 8.356468870376771e-07, + "logits/chosen": -2.5018012523651123, + "logits/rejected": -1.6882257461547852, + "logps/chosen": -713.54443359375, + "logps/rejected": -2153.912353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470078468322754, + "rewards/margins": 14.626953125, + "rewards/rejected": -21.09703254699707, + "step": 38170 + }, + { + "epoch": 2.28, + "learning_rate": 8.343529780100113e-07, + "logits/chosen": -2.502939224243164, + "logits/rejected": -1.89645254611969, + "logps/chosen": -737.5704345703125, + "logps/rejected": -2264.918701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.665432929992676, + "rewards/margins": 15.536870956420898, + "rewards/rejected": -22.20230484008789, + "step": 38180 + }, + { + "epoch": 2.28, + "learning_rate": 8.330598708248011e-07, + "logits/chosen": -2.5476794242858887, + "logits/rejected": -1.858604073524475, + "logps/chosen": -727.0440673828125, + "logps/rejected": -2230.05908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.576315402984619, + "rewards/margins": 15.272760391235352, + "rewards/rejected": -21.849075317382812, + "step": 38190 + }, + { + "epoch": 2.28, + "learning_rate": 8.317675661045479e-07, + "logits/chosen": -2.5434041023254395, + "logits/rejected": -1.7445876598358154, + "logps/chosen": -713.0278930664062, + "logps/rejected": -2225.42333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.470430850982666, + "rewards/margins": 15.328956604003906, + "rewards/rejected": -21.799388885498047, + "step": 38200 + }, + { + "epoch": 2.28, + "learning_rate": 8.304760644713675e-07, + "logits/chosen": -2.510892152786255, + "logits/rejected": -1.8059310913085938, + "logps/chosen": -743.07666015625, + "logps/rejected": -2254.1064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.704972267150879, + "rewards/margins": 15.386436462402344, + "rewards/rejected": -22.091405868530273, + "step": 38210 + }, + { + "epoch": 2.28, + "learning_rate": 8.291853665469887e-07, + "logits/chosen": -2.5294644832611084, + "logits/rejected": -1.8553285598754883, + "logps/chosen": -747.6575317382812, + "logps/rejected": -2267.631591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.7468061447143555, + "rewards/margins": 15.464347839355469, + "rewards/rejected": -22.21115493774414, + "step": 38220 + }, + { + "epoch": 2.28, + "learning_rate": 8.278954729527519e-07, + "logits/chosen": -2.536990165710449, + "logits/rejected": -1.8280856609344482, + "logps/chosen": -731.7210693359375, + "logps/rejected": -2227.785888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.627243995666504, + "rewards/margins": 15.193563461303711, + "rewards/rejected": -21.82080841064453, + "step": 38230 + }, + { + "epoch": 2.28, + "learning_rate": 8.266063843096145e-07, + "logits/chosen": -2.531140089035034, + "logits/rejected": -1.803983449935913, + "logps/chosen": -748.5668334960938, + "logps/rejected": -2228.00732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.749964714050293, + "rewards/margins": 15.080180168151855, + "rewards/rejected": -21.830142974853516, + "step": 38240 + }, + { + "epoch": 2.28, + "learning_rate": 8.253181012381409e-07, + "logits/chosen": -2.526803970336914, + "logits/rejected": -1.8709666728973389, + "logps/chosen": -740.4821166992188, + "logps/rejected": -2297.163330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.721651554107666, + "rewards/margins": 15.786664009094238, + "rewards/rejected": -22.508312225341797, + "step": 38250 + }, + { + "epoch": 2.28, + "learning_rate": 8.240306243585134e-07, + "logits/chosen": -2.5084967613220215, + "logits/rejected": -1.8555558919906616, + "logps/chosen": -727.3374633789062, + "logps/rejected": -2189.015869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.572603702545166, + "rewards/margins": 14.872970581054688, + "rewards/rejected": -21.445573806762695, + "step": 38260 + }, + { + "epoch": 2.28, + "learning_rate": 8.227439542905205e-07, + "logits/chosen": -2.51594877243042, + "logits/rejected": -1.8549867868423462, + "logps/chosen": -750.8283081054688, + "logps/rejected": -2180.138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.821230411529541, + "rewards/margins": 14.53428840637207, + "rewards/rejected": -21.355518341064453, + "step": 38270 + }, + { + "epoch": 2.28, + "learning_rate": 8.214580916535683e-07, + "logits/chosen": -2.557985544204712, + "logits/rejected": -1.8891137838363647, + "logps/chosen": -760.3651733398438, + "logps/rejected": -2166.1689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.851373195648193, + "rewards/margins": 14.367718696594238, + "rewards/rejected": -21.21908950805664, + "step": 38280 + }, + { + "epoch": 2.28, + "learning_rate": 8.201730370666678e-07, + "logits/chosen": -2.548417806625366, + "logits/rejected": -1.7439244985580444, + "logps/chosen": -747.6202392578125, + "logps/rejected": -2153.97802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.693364143371582, + "rewards/margins": 14.407681465148926, + "rewards/rejected": -21.10104751586914, + "step": 38290 + }, + { + "epoch": 2.28, + "learning_rate": 8.188887911484472e-07, + "logits/chosen": -2.5319712162017822, + "logits/rejected": -1.8771082162857056, + "logps/chosen": -737.5220947265625, + "logps/rejected": -2188.42529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.6585798263549805, + "rewards/margins": 14.77696418762207, + "rewards/rejected": -21.435543060302734, + "step": 38300 + }, + { + "epoch": 2.28, + "learning_rate": 8.176053545171403e-07, + "logits/chosen": -2.544060230255127, + "logits/rejected": -1.9435135126113892, + "logps/chosen": -723.6801147460938, + "logps/rejected": -2111.0048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.561923980712891, + "rewards/margins": 14.114557266235352, + "rewards/rejected": -20.67647933959961, + "step": 38310 + }, + { + "epoch": 2.29, + "learning_rate": 8.163227277905944e-07, + "logits/chosen": -2.5497193336486816, + "logits/rejected": -1.8878633975982666, + "logps/chosen": -708.8646240234375, + "logps/rejected": -2139.2392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.367649078369141, + "rewards/margins": 14.5723295211792, + "rewards/rejected": -20.939979553222656, + "step": 38320 + }, + { + "epoch": 2.29, + "learning_rate": 8.150409115862659e-07, + "logits/chosen": -2.570258855819702, + "logits/rejected": -1.7880125045776367, + "logps/chosen": -731.5049438476562, + "logps/rejected": -2171.6953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.573633670806885, + "rewards/margins": 14.693957328796387, + "rewards/rejected": -21.267589569091797, + "step": 38330 + }, + { + "epoch": 2.29, + "learning_rate": 8.13759906521221e-07, + "logits/chosen": -2.536780834197998, + "logits/rejected": -1.8497234582901, + "logps/chosen": -722.6250610351562, + "logps/rejected": -2183.56591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.464898586273193, + "rewards/margins": 14.925959587097168, + "rewards/rejected": -21.390859603881836, + "step": 38340 + }, + { + "epoch": 2.29, + "learning_rate": 8.124797132121356e-07, + "logits/chosen": -2.498281240463257, + "logits/rejected": -1.837618112564087, + "logps/chosen": -720.0330810546875, + "logps/rejected": -2152.472412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.427992343902588, + "rewards/margins": 14.649510383605957, + "rewards/rejected": -21.07750129699707, + "step": 38350 + }, + { + "epoch": 2.29, + "learning_rate": 8.112003322752948e-07, + "logits/chosen": -2.5294504165649414, + "logits/rejected": -1.8457616567611694, + "logps/chosen": -728.0933227539062, + "logps/rejected": -2245.2138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.581995487213135, + "rewards/margins": 15.437357902526855, + "rewards/rejected": -22.019351959228516, + "step": 38360 + }, + { + "epoch": 2.29, + "learning_rate": 8.099217643265928e-07, + "logits/chosen": -2.5430562496185303, + "logits/rejected": -1.8251731395721436, + "logps/chosen": -754.1817626953125, + "logps/rejected": -2265.1875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.835334777832031, + "rewards/margins": 15.365948677062988, + "rewards/rejected": -22.201282501220703, + "step": 38370 + }, + { + "epoch": 2.29, + "learning_rate": 8.086440099815316e-07, + "logits/chosen": -2.4809441566467285, + "logits/rejected": -1.7571805715560913, + "logps/chosen": -715.2431030273438, + "logps/rejected": -2163.083740234375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43996524810791, + "rewards/margins": 14.752532958984375, + "rewards/rejected": -21.1924991607666, + "step": 38380 + }, + { + "epoch": 2.29, + "learning_rate": 8.073670698552221e-07, + "logits/chosen": -2.5268023014068604, + "logits/rejected": -1.7401292324066162, + "logps/chosen": -671.8805541992188, + "logps/rejected": -2167.746826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051712512969971, + "rewards/margins": 15.184460639953613, + "rewards/rejected": -21.23617172241211, + "step": 38390 + }, + { + "epoch": 2.29, + "learning_rate": 8.06090944562385e-07, + "logits/chosen": -2.5341429710388184, + "logits/rejected": -1.8350276947021484, + "logps/chosen": -695.832275390625, + "logps/rejected": -2175.186279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.268303871154785, + "rewards/margins": 15.048113822937012, + "rewards/rejected": -21.316415786743164, + "step": 38400 + }, + { + "epoch": 2.29, + "learning_rate": 8.048156347173436e-07, + "logits/chosen": -2.512979030609131, + "logits/rejected": -1.8695533275604248, + "logps/chosen": -676.008056640625, + "logps/rejected": -2165.9892578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076893329620361, + "rewards/margins": 15.143835067749023, + "rewards/rejected": -21.220726013183594, + "step": 38410 + }, + { + "epoch": 2.29, + "learning_rate": 8.035411409340349e-07, + "logits/chosen": -2.541781425476074, + "logits/rejected": -1.8524303436279297, + "logps/chosen": -704.2677001953125, + "logps/rejected": -2131.54248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.325687885284424, + "rewards/margins": 14.550466537475586, + "rewards/rejected": -20.876155853271484, + "step": 38420 + }, + { + "epoch": 2.29, + "learning_rate": 8.022674638259995e-07, + "logits/chosen": -2.5327036380767822, + "logits/rejected": -1.7495559453964233, + "logps/chosen": -687.6117553710938, + "logps/rejected": -2109.774169921875, + "loss": 0.0004, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2208733558654785, + "rewards/margins": 14.4512300491333, + "rewards/rejected": -20.672100067138672, + "step": 38430 + }, + { + "epoch": 2.29, + "learning_rate": 8.009946040063851e-07, + "logits/chosen": -2.581887722015381, + "logits/rejected": -1.9307100772857666, + "logps/chosen": -673.5958862304688, + "logps/rejected": -2181.810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051326274871826, + "rewards/margins": 15.317853927612305, + "rewards/rejected": -21.36918067932129, + "step": 38440 + }, + { + "epoch": 2.29, + "learning_rate": 7.997225620879467e-07, + "logits/chosen": -2.5105929374694824, + "logits/rejected": -1.8333219289779663, + "logps/chosen": -705.8868408203125, + "logps/rejected": -2161.941650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.282386779785156, + "rewards/margins": 14.876652717590332, + "rewards/rejected": -21.159038543701172, + "step": 38450 + }, + { + "epoch": 2.29, + "learning_rate": 7.984513386830453e-07, + "logits/chosen": -2.5080196857452393, + "logits/rejected": -1.8333479166030884, + "logps/chosen": -730.9208984375, + "logps/rejected": -2181.845947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.581265926361084, + "rewards/margins": 14.789886474609375, + "rewards/rejected": -21.371150970458984, + "step": 38460 + }, + { + "epoch": 2.29, + "learning_rate": 7.97180934403648e-07, + "logits/chosen": -2.529355525970459, + "logits/rejected": -1.8217957019805908, + "logps/chosen": -701.5067138671875, + "logps/rejected": -2225.19482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.287501335144043, + "rewards/margins": 15.527273178100586, + "rewards/rejected": -21.814775466918945, + "step": 38470 + }, + { + "epoch": 2.29, + "learning_rate": 7.959113498613269e-07, + "logits/chosen": -2.526217222213745, + "logits/rejected": -1.892608404159546, + "logps/chosen": -704.0409545898438, + "logps/rejected": -2119.5390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.340024471282959, + "rewards/margins": 14.424115180969238, + "rewards/rejected": -20.76413917541504, + "step": 38480 + }, + { + "epoch": 2.3, + "learning_rate": 7.94642585667261e-07, + "logits/chosen": -2.558237075805664, + "logits/rejected": -1.8013198375701904, + "logps/chosen": -685.3840942382812, + "logps/rejected": -2159.985595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165187835693359, + "rewards/margins": 14.992793083190918, + "rewards/rejected": -21.157981872558594, + "step": 38490 + }, + { + "epoch": 2.3, + "learning_rate": 7.933746424322311e-07, + "logits/chosen": -2.5140581130981445, + "logits/rejected": -1.841234803199768, + "logps/chosen": -696.6787109375, + "logps/rejected": -2086.164306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.255718231201172, + "rewards/margins": 14.171536445617676, + "rewards/rejected": -20.427255630493164, + "step": 38500 + }, + { + "epoch": 2.3, + "learning_rate": 7.921075207666284e-07, + "logits/chosen": -2.4938559532165527, + "logits/rejected": -1.8629333972930908, + "logps/chosen": -694.5936279296875, + "logps/rejected": -2146.305908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.259693145751953, + "rewards/margins": 14.76745891571045, + "rewards/rejected": -21.027151107788086, + "step": 38510 + }, + { + "epoch": 2.3, + "learning_rate": 7.908412212804414e-07, + "logits/chosen": -2.5395686626434326, + "logits/rejected": -1.911163091659546, + "logps/chosen": -707.9913330078125, + "logps/rejected": -2168.073486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.31813907623291, + "rewards/margins": 14.912086486816406, + "rewards/rejected": -21.230224609375, + "step": 38520 + }, + { + "epoch": 2.3, + "learning_rate": 7.895757445832705e-07, + "logits/chosen": -2.533104419708252, + "logits/rejected": -1.895611047744751, + "logps/chosen": -706.1932983398438, + "logps/rejected": -2153.5390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.320770740509033, + "rewards/margins": 14.7695894241333, + "rewards/rejected": -21.09035873413086, + "step": 38530 + }, + { + "epoch": 2.3, + "learning_rate": 7.883110912843128e-07, + "logits/chosen": -2.5090956687927246, + "logits/rejected": -1.7868292331695557, + "logps/chosen": -669.8683471679688, + "logps/rejected": -2099.441650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.010306358337402, + "rewards/margins": 14.544822692871094, + "rewards/rejected": -20.555126190185547, + "step": 38540 + }, + { + "epoch": 2.3, + "learning_rate": 7.870472619923755e-07, + "logits/chosen": -2.5201163291931152, + "logits/rejected": -1.9786930084228516, + "logps/chosen": -700.6271362304688, + "logps/rejected": -2068.77880859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.285219192504883, + "rewards/margins": 13.94715404510498, + "rewards/rejected": -20.232372283935547, + "step": 38550 + }, + { + "epoch": 2.3, + "learning_rate": 7.857842573158627e-07, + "logits/chosen": -2.544520139694214, + "logits/rejected": -1.852985143661499, + "logps/chosen": -706.3917236328125, + "logps/rejected": -2157.5234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.299835205078125, + "rewards/margins": 14.82587718963623, + "rewards/rejected": -21.125713348388672, + "step": 38560 + }, + { + "epoch": 2.3, + "learning_rate": 7.845220778627885e-07, + "logits/chosen": -2.536050319671631, + "logits/rejected": -1.8891233205795288, + "logps/chosen": -689.8268432617188, + "logps/rejected": -2096.522705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.200448036193848, + "rewards/margins": 14.325361251831055, + "rewards/rejected": -20.52581024169922, + "step": 38570 + }, + { + "epoch": 2.3, + "learning_rate": 7.832607242407631e-07, + "logits/chosen": -2.5206313133239746, + "logits/rejected": -1.7943315505981445, + "logps/chosen": -706.7557373046875, + "logps/rejected": -2115.19921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.342629432678223, + "rewards/margins": 14.360624313354492, + "rewards/rejected": -20.703256607055664, + "step": 38580 + }, + { + "epoch": 2.3, + "learning_rate": 7.820001970570032e-07, + "logits/chosen": -2.558527946472168, + "logits/rejected": -1.8443644046783447, + "logps/chosen": -686.2892456054688, + "logps/rejected": -2203.56591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149890899658203, + "rewards/margins": 15.437710762023926, + "rewards/rejected": -21.587602615356445, + "step": 38590 + }, + { + "epoch": 2.3, + "learning_rate": 7.807404969183263e-07, + "logits/chosen": -2.5528969764709473, + "logits/rejected": -1.7397737503051758, + "logps/chosen": -680.8076782226562, + "logps/rejected": -2156.92919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096771717071533, + "rewards/margins": 15.031291007995605, + "rewards/rejected": -21.128061294555664, + "step": 38600 + }, + { + "epoch": 2.3, + "learning_rate": 7.794816244311526e-07, + "logits/chosen": -2.48527193069458, + "logits/rejected": -1.8865489959716797, + "logps/chosen": -713.0977783203125, + "logps/rejected": -2103.06591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.456990718841553, + "rewards/margins": 14.121404647827148, + "rewards/rejected": -20.57839584350586, + "step": 38610 + }, + { + "epoch": 2.3, + "learning_rate": 7.782235802015029e-07, + "logits/chosen": -2.561066150665283, + "logits/rejected": -1.9824997186660767, + "logps/chosen": -707.6803588867188, + "logps/rejected": -2124.145751953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.374934673309326, + "rewards/margins": 14.425455093383789, + "rewards/rejected": -20.80038833618164, + "step": 38620 + }, + { + "epoch": 2.3, + "learning_rate": 7.769663648349996e-07, + "logits/chosen": -2.5299839973449707, + "logits/rejected": -1.907524824142456, + "logps/chosen": -654.8927612304688, + "logps/rejected": -2115.3203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.825733661651611, + "rewards/margins": 14.886636734008789, + "rewards/rejected": -20.71236801147461, + "step": 38630 + }, + { + "epoch": 2.3, + "learning_rate": 7.757099789368663e-07, + "logits/chosen": -2.5757555961608887, + "logits/rejected": -1.9536259174346924, + "logps/chosen": -642.7420043945312, + "logps/rejected": -2117.395263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.745285987854004, + "rewards/margins": 14.981664657592773, + "rewards/rejected": -20.726951599121094, + "step": 38640 + }, + { + "epoch": 2.3, + "learning_rate": 7.744544231119272e-07, + "logits/chosen": -2.5152909755706787, + "logits/rejected": -1.9150497913360596, + "logps/chosen": -673.583740234375, + "logps/rejected": -2108.483642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011568546295166, + "rewards/margins": 14.620683670043945, + "rewards/rejected": -20.632251739501953, + "step": 38650 + }, + { + "epoch": 2.31, + "learning_rate": 7.73199697964607e-07, + "logits/chosen": -2.5627357959747314, + "logits/rejected": -1.9275022745132446, + "logps/chosen": -668.1753540039062, + "logps/rejected": -2104.96142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.026721954345703, + "rewards/margins": 14.582321166992188, + "rewards/rejected": -20.60904312133789, + "step": 38660 + }, + { + "epoch": 2.31, + "learning_rate": 7.7194580409893e-07, + "logits/chosen": -2.5565619468688965, + "logits/rejected": -1.8636010885238647, + "logps/chosen": -652.6414794921875, + "logps/rejected": -2202.3251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.819362640380859, + "rewards/margins": 15.764991760253906, + "rewards/rejected": -21.584354400634766, + "step": 38670 + }, + { + "epoch": 2.31, + "learning_rate": 7.706927421185209e-07, + "logits/chosen": -2.5564892292022705, + "logits/rejected": -1.8774923086166382, + "logps/chosen": -670.2965698242188, + "logps/rejected": -2150.159423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.98051118850708, + "rewards/margins": 15.07934284210205, + "rewards/rejected": -21.059850692749023, + "step": 38680 + }, + { + "epoch": 2.31, + "learning_rate": 7.694405126266036e-07, + "logits/chosen": -2.53932785987854, + "logits/rejected": -1.884993553161621, + "logps/chosen": -667.7764892578125, + "logps/rejected": -2150.030029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.052495956420898, + "rewards/margins": 15.001197814941406, + "rewards/rejected": -21.053695678710938, + "step": 38690 + }, + { + "epoch": 2.31, + "learning_rate": 7.681891162260016e-07, + "logits/chosen": -2.5621891021728516, + "logits/rejected": -1.7839912176132202, + "logps/chosen": -658.9844970703125, + "logps/rejected": -2102.4990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.850907802581787, + "rewards/margins": 14.72999382019043, + "rewards/rejected": -20.580900192260742, + "step": 38700 + }, + { + "epoch": 2.31, + "learning_rate": 7.669385535191368e-07, + "logits/chosen": -2.5264508724212646, + "logits/rejected": -1.8338420391082764, + "logps/chosen": -667.3245849609375, + "logps/rejected": -2167.691650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.905442237854004, + "rewards/margins": 15.3182954788208, + "rewards/rejected": -21.223735809326172, + "step": 38710 + }, + { + "epoch": 2.31, + "learning_rate": 7.656888251080305e-07, + "logits/chosen": -2.550063133239746, + "logits/rejected": -1.860245943069458, + "logps/chosen": -655.8526000976562, + "logps/rejected": -2171.71142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859973907470703, + "rewards/margins": 15.410726547241211, + "rewards/rejected": -21.270702362060547, + "step": 38720 + }, + { + "epoch": 2.31, + "learning_rate": 7.644399315943016e-07, + "logits/chosen": -2.5593628883361816, + "logits/rejected": -1.832029104232788, + "logps/chosen": -691.995849609375, + "logps/rejected": -2152.38232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.246092319488525, + "rewards/margins": 14.8179349899292, + "rewards/rejected": -21.06402587890625, + "step": 38730 + }, + { + "epoch": 2.31, + "learning_rate": 7.631918735791672e-07, + "logits/chosen": -2.5543880462646484, + "logits/rejected": -1.899767279624939, + "logps/chosen": -692.1204833984375, + "logps/rejected": -2074.58154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213570594787598, + "rewards/margins": 14.098838806152344, + "rewards/rejected": -20.312410354614258, + "step": 38740 + }, + { + "epoch": 2.31, + "learning_rate": 7.619446516634429e-07, + "logits/chosen": -2.543851137161255, + "logits/rejected": -1.8601911067962646, + "logps/chosen": -651.9461669921875, + "logps/rejected": -2111.60107421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.763935565948486, + "rewards/margins": 14.908683776855469, + "rewards/rejected": -20.672616958618164, + "step": 38750 + }, + { + "epoch": 2.31, + "learning_rate": 7.606982664475421e-07, + "logits/chosen": -2.5133023262023926, + "logits/rejected": -1.9421699047088623, + "logps/chosen": -694.59228515625, + "logps/rejected": -2176.534423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204164981842041, + "rewards/margins": 15.104982376098633, + "rewards/rejected": -21.309146881103516, + "step": 38760 + }, + { + "epoch": 2.31, + "learning_rate": 7.594527185314718e-07, + "logits/chosen": -2.570805788040161, + "logits/rejected": -1.8200515508651733, + "logps/chosen": -673.2413330078125, + "logps/rejected": -2134.43505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.090987682342529, + "rewards/margins": 14.809173583984375, + "rewards/rejected": -20.900161743164062, + "step": 38770 + }, + { + "epoch": 2.31, + "learning_rate": 7.582080085148424e-07, + "logits/chosen": -2.523603916168213, + "logits/rejected": -2.0150654315948486, + "logps/chosen": -660.6112670898438, + "logps/rejected": -2212.49658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981015682220459, + "rewards/margins": 15.708106994628906, + "rewards/rejected": -21.689123153686523, + "step": 38780 + }, + { + "epoch": 2.31, + "learning_rate": 7.569641369968539e-07, + "logits/chosen": -2.552422285079956, + "logits/rejected": -1.7459228038787842, + "logps/chosen": -665.9141845703125, + "logps/rejected": -2112.92626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004570007324219, + "rewards/margins": 14.687994003295898, + "rewards/rejected": -20.692564010620117, + "step": 38790 + }, + { + "epoch": 2.31, + "learning_rate": 7.557211045763091e-07, + "logits/chosen": -2.570573329925537, + "logits/rejected": -1.8582693338394165, + "logps/chosen": -665.9134521484375, + "logps/rejected": -2148.268310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.939862251281738, + "rewards/margins": 15.097574234008789, + "rewards/rejected": -21.037437438964844, + "step": 38800 + }, + { + "epoch": 2.31, + "learning_rate": 7.544789118516008e-07, + "logits/chosen": -2.5517072677612305, + "logits/rejected": -1.9170854091644287, + "logps/chosen": -659.5131225585938, + "logps/rejected": -2210.779541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.896266937255859, + "rewards/margins": 15.768628120422363, + "rewards/rejected": -21.664892196655273, + "step": 38810 + }, + { + "epoch": 2.31, + "learning_rate": 7.532375594207236e-07, + "logits/chosen": -2.536099910736084, + "logits/rejected": -1.9583457708358765, + "logps/chosen": -680.658203125, + "logps/rejected": -2038.019287109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07900333404541, + "rewards/margins": 13.862098693847656, + "rewards/rejected": -19.941104888916016, + "step": 38820 + }, + { + "epoch": 2.32, + "learning_rate": 7.519970478812613e-07, + "logits/chosen": -2.5427753925323486, + "logits/rejected": -1.7937005758285522, + "logps/chosen": -688.011962890625, + "logps/rejected": -2159.6328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.184144020080566, + "rewards/margins": 14.961874008178711, + "rewards/rejected": -21.146018981933594, + "step": 38830 + }, + { + "epoch": 2.32, + "learning_rate": 7.507573778303998e-07, + "logits/chosen": -2.5169215202331543, + "logits/rejected": -1.809975028038025, + "logps/chosen": -670.2452392578125, + "logps/rejected": -2129.421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.010728359222412, + "rewards/margins": 14.835368156433105, + "rewards/rejected": -20.84609603881836, + "step": 38840 + }, + { + "epoch": 2.32, + "learning_rate": 7.495185498649132e-07, + "logits/chosen": -2.5648093223571777, + "logits/rejected": -1.955331802368164, + "logps/chosen": -670.6187744140625, + "logps/rejected": -2009.0941162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006081581115723, + "rewards/margins": 13.645207405090332, + "rewards/rejected": -19.651287078857422, + "step": 38850 + }, + { + "epoch": 2.32, + "learning_rate": 7.482805645811744e-07, + "logits/chosen": -2.570664882659912, + "logits/rejected": -1.9569017887115479, + "logps/chosen": -699.8510131835938, + "logps/rejected": -2134.140380859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.26072883605957, + "rewards/margins": 14.645414352416992, + "rewards/rejected": -20.906143188476562, + "step": 38860 + }, + { + "epoch": 2.32, + "learning_rate": 7.470434225751496e-07, + "logits/chosen": -2.552523136138916, + "logits/rejected": -1.7791494131088257, + "logps/chosen": -658.4082641601562, + "logps/rejected": -2144.572021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.875483512878418, + "rewards/margins": 15.135801315307617, + "rewards/rejected": -21.01128387451172, + "step": 38870 + }, + { + "epoch": 2.32, + "learning_rate": 7.45807124442399e-07, + "logits/chosen": -2.524726390838623, + "logits/rejected": -1.9265167713165283, + "logps/chosen": -687.7662963867188, + "logps/rejected": -2121.894287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.175998687744141, + "rewards/margins": 14.59907341003418, + "rewards/rejected": -20.775074005126953, + "step": 38880 + }, + { + "epoch": 2.32, + "learning_rate": 7.445716707780759e-07, + "logits/chosen": -2.521904945373535, + "logits/rejected": -1.814119577407837, + "logps/chosen": -688.5657958984375, + "logps/rejected": -2079.925537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.184310436248779, + "rewards/margins": 14.164441108703613, + "rewards/rejected": -20.348751068115234, + "step": 38890 + }, + { + "epoch": 2.32, + "learning_rate": 7.433370621769284e-07, + "logits/chosen": -2.5539934635162354, + "logits/rejected": -1.8851900100708008, + "logps/chosen": -656.7335815429688, + "logps/rejected": -2067.213134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.91219425201416, + "rewards/margins": 14.33446979522705, + "rewards/rejected": -20.246662139892578, + "step": 38900 + }, + { + "epoch": 2.32, + "learning_rate": 7.421032992332967e-07, + "logits/chosen": -2.5200533866882324, + "logits/rejected": -1.8175128698349, + "logps/chosen": -661.944580078125, + "logps/rejected": -2225.583251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.920195579528809, + "rewards/margins": 15.890100479125977, + "rewards/rejected": -21.81029510498047, + "step": 38910 + }, + { + "epoch": 2.32, + "learning_rate": 7.408703825411143e-07, + "logits/chosen": -2.5592360496520996, + "logits/rejected": -1.8945798873901367, + "logps/chosen": -677.7496337890625, + "logps/rejected": -2080.44287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125694751739502, + "rewards/margins": 14.224268913269043, + "rewards/rejected": -20.34996223449707, + "step": 38920 + }, + { + "epoch": 2.32, + "learning_rate": 7.396383126939074e-07, + "logits/chosen": -2.513711929321289, + "logits/rejected": -1.9243488311767578, + "logps/chosen": -674.3538208007812, + "logps/rejected": -2153.87060546875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.059117317199707, + "rewards/margins": 15.031694412231445, + "rewards/rejected": -21.09081268310547, + "step": 38930 + }, + { + "epoch": 2.32, + "learning_rate": 7.384070902847943e-07, + "logits/chosen": -2.529837131500244, + "logits/rejected": -1.865599274635315, + "logps/chosen": -649.4803466796875, + "logps/rejected": -2143.75, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795224666595459, + "rewards/margins": 15.18822956085205, + "rewards/rejected": -20.983455657958984, + "step": 38940 + }, + { + "epoch": 2.32, + "learning_rate": 7.371767159064852e-07, + "logits/chosen": -2.5182533264160156, + "logits/rejected": -1.7539870738983154, + "logps/chosen": -677.7423706054688, + "logps/rejected": -2051.6025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.097284317016602, + "rewards/margins": 13.97950267791748, + "rewards/rejected": -20.076786041259766, + "step": 38950 + }, + { + "epoch": 2.32, + "learning_rate": 7.359471901512829e-07, + "logits/chosen": -2.5550053119659424, + "logits/rejected": -1.9205081462860107, + "logps/chosen": -666.1929321289062, + "logps/rejected": -2218.43701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915627479553223, + "rewards/margins": 15.828897476196289, + "rewards/rejected": -21.744525909423828, + "step": 38960 + }, + { + "epoch": 2.32, + "learning_rate": 7.347185136110808e-07, + "logits/chosen": -2.5252838134765625, + "logits/rejected": -1.9169632196426392, + "logps/chosen": -655.7896118164062, + "logps/rejected": -2101.431396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865409851074219, + "rewards/margins": 14.71586799621582, + "rewards/rejected": -20.581275939941406, + "step": 38970 + }, + { + "epoch": 2.32, + "learning_rate": 7.334906868773636e-07, + "logits/chosen": -2.571465253829956, + "logits/rejected": -1.8925793170928955, + "logps/chosen": -648.6580810546875, + "logps/rejected": -2191.660400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.786550045013428, + "rewards/margins": 15.6945161819458, + "rewards/rejected": -21.481067657470703, + "step": 38980 + }, + { + "epoch": 2.32, + "learning_rate": 7.322637105412073e-07, + "logits/chosen": -2.546532154083252, + "logits/rejected": -1.875769853591919, + "logps/chosen": -671.1036376953125, + "logps/rejected": -2063.302978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023596286773682, + "rewards/margins": 14.175689697265625, + "rewards/rejected": -20.19928741455078, + "step": 38990 + }, + { + "epoch": 2.33, + "learning_rate": 7.31037585193278e-07, + "logits/chosen": -2.564723491668701, + "logits/rejected": -1.8775684833526611, + "logps/chosen": -678.227783203125, + "logps/rejected": -2186.669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.084209442138672, + "rewards/margins": 15.343286514282227, + "rewards/rejected": -21.427494049072266, + "step": 39000 + }, + { + "epoch": 2.33, + "learning_rate": 7.298123114238328e-07, + "logits/chosen": -2.5344481468200684, + "logits/rejected": -1.8354469537734985, + "logps/chosen": -677.1944580078125, + "logps/rejected": -2197.399658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011569976806641, + "rewards/margins": 15.514508247375488, + "rewards/rejected": -21.526079177856445, + "step": 39010 + }, + { + "epoch": 2.33, + "learning_rate": 7.285878898227183e-07, + "logits/chosen": -2.539456605911255, + "logits/rejected": -1.924538016319275, + "logps/chosen": -680.6695556640625, + "logps/rejected": -2111.193359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110344409942627, + "rewards/margins": 14.559303283691406, + "rewards/rejected": -20.669649124145508, + "step": 39020 + }, + { + "epoch": 2.33, + "learning_rate": 7.273643209793719e-07, + "logits/chosen": -2.5166218280792236, + "logits/rejected": -1.8649117946624756, + "logps/chosen": -671.3375244140625, + "logps/rejected": -2116.99560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981575012207031, + "rewards/margins": 14.757479667663574, + "rewards/rejected": -20.739055633544922, + "step": 39030 + }, + { + "epoch": 2.33, + "learning_rate": 7.261416054828171e-07, + "logits/chosen": -2.5155413150787354, + "logits/rejected": -1.9146314859390259, + "logps/chosen": -651.9049072265625, + "logps/rejected": -2043.0650634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.826320171356201, + "rewards/margins": 14.154289245605469, + "rewards/rejected": -19.980609893798828, + "step": 39040 + }, + { + "epoch": 2.33, + "learning_rate": 7.249197439216724e-07, + "logits/chosen": -2.5274770259857178, + "logits/rejected": -1.859100103378296, + "logps/chosen": -678.9222412109375, + "logps/rejected": -2146.05517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0869340896606445, + "rewards/margins": 14.937585830688477, + "rewards/rejected": -21.024520874023438, + "step": 39050 + }, + { + "epoch": 2.33, + "learning_rate": 7.236987368841386e-07, + "logits/chosen": -2.5604794025421143, + "logits/rejected": -1.9134271144866943, + "logps/chosen": -686.9732055664062, + "logps/rejected": -2002.3316650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143256187438965, + "rewards/margins": 13.433341979980469, + "rewards/rejected": -19.576597213745117, + "step": 39060 + }, + { + "epoch": 2.33, + "learning_rate": 7.22478584958012e-07, + "logits/chosen": -2.5511178970336914, + "logits/rejected": -1.900094985961914, + "logps/chosen": -682.5203857421875, + "logps/rejected": -2088.81982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128782749176025, + "rewards/margins": 14.328320503234863, + "rewards/rejected": -20.457101821899414, + "step": 39070 + }, + { + "epoch": 2.33, + "learning_rate": 7.212592887306704e-07, + "logits/chosen": -2.5788841247558594, + "logits/rejected": -1.912670373916626, + "logps/chosen": -671.0408935546875, + "logps/rejected": -2134.37060546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988811492919922, + "rewards/margins": 14.90312671661377, + "rewards/rejected": -20.89194107055664, + "step": 39080 + }, + { + "epoch": 2.33, + "learning_rate": 7.200408487890859e-07, + "logits/chosen": -2.520442247390747, + "logits/rejected": -1.9126670360565186, + "logps/chosen": -664.8240966796875, + "logps/rejected": -2151.142822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9440412521362305, + "rewards/margins": 15.119768142700195, + "rewards/rejected": -21.06380844116211, + "step": 39090 + }, + { + "epoch": 2.33, + "learning_rate": 7.188232657198127e-07, + "logits/chosen": -2.554872751235962, + "logits/rejected": -1.7911760807037354, + "logps/chosen": -669.7029418945312, + "logps/rejected": -2054.42138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964770317077637, + "rewards/margins": 14.133865356445312, + "rewards/rejected": -20.098634719848633, + "step": 39100 + }, + { + "epoch": 2.33, + "learning_rate": 7.176065401089982e-07, + "logits/chosen": -2.581632375717163, + "logits/rejected": -2.0018560886383057, + "logps/chosen": -692.9483032226562, + "logps/rejected": -2142.848388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.269179344177246, + "rewards/margins": 14.703704833984375, + "rewards/rejected": -20.972885131835938, + "step": 39110 + }, + { + "epoch": 2.33, + "learning_rate": 7.163906725423717e-07, + "logits/chosen": -2.598691463470459, + "logits/rejected": -1.9071314334869385, + "logps/chosen": -648.2440185546875, + "logps/rejected": -2084.9287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.73968505859375, + "rewards/margins": 14.667352676391602, + "rewards/rejected": -20.407039642333984, + "step": 39120 + }, + { + "epoch": 2.33, + "learning_rate": 7.151756636052529e-07, + "logits/chosen": -2.557190418243408, + "logits/rejected": -1.9260902404785156, + "logps/chosen": -695.8160400390625, + "logps/rejected": -2200.014404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202953338623047, + "rewards/margins": 15.362896919250488, + "rewards/rejected": -21.56585121154785, + "step": 39130 + }, + { + "epoch": 2.33, + "learning_rate": 7.139615138825467e-07, + "logits/chosen": -2.5542891025543213, + "logits/rejected": -1.7628132104873657, + "logps/chosen": -650.9346923828125, + "logps/rejected": -2058.7109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.741692066192627, + "rewards/margins": 14.397703170776367, + "rewards/rejected": -20.13939666748047, + "step": 39140 + }, + { + "epoch": 2.33, + "learning_rate": 7.127482239587449e-07, + "logits/chosen": -2.5782065391540527, + "logits/rejected": -1.9207903146743774, + "logps/chosen": -653.8247680664062, + "logps/rejected": -2210.55078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.80120849609375, + "rewards/margins": 15.859647750854492, + "rewards/rejected": -21.660858154296875, + "step": 39150 + }, + { + "epoch": 2.34, + "learning_rate": 7.115357944179254e-07, + "logits/chosen": -2.5261118412017822, + "logits/rejected": -1.7961629629135132, + "logps/chosen": -676.0054321289062, + "logps/rejected": -2122.737548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.133852481842041, + "rewards/margins": 14.652666091918945, + "rewards/rejected": -20.786518096923828, + "step": 39160 + }, + { + "epoch": 2.34, + "learning_rate": 7.103242258437518e-07, + "logits/chosen": -2.5163989067077637, + "logits/rejected": -1.9281362295150757, + "logps/chosen": -648.349365234375, + "logps/rejected": -2151.756103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792256832122803, + "rewards/margins": 15.286336898803711, + "rewards/rejected": -21.078594207763672, + "step": 39170 + }, + { + "epoch": 2.34, + "learning_rate": 7.091135188194729e-07, + "logits/chosen": -2.5560102462768555, + "logits/rejected": -1.9403841495513916, + "logps/chosen": -670.3623657226562, + "logps/rejected": -2231.81689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965712070465088, + "rewards/margins": 15.901583671569824, + "rewards/rejected": -21.867298126220703, + "step": 39180 + }, + { + "epoch": 2.34, + "learning_rate": 7.07903673927923e-07, + "logits/chosen": -2.5857150554656982, + "logits/rejected": -2.0388970375061035, + "logps/chosen": -646.08544921875, + "logps/rejected": -2174.87841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.769893169403076, + "rewards/margins": 15.538101196289062, + "rewards/rejected": -21.307992935180664, + "step": 39190 + }, + { + "epoch": 2.34, + "learning_rate": 7.066946917515219e-07, + "logits/chosen": -2.528567314147949, + "logits/rejected": -1.8855390548706055, + "logps/chosen": -672.52099609375, + "logps/rejected": -2125.958251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023303031921387, + "rewards/margins": 14.793116569519043, + "rewards/rejected": -20.816417694091797, + "step": 39200 + }, + { + "epoch": 2.34, + "learning_rate": 7.054865728722732e-07, + "logits/chosen": -2.5057525634765625, + "logits/rejected": -1.861670732498169, + "logps/chosen": -640.703125, + "logps/rejected": -2101.37548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.715482234954834, + "rewards/margins": 14.862569808959961, + "rewards/rejected": -20.578052520751953, + "step": 39210 + }, + { + "epoch": 2.34, + "learning_rate": 7.042793178717655e-07, + "logits/chosen": -2.5089964866638184, + "logits/rejected": -1.8897607326507568, + "logps/chosen": -648.438232421875, + "logps/rejected": -2152.20458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7402801513671875, + "rewards/margins": 15.342683792114258, + "rewards/rejected": -21.08296012878418, + "step": 39220 + }, + { + "epoch": 2.34, + "learning_rate": 7.030729273311712e-07, + "logits/chosen": -2.580747127532959, + "logits/rejected": -1.8208754062652588, + "logps/chosen": -683.0836791992188, + "logps/rejected": -2115.58642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.137304306030273, + "rewards/margins": 14.578842163085938, + "rewards/rejected": -20.71614646911621, + "step": 39230 + }, + { + "epoch": 2.34, + "learning_rate": 7.018674018312468e-07, + "logits/chosen": -2.5379836559295654, + "logits/rejected": -1.9375041723251343, + "logps/chosen": -659.7002563476562, + "logps/rejected": -2071.286376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923264980316162, + "rewards/margins": 14.348237991333008, + "rewards/rejected": -20.271503448486328, + "step": 39240 + }, + { + "epoch": 2.34, + "learning_rate": 7.006627419523323e-07, + "logits/chosen": -2.542153835296631, + "logits/rejected": -1.878148078918457, + "logps/chosen": -667.1170654296875, + "logps/rejected": -2146.899169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9784369468688965, + "rewards/margins": 15.043935775756836, + "rewards/rejected": -21.02237319946289, + "step": 39250 + }, + { + "epoch": 2.34, + "learning_rate": 6.994589482743508e-07, + "logits/chosen": -2.535680055618286, + "logits/rejected": -1.9198541641235352, + "logps/chosen": -669.983154296875, + "logps/rejected": -2190.936279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967162132263184, + "rewards/margins": 15.511775016784668, + "rewards/rejected": -21.47893714904785, + "step": 39260 + }, + { + "epoch": 2.34, + "learning_rate": 6.982560213768088e-07, + "logits/chosen": -2.4907948970794678, + "logits/rejected": -1.8869683742523193, + "logps/chosen": -664.0499877929688, + "logps/rejected": -2065.7685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.941020488739014, + "rewards/margins": 14.274055480957031, + "rewards/rejected": -20.215078353881836, + "step": 39270 + }, + { + "epoch": 2.34, + "learning_rate": 6.970539618387951e-07, + "logits/chosen": -2.5178561210632324, + "logits/rejected": -1.8542531728744507, + "logps/chosen": -685.3580322265625, + "logps/rejected": -2161.12744140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.133646011352539, + "rewards/margins": 15.041854858398438, + "rewards/rejected": -21.17550277709961, + "step": 39280 + }, + { + "epoch": 2.34, + "learning_rate": 6.958527702389811e-07, + "logits/chosen": -2.5423758029937744, + "logits/rejected": -1.8733127117156982, + "logps/chosen": -687.9133911132812, + "logps/rejected": -2196.70751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107691764831543, + "rewards/margins": 15.409403800964355, + "rewards/rejected": -21.51709747314453, + "step": 39290 + }, + { + "epoch": 2.34, + "learning_rate": 6.946524471556212e-07, + "logits/chosen": -2.5353152751922607, + "logits/rejected": -1.884173035621643, + "logps/chosen": -678.96142578125, + "logps/rejected": -2147.39990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0769171714782715, + "rewards/margins": 14.942889213562012, + "rewards/rejected": -21.019805908203125, + "step": 39300 + }, + { + "epoch": 2.34, + "learning_rate": 6.934529931665488e-07, + "logits/chosen": -2.5543417930603027, + "logits/rejected": -1.8624547719955444, + "logps/chosen": -669.960205078125, + "logps/rejected": -2054.840087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985145092010498, + "rewards/margins": 14.120737075805664, + "rewards/rejected": -20.105880737304688, + "step": 39310 + }, + { + "epoch": 2.34, + "learning_rate": 6.922544088491839e-07, + "logits/chosen": -2.5372540950775146, + "logits/rejected": -1.9245887994766235, + "logps/chosen": -660.9759521484375, + "logps/rejected": -2186.8671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910842418670654, + "rewards/margins": 15.519277572631836, + "rewards/rejected": -21.43012237548828, + "step": 39320 + }, + { + "epoch": 2.35, + "learning_rate": 6.91056694780522e-07, + "logits/chosen": -2.5517711639404297, + "logits/rejected": -1.8531303405761719, + "logps/chosen": -648.9000244140625, + "logps/rejected": -2099.010986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.767354488372803, + "rewards/margins": 14.770315170288086, + "rewards/rejected": -20.537670135498047, + "step": 39330 + }, + { + "epoch": 2.35, + "learning_rate": 6.898598515371457e-07, + "logits/chosen": -2.5421319007873535, + "logits/rejected": -1.8280003070831299, + "logps/chosen": -703.7599487304688, + "logps/rejected": -2160.73486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.307694435119629, + "rewards/margins": 14.84901237487793, + "rewards/rejected": -21.156705856323242, + "step": 39340 + }, + { + "epoch": 2.35, + "learning_rate": 6.886638796952125e-07, + "logits/chosen": -2.5265917778015137, + "logits/rejected": -1.83309006690979, + "logps/chosen": -666.5829467773438, + "logps/rejected": -2125.12158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009729862213135, + "rewards/margins": 14.783709526062012, + "rewards/rejected": -20.793437957763672, + "step": 39350 + }, + { + "epoch": 2.35, + "learning_rate": 6.874687798304657e-07, + "logits/chosen": -2.5475051403045654, + "logits/rejected": -1.9183406829833984, + "logps/chosen": -674.0855712890625, + "logps/rejected": -2049.830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.993624687194824, + "rewards/margins": 14.074501037597656, + "rewards/rejected": -20.068126678466797, + "step": 39360 + }, + { + "epoch": 2.35, + "learning_rate": 6.862745525182238e-07, + "logits/chosen": -2.5531437397003174, + "logits/rejected": -1.8896644115447998, + "logps/chosen": -679.6767578125, + "logps/rejected": -2164.595458984375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036721706390381, + "rewards/margins": 15.160913467407227, + "rewards/rejected": -21.197635650634766, + "step": 39370 + }, + { + "epoch": 2.35, + "learning_rate": 6.850811983333908e-07, + "logits/chosen": -2.522662401199341, + "logits/rejected": -1.8452329635620117, + "logps/chosen": -666.8161010742188, + "logps/rejected": -2028.9342041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011092662811279, + "rewards/margins": 13.834294319152832, + "rewards/rejected": -19.845386505126953, + "step": 39380 + }, + { + "epoch": 2.35, + "learning_rate": 6.83888717850445e-07, + "logits/chosen": -2.499213457107544, + "logits/rejected": -1.7278060913085938, + "logps/chosen": -662.1484375, + "logps/rejected": -2165.521728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945497035980225, + "rewards/margins": 15.259023666381836, + "rewards/rejected": -21.20452117919922, + "step": 39390 + }, + { + "epoch": 2.35, + "learning_rate": 6.826971116434472e-07, + "logits/chosen": -2.5542054176330566, + "logits/rejected": -1.822199821472168, + "logps/chosen": -651.8155517578125, + "logps/rejected": -2156.18798828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7633466720581055, + "rewards/margins": 15.354108810424805, + "rewards/rejected": -21.11745834350586, + "step": 39400 + }, + { + "epoch": 2.35, + "learning_rate": 6.815063802860367e-07, + "logits/chosen": -2.5318989753723145, + "logits/rejected": -1.8050940036773682, + "logps/chosen": -665.7168579101562, + "logps/rejected": -2103.36962890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948948860168457, + "rewards/margins": 14.641328811645508, + "rewards/rejected": -20.590274810791016, + "step": 39410 + }, + { + "epoch": 2.35, + "learning_rate": 6.803165243514315e-07, + "logits/chosen": -2.526654005050659, + "logits/rejected": -1.846509575843811, + "logps/chosen": -663.8825073242188, + "logps/rejected": -2051.2890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.944726943969727, + "rewards/margins": 14.128433227539062, + "rewards/rejected": -20.073162078857422, + "step": 39420 + }, + { + "epoch": 2.35, + "learning_rate": 6.791275444124281e-07, + "logits/chosen": -2.516608715057373, + "logits/rejected": -1.943347692489624, + "logps/chosen": -665.4847412109375, + "logps/rejected": -2112.25244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9867987632751465, + "rewards/margins": 14.692715644836426, + "rewards/rejected": -20.679515838623047, + "step": 39430 + }, + { + "epoch": 2.35, + "learning_rate": 6.779394410414019e-07, + "logits/chosen": -2.5670270919799805, + "logits/rejected": -1.9824306964874268, + "logps/chosen": -673.9586181640625, + "logps/rejected": -2237.943115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012306213378906, + "rewards/margins": 15.92284107208252, + "rewards/rejected": -21.93514633178711, + "step": 39440 + }, + { + "epoch": 2.35, + "learning_rate": 6.767522148103054e-07, + "logits/chosen": -2.548243999481201, + "logits/rejected": -1.8646514415740967, + "logps/chosen": -695.9085693359375, + "logps/rejected": -2133.444580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.190523147583008, + "rewards/margins": 14.7002592086792, + "rewards/rejected": -20.890783309936523, + "step": 39450 + }, + { + "epoch": 2.35, + "learning_rate": 6.755658662906694e-07, + "logits/chosen": -2.584961414337158, + "logits/rejected": -1.8953145742416382, + "logps/chosen": -671.5704345703125, + "logps/rejected": -2277.098876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043181419372559, + "rewards/margins": 16.279727935791016, + "rewards/rejected": -22.32291030883789, + "step": 39460 + }, + { + "epoch": 2.35, + "learning_rate": 6.743803960536025e-07, + "logits/chosen": -2.5466740131378174, + "logits/rejected": -1.9469455480575562, + "logps/chosen": -668.3674926757812, + "logps/rejected": -2152.106201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964564800262451, + "rewards/margins": 15.0975923538208, + "rewards/rejected": -21.062156677246094, + "step": 39470 + }, + { + "epoch": 2.35, + "learning_rate": 6.731958046697893e-07, + "logits/chosen": -2.520284414291382, + "logits/rejected": -1.8216865062713623, + "logps/chosen": -684.0017700195312, + "logps/rejected": -2201.16162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159753322601318, + "rewards/margins": 15.404215812683105, + "rewards/rejected": -21.563968658447266, + "step": 39480 + }, + { + "epoch": 2.35, + "learning_rate": 6.720120927094925e-07, + "logits/chosen": -2.6444082260131836, + "logits/rejected": -1.9810603857040405, + "logps/chosen": -683.5159912109375, + "logps/rejected": -2081.116943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075016021728516, + "rewards/margins": 14.290437698364258, + "rewards/rejected": -20.365453720092773, + "step": 39490 + }, + { + "epoch": 2.36, + "learning_rate": 6.70829260742551e-07, + "logits/chosen": -2.5447421073913574, + "logits/rejected": -1.971552848815918, + "logps/chosen": -670.7334594726562, + "logps/rejected": -2099.102294921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019437313079834, + "rewards/margins": 14.534063339233398, + "rewards/rejected": -20.55350112915039, + "step": 39500 + }, + { + "epoch": 2.36, + "learning_rate": 6.696473093383798e-07, + "logits/chosen": -2.5328779220581055, + "logits/rejected": -1.8196414709091187, + "logps/chosen": -658.72705078125, + "logps/rejected": -2125.651123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.917784214019775, + "rewards/margins": 14.903582572937012, + "rewards/rejected": -20.821369171142578, + "step": 39510 + }, + { + "epoch": 2.36, + "learning_rate": 6.684662390659707e-07, + "logits/chosen": -2.530585765838623, + "logits/rejected": -1.9265453815460205, + "logps/chosen": -669.9471435546875, + "logps/rejected": -2216.24755859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.897751808166504, + "rewards/margins": 15.8187894821167, + "rewards/rejected": -21.716541290283203, + "step": 39520 + }, + { + "epoch": 2.36, + "learning_rate": 6.672860504938911e-07, + "logits/chosen": -2.5435848236083984, + "logits/rejected": -1.8361583948135376, + "logps/chosen": -676.405029296875, + "logps/rejected": -2091.71875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.055617332458496, + "rewards/margins": 14.391497611999512, + "rewards/rejected": -20.447114944458008, + "step": 39530 + }, + { + "epoch": 2.36, + "learning_rate": 6.66106744190283e-07, + "logits/chosen": -2.526468276977539, + "logits/rejected": -1.7837333679199219, + "logps/chosen": -685.8671875, + "logps/rejected": -2078.974853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107000827789307, + "rewards/margins": 14.2470064163208, + "rewards/rejected": -20.354007720947266, + "step": 39540 + }, + { + "epoch": 2.36, + "learning_rate": 6.649283207228651e-07, + "logits/chosen": -2.5415425300598145, + "logits/rejected": -1.7997570037841797, + "logps/chosen": -665.3787841796875, + "logps/rejected": -1956.717529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962850093841553, + "rewards/margins": 13.172200202941895, + "rewards/rejected": -19.13504981994629, + "step": 39550 + }, + { + "epoch": 2.36, + "learning_rate": 6.6375078065893e-07, + "logits/chosen": -2.5124106407165527, + "logits/rejected": -1.8785184621810913, + "logps/chosen": -659.8665161132812, + "logps/rejected": -2089.43994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.912564277648926, + "rewards/margins": 14.539380073547363, + "rewards/rejected": -20.451940536499023, + "step": 39560 + }, + { + "epoch": 2.36, + "learning_rate": 6.625741245653466e-07, + "logits/chosen": -2.5577805042266846, + "logits/rejected": -1.8549352884292603, + "logps/chosen": -642.42724609375, + "logps/rejected": -2191.24658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.731416702270508, + "rewards/margins": 15.741093635559082, + "rewards/rejected": -21.472509384155273, + "step": 39570 + }, + { + "epoch": 2.36, + "learning_rate": 6.613983530085547e-07, + "logits/chosen": -2.59379243850708, + "logits/rejected": -1.841210126876831, + "logps/chosen": -679.364501953125, + "logps/rejected": -2117.02685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.035552978515625, + "rewards/margins": 14.69328784942627, + "rewards/rejected": -20.728839874267578, + "step": 39580 + }, + { + "epoch": 2.36, + "learning_rate": 6.602234665545737e-07, + "logits/chosen": -2.524651050567627, + "logits/rejected": -1.8758676052093506, + "logps/chosen": -655.4459228515625, + "logps/rejected": -2056.43017578125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.844720840454102, + "rewards/margins": 14.289587020874023, + "rewards/rejected": -20.134309768676758, + "step": 39590 + }, + { + "epoch": 2.36, + "learning_rate": 6.590494657689909e-07, + "logits/chosen": -2.532473087310791, + "logits/rejected": -1.944368600845337, + "logps/chosen": -655.5452270507812, + "logps/rejected": -2088.8681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8237504959106445, + "rewards/margins": 14.608685493469238, + "rewards/rejected": -20.432437896728516, + "step": 39600 + }, + { + "epoch": 2.36, + "learning_rate": 6.578763512169731e-07, + "logits/chosen": -2.6001856327056885, + "logits/rejected": -1.9491443634033203, + "logps/chosen": -668.9898681640625, + "logps/rejected": -2151.61376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.006375789642334, + "rewards/margins": 15.066655158996582, + "rewards/rejected": -21.073030471801758, + "step": 39610 + }, + { + "epoch": 2.36, + "learning_rate": 6.567041234632552e-07, + "logits/chosen": -2.556574821472168, + "logits/rejected": -1.9430429935455322, + "logps/chosen": -673.6983642578125, + "logps/rejected": -2135.30517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956656455993652, + "rewards/margins": 14.960382461547852, + "rewards/rejected": -20.917037963867188, + "step": 39620 + }, + { + "epoch": 2.36, + "learning_rate": 6.5553278307215e-07, + "logits/chosen": -2.5865983963012695, + "logits/rejected": -1.9712164402008057, + "logps/chosen": -644.5958251953125, + "logps/rejected": -2150.2001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.793044090270996, + "rewards/margins": 15.249975204467773, + "rewards/rejected": -21.043018341064453, + "step": 39630 + }, + { + "epoch": 2.36, + "learning_rate": 6.543623306075381e-07, + "logits/chosen": -2.4868857860565186, + "logits/rejected": -1.9325320720672607, + "logps/chosen": -687.1781005859375, + "logps/rejected": -2114.77197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148580074310303, + "rewards/margins": 14.558433532714844, + "rewards/rejected": -20.707014083862305, + "step": 39640 + }, + { + "epoch": 2.36, + "learning_rate": 6.531927666328783e-07, + "logits/chosen": -2.559415340423584, + "logits/rejected": -1.8565948009490967, + "logps/chosen": -654.01123046875, + "logps/rejected": -2114.96044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.874107360839844, + "rewards/margins": 14.841914176940918, + "rewards/rejected": -20.716022491455078, + "step": 39650 + }, + { + "epoch": 2.36, + "learning_rate": 6.520240917111961e-07, + "logits/chosen": -2.567993640899658, + "logits/rejected": -1.8771969079971313, + "logps/chosen": -677.2086791992188, + "logps/rejected": -2156.58642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076774597167969, + "rewards/margins": 15.034812927246094, + "rewards/rejected": -21.111587524414062, + "step": 39660 + }, + { + "epoch": 2.37, + "learning_rate": 6.50856306405093e-07, + "logits/chosen": -2.5651211738586426, + "logits/rejected": -1.8964052200317383, + "logps/chosen": -672.4000244140625, + "logps/rejected": -2106.583251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043793678283691, + "rewards/margins": 14.580635070800781, + "rewards/rejected": -20.62442970275879, + "step": 39670 + }, + { + "epoch": 2.37, + "learning_rate": 6.496894112767402e-07, + "logits/chosen": -2.5076441764831543, + "logits/rejected": -1.805936574935913, + "logps/chosen": -675.3057861328125, + "logps/rejected": -2160.37841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040703773498535, + "rewards/margins": 15.127603530883789, + "rewards/rejected": -21.168304443359375, + "step": 39680 + }, + { + "epoch": 2.37, + "learning_rate": 6.485234068878809e-07, + "logits/chosen": -2.509423017501831, + "logits/rejected": -1.8307613134384155, + "logps/chosen": -716.060302734375, + "logps/rejected": -2128.19677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406613826751709, + "rewards/margins": 14.426843643188477, + "rewards/rejected": -20.833457946777344, + "step": 39690 + }, + { + "epoch": 2.37, + "learning_rate": 6.473582937998305e-07, + "logits/chosen": -2.572984457015991, + "logits/rejected": -1.8662109375, + "logps/chosen": -664.7227172851562, + "logps/rejected": -2140.37158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940349102020264, + "rewards/margins": 15.025400161743164, + "rewards/rejected": -20.965749740600586, + "step": 39700 + }, + { + "epoch": 2.37, + "learning_rate": 6.461940725734733e-07, + "logits/chosen": -2.5790843963623047, + "logits/rejected": -1.8807485103607178, + "logps/chosen": -667.8521118164062, + "logps/rejected": -2158.401123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964962005615234, + "rewards/margins": 15.178568840026855, + "rewards/rejected": -21.14353370666504, + "step": 39710 + }, + { + "epoch": 2.37, + "learning_rate": 6.450307437692663e-07, + "logits/chosen": -2.5657923221588135, + "logits/rejected": -1.906765341758728, + "logps/chosen": -654.51123046875, + "logps/rejected": -2184.843994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817798614501953, + "rewards/margins": 15.5824613571167, + "rewards/rejected": -21.400259017944336, + "step": 39720 + }, + { + "epoch": 2.37, + "learning_rate": 6.438683079472355e-07, + "logits/chosen": -2.5293002128601074, + "logits/rejected": -1.7791370153427124, + "logps/chosen": -674.0873413085938, + "logps/rejected": -2158.423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039191246032715, + "rewards/margins": 15.106382369995117, + "rewards/rejected": -21.14557456970215, + "step": 39730 + }, + { + "epoch": 2.37, + "learning_rate": 6.427067656669774e-07, + "logits/chosen": -2.529872179031372, + "logits/rejected": -1.8373740911483765, + "logps/chosen": -672.3658447265625, + "logps/rejected": -2233.24267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.013632774353027, + "rewards/margins": 15.8716402053833, + "rewards/rejected": -21.885272979736328, + "step": 39740 + }, + { + "epoch": 2.37, + "learning_rate": 6.415461174876589e-07, + "logits/chosen": -2.549093723297119, + "logits/rejected": -1.9165493249893188, + "logps/chosen": -659.6556396484375, + "logps/rejected": -2062.71728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950013160705566, + "rewards/margins": 14.2465238571167, + "rewards/rejected": -20.196537017822266, + "step": 39750 + }, + { + "epoch": 2.37, + "learning_rate": 6.403863639680157e-07, + "logits/chosen": -2.5237433910369873, + "logits/rejected": -1.7914304733276367, + "logps/chosen": -669.5413818359375, + "logps/rejected": -2224.156494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930664539337158, + "rewards/margins": 15.859189987182617, + "rewards/rejected": -21.789854049682617, + "step": 39760 + }, + { + "epoch": 2.37, + "learning_rate": 6.39227505666353e-07, + "logits/chosen": -2.504106283187866, + "logits/rejected": -1.8149387836456299, + "logps/chosen": -652.2661743164062, + "logps/rejected": -2132.4560546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8015851974487305, + "rewards/margins": 15.075976371765137, + "rewards/rejected": -20.877559661865234, + "step": 39770 + }, + { + "epoch": 2.37, + "learning_rate": 6.380695431405453e-07, + "logits/chosen": -2.5154988765716553, + "logits/rejected": -1.7628597021102905, + "logps/chosen": -661.4422607421875, + "logps/rejected": -2132.8818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892727851867676, + "rewards/margins": 14.992132186889648, + "rewards/rejected": -20.88486099243164, + "step": 39780 + }, + { + "epoch": 2.37, + "learning_rate": 6.369124769480362e-07, + "logits/chosen": -2.5303478240966797, + "logits/rejected": -1.895745873451233, + "logps/chosen": -671.6834716796875, + "logps/rejected": -2070.506591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036886692047119, + "rewards/margins": 14.22362232208252, + "rewards/rejected": -20.260509490966797, + "step": 39790 + }, + { + "epoch": 2.37, + "learning_rate": 6.357563076458367e-07, + "logits/chosen": -2.5655462741851807, + "logits/rejected": -1.8776273727416992, + "logps/chosen": -650.6311645507812, + "logps/rejected": -2134.80322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.781595706939697, + "rewards/margins": 15.120488166809082, + "rewards/rejected": -20.902082443237305, + "step": 39800 + }, + { + "epoch": 2.37, + "learning_rate": 6.346010357905269e-07, + "logits/chosen": -2.551055431365967, + "logits/rejected": -1.843266248703003, + "logps/chosen": -685.201171875, + "logps/rejected": -2137.84326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103305816650391, + "rewards/margins": 14.83387565612793, + "rewards/rejected": -20.937183380126953, + "step": 39810 + }, + { + "epoch": 2.37, + "learning_rate": 6.334466619382553e-07, + "logits/chosen": -2.541881561279297, + "logits/rejected": -1.9312191009521484, + "logps/chosen": -661.275390625, + "logps/rejected": -2178.17724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887155055999756, + "rewards/margins": 15.447463989257812, + "rewards/rejected": -21.334619522094727, + "step": 39820 + }, + { + "epoch": 2.38, + "learning_rate": 6.322931866447355e-07, + "logits/chosen": -2.5577340126037598, + "logits/rejected": -1.8853811025619507, + "logps/chosen": -652.7318115234375, + "logps/rejected": -2165.138916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8852996826171875, + "rewards/margins": 15.3257474899292, + "rewards/rejected": -21.211048126220703, + "step": 39830 + }, + { + "epoch": 2.38, + "learning_rate": 6.311406104652534e-07, + "logits/chosen": -2.5211634635925293, + "logits/rejected": -1.9146407842636108, + "logps/chosen": -667.1649169921875, + "logps/rejected": -2062.727783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.010342597961426, + "rewards/margins": 14.178853034973145, + "rewards/rejected": -20.18919563293457, + "step": 39840 + }, + { + "epoch": 2.38, + "learning_rate": 6.299889339546561e-07, + "logits/chosen": -2.565214157104492, + "logits/rejected": -1.7974889278411865, + "logps/chosen": -645.8523559570312, + "logps/rejected": -2120.205322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.760936737060547, + "rewards/margins": 14.985086441040039, + "rewards/rejected": -20.746021270751953, + "step": 39850 + }, + { + "epoch": 2.38, + "learning_rate": 6.288381576673635e-07, + "logits/chosen": -2.5365638732910156, + "logits/rejected": -1.8689873218536377, + "logps/chosen": -666.3363647460938, + "logps/rejected": -2134.64990234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9300994873046875, + "rewards/margins": 14.982144355773926, + "rewards/rejected": -20.912242889404297, + "step": 39860 + }, + { + "epoch": 2.38, + "learning_rate": 6.276882821573566e-07, + "logits/chosen": -2.551907777786255, + "logits/rejected": -1.9202972650527954, + "logps/chosen": -665.3922119140625, + "logps/rejected": -2170.24267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.927990913391113, + "rewards/margins": 15.331537246704102, + "rewards/rejected": -21.2595272064209, + "step": 39870 + }, + { + "epoch": 2.38, + "learning_rate": 6.265393079781881e-07, + "logits/chosen": -2.5639090538024902, + "logits/rejected": -1.7160803079605103, + "logps/chosen": -668.14990234375, + "logps/rejected": -2120.21875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036708354949951, + "rewards/margins": 14.731256484985352, + "rewards/rejected": -20.767967224121094, + "step": 39880 + }, + { + "epoch": 2.38, + "learning_rate": 6.253912356829714e-07, + "logits/chosen": -2.544442653656006, + "logits/rejected": -1.9543712139129639, + "logps/chosen": -679.6287231445312, + "logps/rejected": -2208.70458984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107433319091797, + "rewards/margins": 15.540356636047363, + "rewards/rejected": -21.647790908813477, + "step": 39890 + }, + { + "epoch": 2.38, + "learning_rate": 6.242440658243915e-07, + "logits/chosen": -2.5682923793792725, + "logits/rejected": -1.8682053089141846, + "logps/chosen": -651.6925048828125, + "logps/rejected": -2167.776123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.834257125854492, + "rewards/margins": 15.395466804504395, + "rewards/rejected": -21.229724884033203, + "step": 39900 + }, + { + "epoch": 2.38, + "learning_rate": 6.230977989546936e-07, + "logits/chosen": -2.5802319049835205, + "logits/rejected": -1.9385267496109009, + "logps/chosen": -655.8024291992188, + "logps/rejected": -2095.6875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.792849540710449, + "rewards/margins": 14.706548690795898, + "rewards/rejected": -20.4993953704834, + "step": 39910 + }, + { + "epoch": 2.38, + "learning_rate": 6.219524356256915e-07, + "logits/chosen": -2.539188861846924, + "logits/rejected": -1.7813975811004639, + "logps/chosen": -654.1200561523438, + "logps/rejected": -2156.72412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.850498676300049, + "rewards/margins": 15.275280952453613, + "rewards/rejected": -21.12578010559082, + "step": 39920 + }, + { + "epoch": 2.38, + "learning_rate": 6.208079763887626e-07, + "logits/chosen": -2.5685811042785645, + "logits/rejected": -1.894280195236206, + "logps/chosen": -672.7545166015625, + "logps/rejected": -2121.81591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020015716552734, + "rewards/margins": 14.745363235473633, + "rewards/rejected": -20.7653751373291, + "step": 39930 + }, + { + "epoch": 2.38, + "learning_rate": 6.196644217948503e-07, + "logits/chosen": -2.5459423065185547, + "logits/rejected": -1.871564269065857, + "logps/chosen": -668.6702270507812, + "logps/rejected": -2266.501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.95481014251709, + "rewards/margins": 16.26024627685547, + "rewards/rejected": -22.215055465698242, + "step": 39940 + }, + { + "epoch": 2.38, + "learning_rate": 6.185217723944611e-07, + "logits/chosen": -2.5385050773620605, + "logits/rejected": -1.9102964401245117, + "logps/chosen": -659.8077392578125, + "logps/rejected": -2090.675537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930823802947998, + "rewards/margins": 14.5355806350708, + "rewards/rejected": -20.466405868530273, + "step": 39950 + }, + { + "epoch": 2.38, + "learning_rate": 6.173800287376669e-07, + "logits/chosen": -2.5481507778167725, + "logits/rejected": -1.9150851964950562, + "logps/chosen": -648.60546875, + "logps/rejected": -2098.1396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7493767738342285, + "rewards/margins": 14.796626091003418, + "rewards/rejected": -20.546005249023438, + "step": 39960 + }, + { + "epoch": 2.38, + "learning_rate": 6.162391913741026e-07, + "logits/chosen": -2.5181758403778076, + "logits/rejected": -1.788903832435608, + "logps/chosen": -675.20751953125, + "logps/rejected": -2119.7412109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0218634605407715, + "rewards/margins": 14.725997924804688, + "rewards/rejected": -20.747859954833984, + "step": 39970 + }, + { + "epoch": 2.38, + "learning_rate": 6.150992608529672e-07, + "logits/chosen": -2.5476818084716797, + "logits/rejected": -1.8640422821044922, + "logps/chosen": -647.7337036132812, + "logps/rejected": -2032.181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.786009788513184, + "rewards/margins": 14.101025581359863, + "rewards/rejected": -19.887033462524414, + "step": 39980 + }, + { + "epoch": 2.38, + "learning_rate": 6.139602377230247e-07, + "logits/chosen": -2.5597262382507324, + "logits/rejected": -1.954095482826233, + "logps/chosen": -661.2362670898438, + "logps/rejected": -2141.17919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8838887214660645, + "rewards/margins": 15.08173942565918, + "rewards/rejected": -20.96562957763672, + "step": 39990 + }, + { + "epoch": 2.39, + "learning_rate": 6.128221225325989e-07, + "logits/chosen": -2.5284276008605957, + "logits/rejected": -1.8891204595565796, + "logps/chosen": -652.3214111328125, + "logps/rejected": -2105.41650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837202548980713, + "rewards/margins": 14.769612312316895, + "rewards/rejected": -20.606813430786133, + "step": 40000 + }, + { + "epoch": 2.39, + "learning_rate": 6.116849158295795e-07, + "logits/chosen": -2.5448451042175293, + "logits/rejected": -1.8701412677764893, + "logps/chosen": -684.8311157226562, + "logps/rejected": -2078.135498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.116442680358887, + "rewards/margins": 14.223867416381836, + "rewards/rejected": -20.34031105041504, + "step": 40010 + }, + { + "epoch": 2.39, + "learning_rate": 6.105486181614176e-07, + "logits/chosen": -2.5468010902404785, + "logits/rejected": -1.8949766159057617, + "logps/chosen": -653.1161499023438, + "logps/rejected": -2202.064697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81935977935791, + "rewards/margins": 15.747883796691895, + "rewards/rejected": -21.567243576049805, + "step": 40020 + }, + { + "epoch": 2.39, + "learning_rate": 6.094132300751265e-07, + "logits/chosen": -2.582515239715576, + "logits/rejected": -1.8950328826904297, + "logps/chosen": -675.236328125, + "logps/rejected": -2166.549072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011897087097168, + "rewards/margins": 15.212732315063477, + "rewards/rejected": -21.224628448486328, + "step": 40030 + }, + { + "epoch": 2.39, + "learning_rate": 6.082787521172826e-07, + "logits/chosen": -2.5486831665039062, + "logits/rejected": -1.9423259496688843, + "logps/chosen": -659.12841796875, + "logps/rejected": -2096.708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90627384185791, + "rewards/margins": 14.618492126464844, + "rewards/rejected": -20.524765014648438, + "step": 40040 + }, + { + "epoch": 2.39, + "learning_rate": 6.071451848340235e-07, + "logits/chosen": -2.5685980319976807, + "logits/rejected": -1.9420028924942017, + "logps/chosen": -644.4766235351562, + "logps/rejected": -2234.71630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.808290958404541, + "rewards/margins": 16.09633445739746, + "rewards/rejected": -21.904624938964844, + "step": 40050 + }, + { + "epoch": 2.39, + "learning_rate": 6.060125287710478e-07, + "logits/chosen": -2.5231456756591797, + "logits/rejected": -1.6511037349700928, + "logps/chosen": -677.8778076171875, + "logps/rejected": -2167.334228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051547050476074, + "rewards/margins": 15.1865816116333, + "rewards/rejected": -21.238128662109375, + "step": 40060 + }, + { + "epoch": 2.39, + "learning_rate": 6.048807844736168e-07, + "logits/chosen": -2.5239322185516357, + "logits/rejected": -1.8350967168807983, + "logps/chosen": -659.1536865234375, + "logps/rejected": -2071.241943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.926207542419434, + "rewards/margins": 14.347681999206543, + "rewards/rejected": -20.273889541625977, + "step": 40070 + }, + { + "epoch": 2.39, + "learning_rate": 6.037499524865523e-07, + "logits/chosen": -2.557682752609253, + "logits/rejected": -1.885286569595337, + "logps/chosen": -678.7664184570312, + "logps/rejected": -2151.758056640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044146537780762, + "rewards/margins": 15.0208740234375, + "rewards/rejected": -21.065019607543945, + "step": 40080 + }, + { + "epoch": 2.39, + "learning_rate": 6.026200333542368e-07, + "logits/chosen": -2.5092387199401855, + "logits/rejected": -1.843560814857483, + "logps/chosen": -660.98388671875, + "logps/rejected": -2176.397705078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.932743549346924, + "rewards/margins": 15.39159107208252, + "rewards/rejected": -21.3243350982666, + "step": 40090 + }, + { + "epoch": 2.39, + "learning_rate": 6.014910276206118e-07, + "logits/chosen": -2.5954322814941406, + "logits/rejected": -1.9001544713974, + "logps/chosen": -654.1895751953125, + "logps/rejected": -2089.109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.785801887512207, + "rewards/margins": 14.656488418579102, + "rewards/rejected": -20.442291259765625, + "step": 40100 + }, + { + "epoch": 2.39, + "learning_rate": 6.003629358291832e-07, + "logits/chosen": -2.5086352825164795, + "logits/rejected": -1.911564588546753, + "logps/chosen": -652.3854370117188, + "logps/rejected": -2121.748779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.873947620391846, + "rewards/margins": 14.903158187866211, + "rewards/rejected": -20.7771053314209, + "step": 40110 + }, + { + "epoch": 2.39, + "learning_rate": 5.992357585230119e-07, + "logits/chosen": -2.5582056045532227, + "logits/rejected": -1.7510578632354736, + "logps/chosen": -673.0636596679688, + "logps/rejected": -2108.885498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9650797843933105, + "rewards/margins": 14.676867485046387, + "rewards/rejected": -20.641948699951172, + "step": 40120 + }, + { + "epoch": 2.39, + "learning_rate": 5.981094962447235e-07, + "logits/chosen": -2.561660051345825, + "logits/rejected": -1.91916823387146, + "logps/chosen": -678.5431518554688, + "logps/rejected": -2230.344482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0851240158081055, + "rewards/margins": 15.776147842407227, + "rewards/rejected": -21.86127281188965, + "step": 40130 + }, + { + "epoch": 2.39, + "learning_rate": 5.969841495364978e-07, + "logits/chosen": -2.526210308074951, + "logits/rejected": -1.8405052423477173, + "logps/chosen": -674.430419921875, + "logps/rejected": -2109.041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000213623046875, + "rewards/margins": 14.63879680633545, + "rewards/rejected": -20.63901138305664, + "step": 40140 + }, + { + "epoch": 2.39, + "learning_rate": 5.958597189400797e-07, + "logits/chosen": -2.511444568634033, + "logits/rejected": -1.801842451095581, + "logps/chosen": -720.2076416015625, + "logps/rejected": -2147.02001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535942077636719, + "rewards/margins": 14.4954833984375, + "rewards/rejected": -21.031423568725586, + "step": 40150 + }, + { + "epoch": 2.39, + "learning_rate": 5.947362049967672e-07, + "logits/chosen": -2.5682497024536133, + "logits/rejected": -1.8973596096038818, + "logps/chosen": -666.1494750976562, + "logps/rejected": -2205.233154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958218574523926, + "rewards/margins": 15.652032852172852, + "rewards/rejected": -21.610252380371094, + "step": 40160 + }, + { + "epoch": 2.4, + "learning_rate": 5.936136082474228e-07, + "logits/chosen": -2.5595543384552, + "logits/rejected": -1.9244130849838257, + "logps/chosen": -673.125, + "logps/rejected": -2158.79833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041526794433594, + "rewards/margins": 15.088554382324219, + "rewards/rejected": -21.130081176757812, + "step": 40170 + }, + { + "epoch": 2.4, + "learning_rate": 5.924919292324624e-07, + "logits/chosen": -2.516733169555664, + "logits/rejected": -1.9916709661483765, + "logps/chosen": -672.18212890625, + "logps/rejected": -2024.0673828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088198661804199, + "rewards/margins": 13.717897415161133, + "rewards/rejected": -19.806095123291016, + "step": 40180 + }, + { + "epoch": 2.4, + "learning_rate": 5.91371168491863e-07, + "logits/chosen": -2.5322906970977783, + "logits/rejected": -1.8982353210449219, + "logps/chosen": -702.0899658203125, + "logps/rejected": -2058.083251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.322819709777832, + "rewards/margins": 13.8064603805542, + "rewards/rejected": -20.12928009033203, + "step": 40190 + }, + { + "epoch": 2.4, + "learning_rate": 5.902513265651585e-07, + "logits/chosen": -2.5785365104675293, + "logits/rejected": -1.994748830795288, + "logps/chosen": -660.9891357421875, + "logps/rejected": -2054.388916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958974361419678, + "rewards/margins": 14.1397066116333, + "rewards/rejected": -20.098682403564453, + "step": 40200 + }, + { + "epoch": 2.4, + "learning_rate": 5.891324039914406e-07, + "logits/chosen": -2.5858891010284424, + "logits/rejected": -1.9309701919555664, + "logps/chosen": -645.2557373046875, + "logps/rejected": -2098.92431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.732205867767334, + "rewards/margins": 14.818036079406738, + "rewards/rejected": -20.550243377685547, + "step": 40210 + }, + { + "epoch": 2.4, + "learning_rate": 5.880144013093603e-07, + "logits/chosen": -2.563436269760132, + "logits/rejected": -1.9939991235733032, + "logps/chosen": -670.611572265625, + "logps/rejected": -2141.822998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988009452819824, + "rewards/margins": 14.99189567565918, + "rewards/rejected": -20.979900360107422, + "step": 40220 + }, + { + "epoch": 2.4, + "learning_rate": 5.868973190571214e-07, + "logits/chosen": -2.5642242431640625, + "logits/rejected": -1.9964910745620728, + "logps/chosen": -668.4000244140625, + "logps/rejected": -2151.87548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935410499572754, + "rewards/margins": 15.116554260253906, + "rewards/rejected": -21.051965713500977, + "step": 40230 + }, + { + "epoch": 2.4, + "learning_rate": 5.857811577724904e-07, + "logits/chosen": -2.5323328971862793, + "logits/rejected": -1.8812103271484375, + "logps/chosen": -675.9169311523438, + "logps/rejected": -2017.701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.986246585845947, + "rewards/margins": 13.761163711547852, + "rewards/rejected": -19.747407913208008, + "step": 40240 + }, + { + "epoch": 2.4, + "learning_rate": 5.846659179927841e-07, + "logits/chosen": -2.5440568923950195, + "logits/rejected": -1.971174955368042, + "logps/chosen": -647.4503173828125, + "logps/rejected": -2140.052001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.802402496337891, + "rewards/margins": 15.150431632995605, + "rewards/rejected": -20.952835083007812, + "step": 40250 + }, + { + "epoch": 2.4, + "learning_rate": 5.835516002548816e-07, + "logits/chosen": -2.5412063598632812, + "logits/rejected": -1.851690649986267, + "logps/chosen": -673.3740234375, + "logps/rejected": -2091.24169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001920700073242, + "rewards/margins": 14.4660062789917, + "rewards/rejected": -20.467927932739258, + "step": 40260 + }, + { + "epoch": 2.4, + "learning_rate": 5.82438205095214e-07, + "logits/chosen": -2.5675909519195557, + "logits/rejected": -1.79000723361969, + "logps/chosen": -669.9593505859375, + "logps/rejected": -2155.656005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974587440490723, + "rewards/margins": 15.1494779586792, + "rewards/rejected": -21.12406349182129, + "step": 40270 + }, + { + "epoch": 2.4, + "learning_rate": 5.813257330497699e-07, + "logits/chosen": -2.512420415878296, + "logits/rejected": -1.6914894580841064, + "logps/chosen": -678.9886474609375, + "logps/rejected": -2161.82421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1129865646362305, + "rewards/margins": 15.057888984680176, + "rewards/rejected": -21.170875549316406, + "step": 40280 + }, + { + "epoch": 2.4, + "learning_rate": 5.802141846540932e-07, + "logits/chosen": -2.533330202102661, + "logits/rejected": -1.8430055379867554, + "logps/chosen": -679.8394775390625, + "logps/rejected": -2181.07080078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.070545673370361, + "rewards/margins": 15.2910737991333, + "rewards/rejected": -21.36161994934082, + "step": 40290 + }, + { + "epoch": 2.4, + "learning_rate": 5.791035604432838e-07, + "logits/chosen": -2.5567469596862793, + "logits/rejected": -1.9720350503921509, + "logps/chosen": -649.8966064453125, + "logps/rejected": -2154.156005859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.815730094909668, + "rewards/margins": 15.287359237670898, + "rewards/rejected": -21.10308837890625, + "step": 40300 + }, + { + "epoch": 2.4, + "learning_rate": 5.779938609519956e-07, + "logits/chosen": -2.545142889022827, + "logits/rejected": -1.9587223529815674, + "logps/chosen": -666.3773193359375, + "logps/rejected": -2107.9404296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980179786682129, + "rewards/margins": 14.651800155639648, + "rewards/rejected": -20.63197898864746, + "step": 40310 + }, + { + "epoch": 2.4, + "learning_rate": 5.768850867144385e-07, + "logits/chosen": -2.4968764781951904, + "logits/rejected": -1.7605092525482178, + "logps/chosen": -682.0011596679688, + "logps/rejected": -2241.080810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143688678741455, + "rewards/margins": 15.816961288452148, + "rewards/rejected": -21.960651397705078, + "step": 40320 + }, + { + "epoch": 2.4, + "learning_rate": 5.757772382643761e-07, + "logits/chosen": -2.5508759021759033, + "logits/rejected": -1.8672218322753906, + "logps/chosen": -671.2175903320312, + "logps/rejected": -2078.5224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03500509262085, + "rewards/margins": 14.304769515991211, + "rewards/rejected": -20.33977699279785, + "step": 40330 + }, + { + "epoch": 2.41, + "learning_rate": 5.746703161351266e-07, + "logits/chosen": -2.530801773071289, + "logits/rejected": -1.9215835332870483, + "logps/chosen": -646.4789428710938, + "logps/rejected": -2062.2998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.778661727905273, + "rewards/margins": 14.393798828125, + "rewards/rejected": -20.172460556030273, + "step": 40340 + }, + { + "epoch": 2.41, + "learning_rate": 5.735643208595623e-07, + "logits/chosen": -2.532844066619873, + "logits/rejected": -1.9392969608306885, + "logps/chosen": -653.5617065429688, + "logps/rejected": -2246.04150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.852540493011475, + "rewards/margins": 16.155433654785156, + "rewards/rejected": -22.007976531982422, + "step": 40350 + }, + { + "epoch": 2.41, + "learning_rate": 5.724592529701103e-07, + "logits/chosen": -2.512531280517578, + "logits/rejected": -1.835190773010254, + "logps/chosen": -679.9890747070312, + "logps/rejected": -2121.215576171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138588905334473, + "rewards/margins": 14.626690864562988, + "rewards/rejected": -20.76527976989746, + "step": 40360 + }, + { + "epoch": 2.41, + "learning_rate": 5.713551129987477e-07, + "logits/chosen": -2.5310583114624023, + "logits/rejected": -1.8481266498565674, + "logps/chosen": -644.5083618164062, + "logps/rejected": -2038.2685546875, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.634903430938721, + "rewards/margins": 14.310053825378418, + "rewards/rejected": -19.944957733154297, + "step": 40370 + }, + { + "epoch": 2.41, + "learning_rate": 5.702519014770108e-07, + "logits/chosen": -2.488957643508911, + "logits/rejected": -1.8727505207061768, + "logps/chosen": -662.3701171875, + "logps/rejected": -2096.13623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9486589431762695, + "rewards/margins": 14.558134078979492, + "rewards/rejected": -20.506793975830078, + "step": 40380 + }, + { + "epoch": 2.41, + "learning_rate": 5.691496189359822e-07, + "logits/chosen": -2.571145534515381, + "logits/rejected": -1.825079321861267, + "logps/chosen": -645.1153564453125, + "logps/rejected": -2117.537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7009711265563965, + "rewards/margins": 15.03605842590332, + "rewards/rejected": -20.737030029296875, + "step": 40390 + }, + { + "epoch": 2.41, + "learning_rate": 5.680482659063038e-07, + "logits/chosen": -2.5169219970703125, + "logits/rejected": -1.820001244544983, + "logps/chosen": -677.1275634765625, + "logps/rejected": -2225.816162109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075485706329346, + "rewards/margins": 15.73119068145752, + "rewards/rejected": -21.80667495727539, + "step": 40400 + }, + { + "epoch": 2.41, + "learning_rate": 5.669478429181646e-07, + "logits/chosen": -2.543083667755127, + "logits/rejected": -1.9244283437728882, + "logps/chosen": -674.635986328125, + "logps/rejected": -2133.863037109375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092240810394287, + "rewards/margins": 14.807729721069336, + "rewards/rejected": -20.89997100830078, + "step": 40410 + }, + { + "epoch": 2.41, + "learning_rate": 5.658483505013096e-07, + "logits/chosen": -2.4948537349700928, + "logits/rejected": -1.9326579570770264, + "logps/chosen": -666.6917724609375, + "logps/rejected": -2109.53076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9953999519348145, + "rewards/margins": 14.659611701965332, + "rewards/rejected": -20.655010223388672, + "step": 40420 + }, + { + "epoch": 2.41, + "learning_rate": 5.64749789185034e-07, + "logits/chosen": -2.55998158454895, + "logits/rejected": -1.940283179283142, + "logps/chosen": -686.8670654296875, + "logps/rejected": -2057.14013671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153642654418945, + "rewards/margins": 13.981313705444336, + "rewards/rejected": -20.134960174560547, + "step": 40430 + }, + { + "epoch": 2.41, + "learning_rate": 5.636521594981851e-07, + "logits/chosen": -2.5466980934143066, + "logits/rejected": -1.9230773448944092, + "logps/chosen": -692.800537109375, + "logps/rejected": -2099.75244140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.186964988708496, + "rewards/margins": 14.374414443969727, + "rewards/rejected": -20.561378479003906, + "step": 40440 + }, + { + "epoch": 2.41, + "learning_rate": 5.625554619691628e-07, + "logits/chosen": -2.5427792072296143, + "logits/rejected": -1.8241031169891357, + "logps/chosen": -666.638916015625, + "logps/rejected": -1988.218994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.94776725769043, + "rewards/margins": 13.482566833496094, + "rewards/rejected": -19.430334091186523, + "step": 40450 + }, + { + "epoch": 2.41, + "learning_rate": 5.614596971259148e-07, + "logits/chosen": -2.5380916595458984, + "logits/rejected": -1.8627078533172607, + "logps/chosen": -704.9982299804688, + "logps/rejected": -2165.349853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.342503547668457, + "rewards/margins": 14.86358642578125, + "rewards/rejected": -21.20608901977539, + "step": 40460 + }, + { + "epoch": 2.41, + "learning_rate": 5.603648654959454e-07, + "logits/chosen": -2.536531925201416, + "logits/rejected": -1.9110370874404907, + "logps/chosen": -682.8671875, + "logps/rejected": -2058.168701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.130865573883057, + "rewards/margins": 14.00062370300293, + "rewards/rejected": -20.131488800048828, + "step": 40470 + }, + { + "epoch": 2.41, + "learning_rate": 5.592709676063038e-07, + "logits/chosen": -2.569720506668091, + "logits/rejected": -1.8247398138046265, + "logps/chosen": -656.1087646484375, + "logps/rejected": -2172.234130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.832289218902588, + "rewards/margins": 15.44702434539795, + "rewards/rejected": -21.279314041137695, + "step": 40480 + }, + { + "epoch": 2.41, + "learning_rate": 5.581780039835949e-07, + "logits/chosen": -2.5330634117126465, + "logits/rejected": -1.9597175121307373, + "logps/chosen": -662.36572265625, + "logps/rejected": -2125.35888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92401647567749, + "rewards/margins": 14.881299018859863, + "rewards/rejected": -20.805313110351562, + "step": 40490 + }, + { + "epoch": 2.42, + "learning_rate": 5.570859751539687e-07, + "logits/chosen": -2.5553817749023438, + "logits/rejected": -1.8486171960830688, + "logps/chosen": -666.0078125, + "logps/rejected": -2111.824462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930237770080566, + "rewards/margins": 14.7305326461792, + "rewards/rejected": -20.660770416259766, + "step": 40500 + }, + { + "epoch": 2.42, + "learning_rate": 5.559948816431307e-07, + "logits/chosen": -2.5729103088378906, + "logits/rejected": -1.9420251846313477, + "logps/chosen": -650.300537109375, + "logps/rejected": -2020.277587890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.852694034576416, + "rewards/margins": 13.911657333374023, + "rewards/rejected": -19.76435089111328, + "step": 40510 + }, + { + "epoch": 2.42, + "learning_rate": 5.549047239763306e-07, + "logits/chosen": -2.5752267837524414, + "logits/rejected": -1.8974730968475342, + "logps/chosen": -695.3998413085938, + "logps/rejected": -2187.01123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.27324914932251, + "rewards/margins": 15.142440795898438, + "rewards/rejected": -21.41568946838379, + "step": 40520 + }, + { + "epoch": 2.42, + "learning_rate": 5.538155026783726e-07, + "logits/chosen": -2.5797810554504395, + "logits/rejected": -1.9261760711669922, + "logps/chosen": -689.5462646484375, + "logps/rejected": -2090.13134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.176182746887207, + "rewards/margins": 14.283658027648926, + "rewards/rejected": -20.4598388671875, + "step": 40530 + }, + { + "epoch": 2.42, + "learning_rate": 5.527272182736065e-07, + "logits/chosen": -2.5958361625671387, + "logits/rejected": -2.038975477218628, + "logps/chosen": -676.7638549804688, + "logps/rejected": -2142.0888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.048737525939941, + "rewards/margins": 14.94610595703125, + "rewards/rejected": -20.994842529296875, + "step": 40540 + }, + { + "epoch": 2.42, + "learning_rate": 5.516398712859325e-07, + "logits/chosen": -2.534081220626831, + "logits/rejected": -1.8301441669464111, + "logps/chosen": -662.3177490234375, + "logps/rejected": -2201.87451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859857082366943, + "rewards/margins": 15.71580696105957, + "rewards/rejected": -21.575664520263672, + "step": 40550 + }, + { + "epoch": 2.42, + "learning_rate": 5.505534622387998e-07, + "logits/chosen": -2.5206737518310547, + "logits/rejected": -1.9330251216888428, + "logps/chosen": -696.7084350585938, + "logps/rejected": -2089.504638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.31519079208374, + "rewards/margins": 14.149856567382812, + "rewards/rejected": -20.46504783630371, + "step": 40560 + }, + { + "epoch": 2.42, + "learning_rate": 5.494679916552053e-07, + "logits/chosen": -2.500844717025757, + "logits/rejected": -1.7942476272583008, + "logps/chosen": -665.0299072265625, + "logps/rejected": -2106.077392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903277397155762, + "rewards/margins": 14.712099075317383, + "rewards/rejected": -20.61537742614746, + "step": 40570 + }, + { + "epoch": 2.42, + "learning_rate": 5.483834600576949e-07, + "logits/chosen": -2.568438768386841, + "logits/rejected": -1.9547832012176514, + "logps/chosen": -687.0401611328125, + "logps/rejected": -2126.735595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.137176036834717, + "rewards/margins": 14.672574043273926, + "rewards/rejected": -20.809749603271484, + "step": 40580 + }, + { + "epoch": 2.42, + "learning_rate": 5.472998679683619e-07, + "logits/chosen": -2.549705982208252, + "logits/rejected": -1.8718013763427734, + "logps/chosen": -660.9813842773438, + "logps/rejected": -2176.03271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8461809158325195, + "rewards/margins": 15.471531867980957, + "rewards/rejected": -21.317710876464844, + "step": 40590 + }, + { + "epoch": 2.42, + "learning_rate": 5.462172159088474e-07, + "logits/chosen": -2.5424232482910156, + "logits/rejected": -1.8721024990081787, + "logps/chosen": -661.1739501953125, + "logps/rejected": -2161.546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910995006561279, + "rewards/margins": 15.267802238464355, + "rewards/rejected": -21.178796768188477, + "step": 40600 + }, + { + "epoch": 2.42, + "learning_rate": 5.451355044003404e-07, + "logits/chosen": -2.560600519180298, + "logits/rejected": -1.8375434875488281, + "logps/chosen": -704.1503295898438, + "logps/rejected": -2155.52392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308106422424316, + "rewards/margins": 14.808309555053711, + "rewards/rejected": -21.116418838500977, + "step": 40610 + }, + { + "epoch": 2.42, + "learning_rate": 5.440547339635769e-07, + "logits/chosen": -2.558837413787842, + "logits/rejected": -1.9355932474136353, + "logps/chosen": -688.6904296875, + "logps/rejected": -2115.044677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.171452522277832, + "rewards/margins": 14.527059555053711, + "rewards/rejected": -20.69851303100586, + "step": 40620 + }, + { + "epoch": 2.42, + "learning_rate": 5.429749051188391e-07, + "logits/chosen": -2.5495054721832275, + "logits/rejected": -1.8749163150787354, + "logps/chosen": -652.51220703125, + "logps/rejected": -2156.085693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.802435874938965, + "rewards/margins": 15.313865661621094, + "rewards/rejected": -21.116300582885742, + "step": 40630 + }, + { + "epoch": 2.42, + "learning_rate": 5.418960183859572e-07, + "logits/chosen": -2.535144090652466, + "logits/rejected": -1.974035620689392, + "logps/chosen": -713.0923461914062, + "logps/rejected": -2148.088134765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360586166381836, + "rewards/margins": 14.676782608032227, + "rewards/rejected": -21.037370681762695, + "step": 40640 + }, + { + "epoch": 2.42, + "learning_rate": 5.408180742843069e-07, + "logits/chosen": -2.550476312637329, + "logits/rejected": -1.9643179178237915, + "logps/chosen": -658.4500732421875, + "logps/rejected": -2120.74658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.887632846832275, + "rewards/margins": 14.881265640258789, + "rewards/rejected": -20.76889991760254, + "step": 40650 + }, + { + "epoch": 2.42, + "learning_rate": 5.397410733328109e-07, + "logits/chosen": -2.546142578125, + "logits/rejected": -1.8481343984603882, + "logps/chosen": -673.3447875976562, + "logps/rejected": -2052.1171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.033003807067871, + "rewards/margins": 14.04870891571045, + "rewards/rejected": -20.081714630126953, + "step": 40660 + }, + { + "epoch": 2.43, + "learning_rate": 5.386650160499371e-07, + "logits/chosen": -2.5081875324249268, + "logits/rejected": -1.893781304359436, + "logps/chosen": -680.9620971679688, + "logps/rejected": -2173.598876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1221022605896, + "rewards/margins": 15.169731140136719, + "rewards/rejected": -21.291833877563477, + "step": 40670 + }, + { + "epoch": 2.43, + "learning_rate": 5.375899029536996e-07, + "logits/chosen": -2.5657052993774414, + "logits/rejected": -1.9239635467529297, + "logps/chosen": -675.116455078125, + "logps/rejected": -2124.117919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045970916748047, + "rewards/margins": 14.753471374511719, + "rewards/rejected": -20.799442291259766, + "step": 40680 + }, + { + "epoch": 2.43, + "learning_rate": 5.365157345616579e-07, + "logits/chosen": -2.5585379600524902, + "logits/rejected": -1.8792352676391602, + "logps/chosen": -684.72412109375, + "logps/rejected": -2159.08544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.111897945404053, + "rewards/margins": 15.040677070617676, + "rewards/rejected": -21.152576446533203, + "step": 40690 + }, + { + "epoch": 2.43, + "learning_rate": 5.354425113909164e-07, + "logits/chosen": -2.5538759231567383, + "logits/rejected": -1.9202706813812256, + "logps/chosen": -658.3267822265625, + "logps/rejected": -2101.55859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895502090454102, + "rewards/margins": 14.679435729980469, + "rewards/rejected": -20.57493782043457, + "step": 40700 + }, + { + "epoch": 2.43, + "learning_rate": 5.34370233958125e-07, + "logits/chosen": -2.5248332023620605, + "logits/rejected": -1.8660147190093994, + "logps/chosen": -649.162353515625, + "logps/rejected": -2092.48974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.778208255767822, + "rewards/margins": 14.699542045593262, + "rewards/rejected": -20.477750778198242, + "step": 40710 + }, + { + "epoch": 2.43, + "learning_rate": 5.332989027794786e-07, + "logits/chosen": -2.52518630027771, + "logits/rejected": -1.8358800411224365, + "logps/chosen": -666.51416015625, + "logps/rejected": -2060.1044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000239372253418, + "rewards/margins": 14.162158012390137, + "rewards/rejected": -20.162395477294922, + "step": 40720 + }, + { + "epoch": 2.43, + "learning_rate": 5.322285183707138e-07, + "logits/chosen": -2.5554890632629395, + "logits/rejected": -1.9096119403839111, + "logps/chosen": -684.1126708984375, + "logps/rejected": -2108.68212890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.142311096191406, + "rewards/margins": 14.504859924316406, + "rewards/rejected": -20.647172927856445, + "step": 40730 + }, + { + "epoch": 2.43, + "learning_rate": 5.311590812471165e-07, + "logits/chosen": -2.4942705631256104, + "logits/rejected": -1.722007155418396, + "logps/chosen": -670.1627197265625, + "logps/rejected": -2052.94580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004434585571289, + "rewards/margins": 14.086530685424805, + "rewards/rejected": -20.09096908569336, + "step": 40740 + }, + { + "epoch": 2.43, + "learning_rate": 5.300905919235111e-07, + "logits/chosen": -2.523103952407837, + "logits/rejected": -1.834912896156311, + "logps/chosen": -672.9644775390625, + "logps/rejected": -1988.849365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.005269527435303, + "rewards/margins": 13.462176322937012, + "rewards/rejected": -19.46744728088379, + "step": 40750 + }, + { + "epoch": 2.43, + "learning_rate": 5.290230509142711e-07, + "logits/chosen": -2.557657480239868, + "logits/rejected": -1.8525136709213257, + "logps/chosen": -688.9124145507812, + "logps/rejected": -2143.49365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194756984710693, + "rewards/margins": 14.798192977905273, + "rewards/rejected": -20.992950439453125, + "step": 40760 + }, + { + "epoch": 2.43, + "learning_rate": 5.279564587333077e-07, + "logits/chosen": -2.5575509071350098, + "logits/rejected": -1.9407813549041748, + "logps/chosen": -658.8856201171875, + "logps/rejected": -2156.474365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.7934250831604, + "rewards/margins": 15.32635498046875, + "rewards/rejected": -21.119779586791992, + "step": 40770 + }, + { + "epoch": 2.43, + "learning_rate": 5.268908158940814e-07, + "logits/chosen": -2.5899834632873535, + "logits/rejected": -1.921142339706421, + "logps/chosen": -679.4961547851562, + "logps/rejected": -2080.686279296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101577281951904, + "rewards/margins": 14.279993057250977, + "rewards/rejected": -20.38157081604004, + "step": 40780 + }, + { + "epoch": 2.43, + "learning_rate": 5.258261229095898e-07, + "logits/chosen": -2.5468339920043945, + "logits/rejected": -1.8259636163711548, + "logps/chosen": -657.2356567382812, + "logps/rejected": -2179.805908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.809520721435547, + "rewards/margins": 15.544604301452637, + "rewards/rejected": -21.3541259765625, + "step": 40790 + }, + { + "epoch": 2.43, + "learning_rate": 5.247623802923788e-07, + "logits/chosen": -2.530661106109619, + "logits/rejected": -1.8426344394683838, + "logps/chosen": -653.8090209960938, + "logps/rejected": -2084.05712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.860320568084717, + "rewards/margins": 14.560625076293945, + "rewards/rejected": -20.420942306518555, + "step": 40800 + }, + { + "epoch": 2.43, + "learning_rate": 5.236995885545321e-07, + "logits/chosen": -2.53466796875, + "logits/rejected": -1.845642328262329, + "logps/chosen": -659.7989501953125, + "logps/rejected": -2112.786865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.968459129333496, + "rewards/margins": 14.729240417480469, + "rewards/rejected": -20.69770050048828, + "step": 40810 + }, + { + "epoch": 2.43, + "learning_rate": 5.226377482076783e-07, + "logits/chosen": -2.494169235229492, + "logits/rejected": -1.817143440246582, + "logps/chosen": -671.300048828125, + "logps/rejected": -2166.37744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981067657470703, + "rewards/margins": 15.232309341430664, + "rewards/rejected": -21.21337890625, + "step": 40820 + }, + { + "epoch": 2.43, + "learning_rate": 5.215768597629872e-07, + "logits/chosen": -2.6003715991973877, + "logits/rejected": -1.8367187976837158, + "logps/chosen": -668.1174926757812, + "logps/rejected": -2126.070068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.948349952697754, + "rewards/margins": 14.865255355834961, + "rewards/rejected": -20.8136043548584, + "step": 40830 + }, + { + "epoch": 2.44, + "learning_rate": 5.205169237311705e-07, + "logits/chosen": -2.5285115242004395, + "logits/rejected": -1.8884010314941406, + "logps/chosen": -668.4439086914062, + "logps/rejected": -2067.109619140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9809794425964355, + "rewards/margins": 14.262417793273926, + "rewards/rejected": -20.243396759033203, + "step": 40840 + }, + { + "epoch": 2.44, + "learning_rate": 5.194579406224817e-07, + "logits/chosen": -2.560141086578369, + "logits/rejected": -1.8141162395477295, + "logps/chosen": -665.3089599609375, + "logps/rejected": -2103.171142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909655570983887, + "rewards/margins": 14.672492980957031, + "rewards/rejected": -20.582149505615234, + "step": 40850 + }, + { + "epoch": 2.44, + "learning_rate": 5.18399910946715e-07, + "logits/chosen": -2.522274971008301, + "logits/rejected": -1.7958223819732666, + "logps/chosen": -688.1649169921875, + "logps/rejected": -2076.35205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.17104959487915, + "rewards/margins": 14.157686233520508, + "rewards/rejected": -20.3287353515625, + "step": 40860 + }, + { + "epoch": 2.44, + "learning_rate": 5.173428352132057e-07, + "logits/chosen": -2.5464720726013184, + "logits/rejected": -1.9378551244735718, + "logps/chosen": -630.3012084960938, + "logps/rejected": -2177.47900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.702511310577393, + "rewards/margins": 15.614710807800293, + "rewards/rejected": -21.317224502563477, + "step": 40870 + }, + { + "epoch": 2.44, + "learning_rate": 5.162867139308306e-07, + "logits/chosen": -2.5330913066864014, + "logits/rejected": -1.897870659828186, + "logps/chosen": -676.3289184570312, + "logps/rejected": -2138.045166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.036871433258057, + "rewards/margins": 14.892112731933594, + "rewards/rejected": -20.92898178100586, + "step": 40880 + }, + { + "epoch": 2.44, + "learning_rate": 5.152315476080058e-07, + "logits/chosen": -2.5596718788146973, + "logits/rejected": -1.8151493072509766, + "logps/chosen": -685.7166137695312, + "logps/rejected": -2119.637939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.150971412658691, + "rewards/margins": 14.602030754089355, + "rewards/rejected": -20.75299835205078, + "step": 40890 + }, + { + "epoch": 2.44, + "learning_rate": 5.141773367526887e-07, + "logits/chosen": -2.5129458904266357, + "logits/rejected": -1.809878945350647, + "logps/chosen": -679.5911254882812, + "logps/rejected": -2102.459716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08625602722168, + "rewards/margins": 14.491844177246094, + "rewards/rejected": -20.578100204467773, + "step": 40900 + }, + { + "epoch": 2.44, + "learning_rate": 5.131240818723765e-07, + "logits/chosen": -2.529233455657959, + "logits/rejected": -1.840765357017517, + "logps/chosen": -648.1826171875, + "logps/rejected": -2130.79541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8024773597717285, + "rewards/margins": 15.061184883117676, + "rewards/rejected": -20.86366081237793, + "step": 40910 + }, + { + "epoch": 2.44, + "learning_rate": 5.12071783474106e-07, + "logits/chosen": -2.559765100479126, + "logits/rejected": -1.8996843099594116, + "logps/chosen": -657.7160034179688, + "logps/rejected": -2108.0703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923342704772949, + "rewards/margins": 14.730504989624023, + "rewards/rejected": -20.653846740722656, + "step": 40920 + }, + { + "epoch": 2.44, + "learning_rate": 5.110204420644533e-07, + "logits/chosen": -2.5631370544433594, + "logits/rejected": -1.7522766590118408, + "logps/chosen": -662.1231689453125, + "logps/rejected": -2187.03759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.970643043518066, + "rewards/margins": 15.454791069030762, + "rewards/rejected": -21.42543601989746, + "step": 40930 + }, + { + "epoch": 2.44, + "learning_rate": 5.099700581495346e-07, + "logits/chosen": -2.507456064224243, + "logits/rejected": -1.899255394935608, + "logps/chosen": -680.8018798828125, + "logps/rejected": -2164.590087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047435760498047, + "rewards/margins": 15.153829574584961, + "rewards/rejected": -21.20126724243164, + "step": 40940 + }, + { + "epoch": 2.44, + "learning_rate": 5.089206322350046e-07, + "logits/chosen": -2.5740902423858643, + "logits/rejected": -1.8564344644546509, + "logps/chosen": -677.1634521484375, + "logps/rejected": -2194.526123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067424297332764, + "rewards/margins": 15.438560485839844, + "rewards/rejected": -21.505985260009766, + "step": 40950 + }, + { + "epoch": 2.44, + "learning_rate": 5.078721648260568e-07, + "logits/chosen": -2.5709149837493896, + "logits/rejected": -1.91985285282135, + "logps/chosen": -672.1256103515625, + "logps/rejected": -2132.69580078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00290060043335, + "rewards/margins": 14.879605293273926, + "rewards/rejected": -20.882505416870117, + "step": 40960 + }, + { + "epoch": 2.44, + "learning_rate": 5.068246564274234e-07, + "logits/chosen": -2.5121185779571533, + "logits/rejected": -1.8230507373809814, + "logps/chosen": -660.06201171875, + "logps/rejected": -2139.21044921875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.920217037200928, + "rewards/margins": 15.022743225097656, + "rewards/rejected": -20.942962646484375, + "step": 40970 + }, + { + "epoch": 2.44, + "learning_rate": 5.057781075433751e-07, + "logits/chosen": -2.5497019290924072, + "logits/rejected": -1.853514313697815, + "logps/chosen": -669.8565673828125, + "logps/rejected": -2251.349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966736793518066, + "rewards/margins": 16.093019485473633, + "rewards/rejected": -22.059755325317383, + "step": 40980 + }, + { + "epoch": 2.44, + "learning_rate": 5.04732518677721e-07, + "logits/chosen": -2.53751540184021, + "logits/rejected": -1.848806381225586, + "logps/chosen": -671.332275390625, + "logps/rejected": -2045.1695556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032817840576172, + "rewards/margins": 13.97883415222168, + "rewards/rejected": -20.011653900146484, + "step": 40990 + }, + { + "epoch": 2.44, + "learning_rate": 5.036878903338055e-07, + "logits/chosen": -2.5607168674468994, + "logits/rejected": -1.8528333902359009, + "logps/chosen": -666.4899291992188, + "logps/rejected": -2118.99658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935195446014404, + "rewards/margins": 14.803871154785156, + "rewards/rejected": -20.739070892333984, + "step": 41000 + }, + { + "epoch": 2.45, + "learning_rate": 5.026442230145157e-07, + "logits/chosen": -2.53678822517395, + "logits/rejected": -1.8718181848526, + "logps/chosen": -679.7841186523438, + "logps/rejected": -2084.541748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001908779144287, + "rewards/margins": 14.396780014038086, + "rewards/rejected": -20.39868927001953, + "step": 41010 + }, + { + "epoch": 2.45, + "learning_rate": 5.016015172222699e-07, + "logits/chosen": -2.5570945739746094, + "logits/rejected": -1.9442768096923828, + "logps/chosen": -696.3050537109375, + "logps/rejected": -2103.49951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216962814331055, + "rewards/margins": 14.368258476257324, + "rewards/rejected": -20.585224151611328, + "step": 41020 + }, + { + "epoch": 2.45, + "learning_rate": 5.005597734590298e-07, + "logits/chosen": -2.5995423793792725, + "logits/rejected": -1.8535734415054321, + "logps/chosen": -676.2281494140625, + "logps/rejected": -2171.39599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043780326843262, + "rewards/margins": 15.209280014038086, + "rewards/rejected": -21.253061294555664, + "step": 41030 + }, + { + "epoch": 2.45, + "learning_rate": 4.995189922262877e-07, + "logits/chosen": -2.5368316173553467, + "logits/rejected": -1.8642339706420898, + "logps/chosen": -680.5628662109375, + "logps/rejected": -2151.36474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096515655517578, + "rewards/margins": 14.968683242797852, + "rewards/rejected": -21.065196990966797, + "step": 41040 + }, + { + "epoch": 2.45, + "learning_rate": 4.984791740250789e-07, + "logits/chosen": -2.5555214881896973, + "logits/rejected": -1.8601996898651123, + "logps/chosen": -655.8018798828125, + "logps/rejected": -2108.815673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.829837799072266, + "rewards/margins": 14.824676513671875, + "rewards/rejected": -20.654516220092773, + "step": 41050 + }, + { + "epoch": 2.45, + "learning_rate": 4.974403193559685e-07, + "logits/chosen": -2.5301976203918457, + "logits/rejected": -1.7313143014907837, + "logps/chosen": -669.4078369140625, + "logps/rejected": -2080.46337890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966166973114014, + "rewards/margins": 14.408147811889648, + "rewards/rejected": -20.374317169189453, + "step": 41060 + }, + { + "epoch": 2.45, + "learning_rate": 4.964024287190644e-07, + "logits/chosen": -2.5638606548309326, + "logits/rejected": -1.8380930423736572, + "logps/chosen": -670.8978271484375, + "logps/rejected": -2041.569580078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.021237373352051, + "rewards/margins": 13.964242935180664, + "rewards/rejected": -19.98548126220703, + "step": 41070 + }, + { + "epoch": 2.45, + "learning_rate": 4.953655026140047e-07, + "logits/chosen": -2.567430019378662, + "logits/rejected": -1.911029577255249, + "logps/chosen": -666.9448852539062, + "logps/rejected": -2121.330322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.955953121185303, + "rewards/margins": 14.824505805969238, + "rewards/rejected": -20.780460357666016, + "step": 41080 + }, + { + "epoch": 2.45, + "learning_rate": 4.943295415399665e-07, + "logits/chosen": -2.5274577140808105, + "logits/rejected": -1.8767728805541992, + "logps/chosen": -661.2052001953125, + "logps/rejected": -2104.359130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9069929122924805, + "rewards/margins": 14.69354248046875, + "rewards/rejected": -20.600536346435547, + "step": 41090 + }, + { + "epoch": 2.45, + "learning_rate": 4.932945459956617e-07, + "logits/chosen": -2.529101610183716, + "logits/rejected": -1.8526971340179443, + "logps/chosen": -655.4729614257812, + "logps/rejected": -2155.1044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.886415958404541, + "rewards/margins": 15.213427543640137, + "rewards/rejected": -21.099842071533203, + "step": 41100 + }, + { + "epoch": 2.45, + "learning_rate": 4.922605164793367e-07, + "logits/chosen": -2.5493645668029785, + "logits/rejected": -1.8258450031280518, + "logps/chosen": -665.846923828125, + "logps/rejected": -2141.900634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.892909526824951, + "rewards/margins": 15.081387519836426, + "rewards/rejected": -20.97429847717285, + "step": 41110 + }, + { + "epoch": 2.45, + "learning_rate": 4.912274534887734e-07, + "logits/chosen": -2.5564379692077637, + "logits/rejected": -1.9088672399520874, + "logps/chosen": -699.506591796875, + "logps/rejected": -2021.872314453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.234681606292725, + "rewards/margins": 13.532529830932617, + "rewards/rejected": -19.7672119140625, + "step": 41120 + }, + { + "epoch": 2.45, + "learning_rate": 4.901953575212884e-07, + "logits/chosen": -2.5244553089141846, + "logits/rejected": -1.8358415365219116, + "logps/chosen": -663.7564697265625, + "logps/rejected": -2120.517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009889602661133, + "rewards/margins": 14.770971298217773, + "rewards/rejected": -20.780860900878906, + "step": 41130 + }, + { + "epoch": 2.45, + "learning_rate": 4.891642290737328e-07, + "logits/chosen": -2.5248405933380127, + "logits/rejected": -1.8736165761947632, + "logps/chosen": -659.2869262695312, + "logps/rejected": -2154.929931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.94135856628418, + "rewards/margins": 15.168008804321289, + "rewards/rejected": -21.10936737060547, + "step": 41140 + }, + { + "epoch": 2.45, + "learning_rate": 4.881340686424915e-07, + "logits/chosen": -2.5499722957611084, + "logits/rejected": -1.8486969470977783, + "logps/chosen": -676.1099853515625, + "logps/rejected": -2065.888916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0561842918396, + "rewards/margins": 14.164457321166992, + "rewards/rejected": -20.220638275146484, + "step": 41150 + }, + { + "epoch": 2.45, + "learning_rate": 4.87104876723484e-07, + "logits/chosen": -2.5626885890960693, + "logits/rejected": -1.8571040630340576, + "logps/chosen": -697.1173095703125, + "logps/rejected": -2167.33203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.283074378967285, + "rewards/margins": 14.953569412231445, + "rewards/rejected": -21.236642837524414, + "step": 41160 + }, + { + "epoch": 2.45, + "learning_rate": 4.860766538121634e-07, + "logits/chosen": -2.502178192138672, + "logits/rejected": -1.7513805627822876, + "logps/chosen": -674.6160888671875, + "logps/rejected": -2082.00927734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.013796329498291, + "rewards/margins": 14.354347229003906, + "rewards/rejected": -20.36814308166504, + "step": 41170 + }, + { + "epoch": 2.46, + "learning_rate": 4.850494004035156e-07, + "logits/chosen": -2.557612657546997, + "logits/rejected": -1.8837029933929443, + "logps/chosen": -672.6720581054688, + "logps/rejected": -2153.662841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051349639892578, + "rewards/margins": 15.052006721496582, + "rewards/rejected": -21.103357315063477, + "step": 41180 + }, + { + "epoch": 2.46, + "learning_rate": 4.840231169920609e-07, + "logits/chosen": -2.5334231853485107, + "logits/rejected": -1.8619626760482788, + "logps/chosen": -655.1824340820312, + "logps/rejected": -2089.858642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.867859840393066, + "rewards/margins": 14.597892761230469, + "rewards/rejected": -20.46575164794922, + "step": 41190 + }, + { + "epoch": 2.46, + "learning_rate": 4.82997804071852e-07, + "logits/chosen": -2.554177761077881, + "logits/rejected": -1.781612753868103, + "logps/chosen": -692.2777709960938, + "logps/rejected": -2039.1773681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219475269317627, + "rewards/margins": 13.729803085327148, + "rewards/rejected": -19.949277877807617, + "step": 41200 + }, + { + "epoch": 2.46, + "learning_rate": 4.819734621364744e-07, + "logits/chosen": -2.547886371612549, + "logits/rejected": -1.9428682327270508, + "logps/chosen": -665.8411865234375, + "logps/rejected": -2159.76806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.970856666564941, + "rewards/margins": 15.1824951171875, + "rewards/rejected": -21.153350830078125, + "step": 41210 + }, + { + "epoch": 2.46, + "learning_rate": 4.809500916790466e-07, + "logits/chosen": -2.575817346572876, + "logits/rejected": -1.8436002731323242, + "logps/chosen": -663.5533447265625, + "logps/rejected": -2213.73779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894764423370361, + "rewards/margins": 15.782180786132812, + "rewards/rejected": -21.676944732666016, + "step": 41220 + }, + { + "epoch": 2.46, + "learning_rate": 4.799276931922187e-07, + "logits/chosen": -2.535900115966797, + "logits/rejected": -1.8411662578582764, + "logps/chosen": -677.451171875, + "logps/rejected": -2094.63623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1302170753479, + "rewards/margins": 14.37525463104248, + "rewards/rejected": -20.505470275878906, + "step": 41230 + }, + { + "epoch": 2.46, + "learning_rate": 4.789062671681735e-07, + "logits/chosen": -2.5476863384246826, + "logits/rejected": -1.7454850673675537, + "logps/chosen": -676.0446166992188, + "logps/rejected": -2223.978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995916843414307, + "rewards/margins": 15.778173446655273, + "rewards/rejected": -21.774089813232422, + "step": 41240 + }, + { + "epoch": 2.46, + "learning_rate": 4.778858140986259e-07, + "logits/chosen": -2.582204580307007, + "logits/rejected": -1.9720147848129272, + "logps/chosen": -654.52978515625, + "logps/rejected": -2048.646240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.868691921234131, + "rewards/margins": 14.187106132507324, + "rewards/rejected": -20.055797576904297, + "step": 41250 + }, + { + "epoch": 2.46, + "learning_rate": 4.76866334474822e-07, + "logits/chosen": -2.537651300430298, + "logits/rejected": -1.7567218542099, + "logps/chosen": -668.3717651367188, + "logps/rejected": -2161.6142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980715751647949, + "rewards/margins": 15.186413764953613, + "rewards/rejected": -21.167129516601562, + "step": 41260 + }, + { + "epoch": 2.46, + "learning_rate": 4.758478287875376e-07, + "logits/chosen": -2.532456159591675, + "logits/rejected": -1.9053694009780884, + "logps/chosen": -671.0333862304688, + "logps/rejected": -2137.554443359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.031503200531006, + "rewards/margins": 14.907461166381836, + "rewards/rejected": -20.93896484375, + "step": 41270 + }, + { + "epoch": 2.46, + "learning_rate": 4.748302975270838e-07, + "logits/chosen": -2.545600652694702, + "logits/rejected": -1.9030202627182007, + "logps/chosen": -678.7271728515625, + "logps/rejected": -2258.445556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106261253356934, + "rewards/margins": 16.030879974365234, + "rewards/rejected": -22.137142181396484, + "step": 41280 + }, + { + "epoch": 2.46, + "learning_rate": 4.738137411832977e-07, + "logits/chosen": -2.5410845279693604, + "logits/rejected": -1.8958820104599, + "logps/chosen": -693.1046142578125, + "logps/rejected": -2056.702880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25675106048584, + "rewards/margins": 13.879974365234375, + "rewards/rejected": -20.13672637939453, + "step": 41290 + }, + { + "epoch": 2.46, + "learning_rate": 4.7279816024555225e-07, + "logits/chosen": -2.546534538269043, + "logits/rejected": -1.8875999450683594, + "logps/chosen": -679.4595947265625, + "logps/rejected": -2109.947021484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.074402809143066, + "rewards/margins": 14.568278312683105, + "rewards/rejected": -20.642681121826172, + "step": 41300 + }, + { + "epoch": 2.46, + "learning_rate": 4.71783555202745e-07, + "logits/chosen": -2.5283470153808594, + "logits/rejected": -1.7542402744293213, + "logps/chosen": -673.3590087890625, + "logps/rejected": -2206.777099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056089878082275, + "rewards/margins": 15.565251350402832, + "rewards/rejected": -21.621341705322266, + "step": 41310 + }, + { + "epoch": 2.46, + "learning_rate": 4.7076992654330993e-07, + "logits/chosen": -2.5029730796813965, + "logits/rejected": -1.8160429000854492, + "logps/chosen": -666.2530517578125, + "logps/rejected": -2205.87890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97265100479126, + "rewards/margins": 15.6445951461792, + "rewards/rejected": -21.61724853515625, + "step": 41320 + }, + { + "epoch": 2.46, + "learning_rate": 4.697572747552051e-07, + "logits/chosen": -2.56895112991333, + "logits/rejected": -1.8592641353607178, + "logps/chosen": -695.6130981445312, + "logps/rejected": -2218.627685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235921859741211, + "rewards/margins": 15.513646125793457, + "rewards/rejected": -21.74956703186035, + "step": 41330 + }, + { + "epoch": 2.47, + "learning_rate": 4.6874560032592333e-07, + "logits/chosen": -2.566016674041748, + "logits/rejected": -1.8922803401947021, + "logps/chosen": -671.2903442382812, + "logps/rejected": -2236.658935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.987648010253906, + "rewards/margins": 15.943829536437988, + "rewards/rejected": -21.93147850036621, + "step": 41340 + }, + { + "epoch": 2.47, + "learning_rate": 4.677349037424833e-07, + "logits/chosen": -2.5346131324768066, + "logits/rejected": -1.922732949256897, + "logps/chosen": -658.3837890625, + "logps/rejected": -2134.403076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.886420726776123, + "rewards/margins": 15.016790390014648, + "rewards/rejected": -20.903209686279297, + "step": 41350 + }, + { + "epoch": 2.47, + "learning_rate": 4.6672518549143454e-07, + "logits/chosen": -2.5493226051330566, + "logits/rejected": -1.8552844524383545, + "logps/chosen": -684.8785400390625, + "logps/rejected": -2146.727783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.133220672607422, + "rewards/margins": 14.892572402954102, + "rewards/rejected": -21.025793075561523, + "step": 41360 + }, + { + "epoch": 2.47, + "learning_rate": 4.6571644605885565e-07, + "logits/chosen": -2.577080249786377, + "logits/rejected": -1.8388553857803345, + "logps/chosen": -668.6907958984375, + "logps/rejected": -2100.65380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.947967529296875, + "rewards/margins": 14.606280326843262, + "rewards/rejected": -20.554248809814453, + "step": 41370 + }, + { + "epoch": 2.47, + "learning_rate": 4.6470868593035376e-07, + "logits/chosen": -2.5566134452819824, + "logits/rejected": -1.9085067510604858, + "logps/chosen": -685.0042724609375, + "logps/rejected": -2187.31005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1776556968688965, + "rewards/margins": 15.242570877075195, + "rewards/rejected": -21.42022705078125, + "step": 41380 + }, + { + "epoch": 2.47, + "learning_rate": 4.637019055910644e-07, + "logits/chosen": -2.575373888015747, + "logits/rejected": -1.926085114479065, + "logps/chosen": -669.4240112304688, + "logps/rejected": -2166.26806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995880126953125, + "rewards/margins": 15.227192878723145, + "rewards/rejected": -21.22307586669922, + "step": 41390 + }, + { + "epoch": 2.47, + "learning_rate": 4.6269610552565153e-07, + "logits/chosen": -2.566547393798828, + "logits/rejected": -1.7863870859146118, + "logps/chosen": -683.154296875, + "logps/rejected": -2123.818603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.109132289886475, + "rewards/margins": 14.685620307922363, + "rewards/rejected": -20.794750213623047, + "step": 41400 + }, + { + "epoch": 2.47, + "learning_rate": 4.616912862183076e-07, + "logits/chosen": -2.5190200805664062, + "logits/rejected": -1.9172775745391846, + "logps/chosen": -690.9026489257812, + "logps/rejected": -2109.8779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.160746097564697, + "rewards/margins": 14.487512588500977, + "rewards/rejected": -20.648258209228516, + "step": 41410 + }, + { + "epoch": 2.47, + "learning_rate": 4.6068744815275264e-07, + "logits/chosen": -2.576951026916504, + "logits/rejected": -1.9188251495361328, + "logps/chosen": -683.5606079101562, + "logps/rejected": -2119.58349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128500461578369, + "rewards/margins": 14.614277839660645, + "rewards/rejected": -20.742778778076172, + "step": 41420 + }, + { + "epoch": 2.47, + "learning_rate": 4.5968459181223416e-07, + "logits/chosen": -2.533501148223877, + "logits/rejected": -1.8952691555023193, + "logps/chosen": -669.16162109375, + "logps/rejected": -2119.64501953125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.018095016479492, + "rewards/margins": 14.736369132995605, + "rewards/rejected": -20.754465103149414, + "step": 41430 + }, + { + "epoch": 2.47, + "learning_rate": 4.586827176795269e-07, + "logits/chosen": -2.554046154022217, + "logits/rejected": -1.9354711771011353, + "logps/chosen": -682.2623901367188, + "logps/rejected": -2109.529541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.134248733520508, + "rewards/margins": 14.523097038269043, + "rewards/rejected": -20.657344818115234, + "step": 41440 + }, + { + "epoch": 2.47, + "learning_rate": 4.576818262369337e-07, + "logits/chosen": -2.537785053253174, + "logits/rejected": -1.8276840448379517, + "logps/chosen": -673.1021728515625, + "logps/rejected": -2141.416259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047103404998779, + "rewards/margins": 14.916219711303711, + "rewards/rejected": -20.96332550048828, + "step": 41450 + }, + { + "epoch": 2.47, + "learning_rate": 4.566819179662829e-07, + "logits/chosen": -2.5477445125579834, + "logits/rejected": -2.0146148204803467, + "logps/chosen": -707.8687744140625, + "logps/rejected": -2087.246826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.327849864959717, + "rewards/margins": 14.086750984191895, + "rewards/rejected": -20.414600372314453, + "step": 41460 + }, + { + "epoch": 2.47, + "learning_rate": 4.5568299334893095e-07, + "logits/chosen": -2.5283617973327637, + "logits/rejected": -1.8633372783660889, + "logps/chosen": -670.6337890625, + "logps/rejected": -2125.842529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0114336013793945, + "rewards/margins": 14.798295974731445, + "rewards/rejected": -20.809728622436523, + "step": 41470 + }, + { + "epoch": 2.47, + "learning_rate": 4.546850528657601e-07, + "logits/chosen": -2.4746451377868652, + "logits/rejected": -1.8987953662872314, + "logps/chosen": -675.09619140625, + "logps/rejected": -2108.26904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.038559913635254, + "rewards/margins": 14.611079216003418, + "rewards/rejected": -20.649639129638672, + "step": 41480 + }, + { + "epoch": 2.47, + "learning_rate": 4.5368809699717855e-07, + "logits/chosen": -2.5565154552459717, + "logits/rejected": -1.967140555381775, + "logps/chosen": -661.868408203125, + "logps/rejected": -2071.48876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872971534729004, + "rewards/margins": 14.413719177246094, + "rewards/rejected": -20.28668975830078, + "step": 41490 + }, + { + "epoch": 2.47, + "learning_rate": 4.5269212622312097e-07, + "logits/chosen": -2.5272746086120605, + "logits/rejected": -1.8340442180633545, + "logps/chosen": -681.4362182617188, + "logps/rejected": -2198.869384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.147745609283447, + "rewards/margins": 15.400230407714844, + "rewards/rejected": -21.547977447509766, + "step": 41500 + }, + { + "epoch": 2.48, + "learning_rate": 4.5169714102304836e-07, + "logits/chosen": -2.516277313232422, + "logits/rejected": -1.8939584493637085, + "logps/chosen": -690.8638916015625, + "logps/rejected": -2111.55810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.221890449523926, + "rewards/margins": 14.43342113494873, + "rewards/rejected": -20.655311584472656, + "step": 41510 + }, + { + "epoch": 2.48, + "learning_rate": 4.507031418759447e-07, + "logits/chosen": -2.540179491043091, + "logits/rejected": -1.9675792455673218, + "logps/chosen": -687.1053466796875, + "logps/rejected": -2052.97900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1947102546691895, + "rewards/margins": 13.903070449829102, + "rewards/rejected": -20.097780227661133, + "step": 41520 + }, + { + "epoch": 2.48, + "learning_rate": 4.497101292603237e-07, + "logits/chosen": -2.5795211791992188, + "logits/rejected": -1.8577855825424194, + "logps/chosen": -693.7371215820312, + "logps/rejected": -2148.15380859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.205355167388916, + "rewards/margins": 14.834175109863281, + "rewards/rejected": -21.039531707763672, + "step": 41530 + }, + { + "epoch": 2.48, + "learning_rate": 4.4871810365421904e-07, + "logits/chosen": -2.5303103923797607, + "logits/rejected": -1.8762420415878296, + "logps/chosen": -692.954345703125, + "logps/rejected": -2062.94287109375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.17506742477417, + "rewards/margins": 14.02367877960205, + "rewards/rejected": -20.198749542236328, + "step": 41540 + }, + { + "epoch": 2.48, + "learning_rate": 4.477270655351942e-07, + "logits/chosen": -2.577338457107544, + "logits/rejected": -1.9192349910736084, + "logps/chosen": -681.8521728515625, + "logps/rejected": -2157.400634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0935282707214355, + "rewards/margins": 15.0324068069458, + "rewards/rejected": -21.125934600830078, + "step": 41550 + }, + { + "epoch": 2.48, + "learning_rate": 4.46737015380333e-07, + "logits/chosen": -2.5222325325012207, + "logits/rejected": -1.8049161434173584, + "logps/chosen": -684.8095703125, + "logps/rejected": -2201.009033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179574966430664, + "rewards/margins": 15.376680374145508, + "rewards/rejected": -21.556255340576172, + "step": 41560 + }, + { + "epoch": 2.48, + "learning_rate": 4.4574795366624733e-07, + "logits/chosen": -2.5595571994781494, + "logits/rejected": -1.8679697513580322, + "logps/chosen": -673.8756713867188, + "logps/rejected": -2136.66796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.053811073303223, + "rewards/margins": 14.865318298339844, + "rewards/rejected": -20.919130325317383, + "step": 41570 + }, + { + "epoch": 2.48, + "learning_rate": 4.447598808690695e-07, + "logits/chosen": -2.5743179321289062, + "logits/rejected": -1.869629144668579, + "logps/chosen": -678.3665161132812, + "logps/rejected": -2183.2890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105561256408691, + "rewards/margins": 15.265069961547852, + "rewards/rejected": -21.370630264282227, + "step": 41580 + }, + { + "epoch": 2.48, + "learning_rate": 4.437727974644598e-07, + "logits/chosen": -2.550617218017578, + "logits/rejected": -1.9142253398895264, + "logps/chosen": -669.9147338867188, + "logps/rejected": -2124.60498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008391380310059, + "rewards/margins": 14.796565055847168, + "rewards/rejected": -20.804956436157227, + "step": 41590 + }, + { + "epoch": 2.48, + "learning_rate": 4.4278670392759876e-07, + "logits/chosen": -2.5450587272644043, + "logits/rejected": -1.8571159839630127, + "logps/chosen": -700.6234741210938, + "logps/rejected": -2189.593505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.27959680557251, + "rewards/margins": 15.179883003234863, + "rewards/rejected": -21.459476470947266, + "step": 41600 + }, + { + "epoch": 2.48, + "learning_rate": 4.418016007331924e-07, + "logits/chosen": -2.4607338905334473, + "logits/rejected": -1.788204550743103, + "logps/chosen": -671.5883178710938, + "logps/rejected": -2071.255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107806205749512, + "rewards/margins": 14.152612686157227, + "rewards/rejected": -20.260417938232422, + "step": 41610 + }, + { + "epoch": 2.48, + "learning_rate": 4.408174883554689e-07, + "logits/chosen": -2.555565357208252, + "logits/rejected": -1.8342069387435913, + "logps/chosen": -692.8160400390625, + "logps/rejected": -2096.53759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.160201072692871, + "rewards/margins": 14.355979919433594, + "rewards/rejected": -20.51618003845215, + "step": 41620 + }, + { + "epoch": 2.48, + "learning_rate": 4.398343672681804e-07, + "logits/chosen": -2.5481715202331543, + "logits/rejected": -1.805834412574768, + "logps/chosen": -680.5081787109375, + "logps/rejected": -2156.920654296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10155725479126, + "rewards/margins": 15.036094665527344, + "rewards/rejected": -21.137653350830078, + "step": 41630 + }, + { + "epoch": 2.48, + "learning_rate": 4.3885223794460114e-07, + "logits/chosen": -2.5000057220458984, + "logits/rejected": -1.826030969619751, + "logps/chosen": -702.1094970703125, + "logps/rejected": -2157.471435546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.292414665222168, + "rewards/margins": 14.834980964660645, + "rewards/rejected": -21.127395629882812, + "step": 41640 + }, + { + "epoch": 2.48, + "learning_rate": 4.3787110085752844e-07, + "logits/chosen": -2.56577730178833, + "logits/rejected": -1.8940823078155518, + "logps/chosen": -672.3062744140625, + "logps/rejected": -2272.0087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.027530193328857, + "rewards/margins": 16.248096466064453, + "rewards/rejected": -22.27562713623047, + "step": 41650 + }, + { + "epoch": 2.48, + "learning_rate": 4.3689095647928117e-07, + "logits/chosen": -2.5716354846954346, + "logits/rejected": -1.878257155418396, + "logps/chosen": -662.7984619140625, + "logps/rejected": -2075.44189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.947155952453613, + "rewards/margins": 14.37523365020752, + "rewards/rejected": -20.322389602661133, + "step": 41660 + }, + { + "epoch": 2.48, + "learning_rate": 4.359118052817013e-07, + "logits/chosen": -2.5383105278015137, + "logits/rejected": -1.8157879114151, + "logps/chosen": -656.7630615234375, + "logps/rejected": -2056.103515625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.799504280090332, + "rewards/margins": 14.306111335754395, + "rewards/rejected": -20.105615615844727, + "step": 41670 + }, + { + "epoch": 2.49, + "learning_rate": 4.349336477361521e-07, + "logits/chosen": -2.528557062149048, + "logits/rejected": -1.8860437870025635, + "logps/chosen": -673.6136474609375, + "logps/rejected": -2234.3095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0456767082214355, + "rewards/margins": 15.861764907836914, + "rewards/rejected": -21.907442092895508, + "step": 41680 + }, + { + "epoch": 2.49, + "learning_rate": 4.3395648431351856e-07, + "logits/chosen": -2.5512728691101074, + "logits/rejected": -1.8546701669692993, + "logps/chosen": -676.1406860351562, + "logps/rejected": -2171.131591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.01172399520874, + "rewards/margins": 15.270376205444336, + "rewards/rejected": -21.282100677490234, + "step": 41690 + }, + { + "epoch": 2.49, + "learning_rate": 4.3298031548420716e-07, + "logits/chosen": -2.555121660232544, + "logits/rejected": -1.9029754400253296, + "logps/chosen": -662.857421875, + "logps/rejected": -2203.016845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9544196128845215, + "rewards/margins": 15.617773056030273, + "rewards/rejected": -21.572193145751953, + "step": 41700 + }, + { + "epoch": 2.49, + "learning_rate": 4.3200514171814536e-07, + "logits/chosen": -2.5417659282684326, + "logits/rejected": -1.8141587972640991, + "logps/chosen": -690.7754516601562, + "logps/rejected": -2142.964599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202120780944824, + "rewards/margins": 14.777769088745117, + "rewards/rejected": -20.97989273071289, + "step": 41710 + }, + { + "epoch": 2.49, + "learning_rate": 4.310309634847823e-07, + "logits/chosen": -2.5708720684051514, + "logits/rejected": -1.9193788766860962, + "logps/chosen": -671.6292724609375, + "logps/rejected": -2100.06494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980690956115723, + "rewards/margins": 14.563908576965332, + "rewards/rejected": -20.544601440429688, + "step": 41720 + }, + { + "epoch": 2.49, + "learning_rate": 4.300577812530868e-07, + "logits/chosen": -2.5650625228881836, + "logits/rejected": -1.9320300817489624, + "logps/chosen": -666.5905151367188, + "logps/rejected": -2169.05224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023373603820801, + "rewards/margins": 15.221084594726562, + "rewards/rejected": -21.244461059570312, + "step": 41730 + }, + { + "epoch": 2.49, + "learning_rate": 4.290855954915488e-07, + "logits/chosen": -2.5739378929138184, + "logits/rejected": -1.9194729328155518, + "logps/chosen": -669.0414428710938, + "logps/rejected": -2202.612548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000436305999756, + "rewards/margins": 15.581918716430664, + "rewards/rejected": -21.582351684570312, + "step": 41740 + }, + { + "epoch": 2.49, + "learning_rate": 4.2811440666817917e-07, + "logits/chosen": -2.552755117416382, + "logits/rejected": -1.7816441059112549, + "logps/chosen": -677.717529296875, + "logps/rejected": -2084.461181640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.053200721740723, + "rewards/margins": 14.35260009765625, + "rewards/rejected": -20.405803680419922, + "step": 41750 + }, + { + "epoch": 2.49, + "learning_rate": 4.2714421525050734e-07, + "logits/chosen": -2.527561902999878, + "logits/rejected": -1.832079291343689, + "logps/chosen": -681.8748779296875, + "logps/rejected": -2104.44775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095529079437256, + "rewards/margins": 14.489484786987305, + "rewards/rejected": -20.58501434326172, + "step": 41760 + }, + { + "epoch": 2.49, + "learning_rate": 4.2617502170558407e-07, + "logits/chosen": -2.5589816570281982, + "logits/rejected": -1.8422431945800781, + "logps/chosen": -670.5550537109375, + "logps/rejected": -2152.458251953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03486967086792, + "rewards/margins": 15.059529304504395, + "rewards/rejected": -21.094398498535156, + "step": 41770 + }, + { + "epoch": 2.49, + "learning_rate": 4.252068264999795e-07, + "logits/chosen": -2.5117459297180176, + "logits/rejected": -1.8223154544830322, + "logps/chosen": -674.0010375976562, + "logps/rejected": -2113.4130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095263481140137, + "rewards/margins": 14.59168529510498, + "rewards/rejected": -20.686946868896484, + "step": 41780 + }, + { + "epoch": 2.49, + "learning_rate": 4.242396300997809e-07, + "logits/chosen": -2.527758836746216, + "logits/rejected": -1.9108165502548218, + "logps/chosen": -665.1453857421875, + "logps/rejected": -2131.065185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.992560863494873, + "rewards/margins": 14.87700366973877, + "rewards/rejected": -20.869565963745117, + "step": 41790 + }, + { + "epoch": 2.49, + "learning_rate": 4.2327343297059924e-07, + "logits/chosen": -2.5406131744384766, + "logits/rejected": -1.8949607610702515, + "logps/chosen": -694.4806518554688, + "logps/rejected": -2111.33544921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.256230354309082, + "rewards/margins": 14.40764045715332, + "rewards/rejected": -20.663869857788086, + "step": 41800 + }, + { + "epoch": 2.49, + "learning_rate": 4.223082355775593e-07, + "logits/chosen": -2.5449748039245605, + "logits/rejected": -1.7278972864151, + "logps/chosen": -701.054443359375, + "logps/rejected": -2139.254638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.335141181945801, + "rewards/margins": 14.615379333496094, + "rewards/rejected": -20.950519561767578, + "step": 41810 + }, + { + "epoch": 2.49, + "learning_rate": 4.213440383853093e-07, + "logits/chosen": -2.5302722454071045, + "logits/rejected": -1.8545650243759155, + "logps/chosen": -680.2680053710938, + "logps/rejected": -2196.10791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.103662490844727, + "rewards/margins": 15.396032333374023, + "rewards/rejected": -21.49969482421875, + "step": 41820 + }, + { + "epoch": 2.49, + "learning_rate": 4.203808418580116e-07, + "logits/chosen": -2.572566270828247, + "logits/rejected": -1.8595826625823975, + "logps/chosen": -681.1734619140625, + "logps/rejected": -2218.786376953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067371845245361, + "rewards/margins": 15.67497444152832, + "rewards/rejected": -21.74234390258789, + "step": 41830 + }, + { + "epoch": 2.49, + "learning_rate": 4.1941864645935136e-07, + "logits/chosen": -2.521692991256714, + "logits/rejected": -1.7764173746109009, + "logps/chosen": -679.5719604492188, + "logps/rejected": -2191.1318359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.130465507507324, + "rewards/margins": 15.327728271484375, + "rewards/rejected": -21.458194732666016, + "step": 41840 + }, + { + "epoch": 2.5, + "learning_rate": 4.1845745265252673e-07, + "logits/chosen": -2.5456039905548096, + "logits/rejected": -1.919651746749878, + "logps/chosen": -687.4251708984375, + "logps/rejected": -2085.626220703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13748025894165, + "rewards/margins": 14.2708740234375, + "rewards/rejected": -20.408353805541992, + "step": 41850 + }, + { + "epoch": 2.5, + "learning_rate": 4.174972609002592e-07, + "logits/chosen": -2.533416271209717, + "logits/rejected": -1.8113237619400024, + "logps/chosen": -700.3028564453125, + "logps/rejected": -2103.048583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.298403263092041, + "rewards/margins": 14.299734115600586, + "rewards/rejected": -20.59813690185547, + "step": 41860 + }, + { + "epoch": 2.5, + "learning_rate": 4.165380716647832e-07, + "logits/chosen": -2.5479841232299805, + "logits/rejected": -1.855752944946289, + "logps/chosen": -676.8448486328125, + "logps/rejected": -2130.31494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.080774784088135, + "rewards/margins": 14.777758598327637, + "rewards/rejected": -20.858535766601562, + "step": 41870 + }, + { + "epoch": 2.5, + "learning_rate": 4.15579885407853e-07, + "logits/chosen": -2.5067501068115234, + "logits/rejected": -1.843621850013733, + "logps/chosen": -677.0447998046875, + "logps/rejected": -2119.173095703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0759196281433105, + "rewards/margins": 14.684710502624512, + "rewards/rejected": -20.760631561279297, + "step": 41880 + }, + { + "epoch": 2.5, + "learning_rate": 4.146227025907393e-07, + "logits/chosen": -2.552560329437256, + "logits/rejected": -1.8959558010101318, + "logps/chosen": -683.2362060546875, + "logps/rejected": -2162.445068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095638751983643, + "rewards/margins": 15.08411979675293, + "rewards/rejected": -21.179759979248047, + "step": 41890 + }, + { + "epoch": 2.5, + "learning_rate": 4.1366652367422987e-07, + "logits/chosen": -2.5428619384765625, + "logits/rejected": -1.9457063674926758, + "logps/chosen": -652.8184204101562, + "logps/rejected": -2097.087158203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.795094013214111, + "rewards/margins": 14.746902465820312, + "rewards/rejected": -20.541996002197266, + "step": 41900 + }, + { + "epoch": 2.5, + "learning_rate": 4.1271134911862936e-07, + "logits/chosen": -2.5231680870056152, + "logits/rejected": -1.8271898031234741, + "logps/chosen": -662.4869384765625, + "logps/rejected": -2174.777099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.909327507019043, + "rewards/margins": 15.389875411987305, + "rewards/rejected": -21.29920196533203, + "step": 41910 + }, + { + "epoch": 2.5, + "learning_rate": 4.117571793837591e-07, + "logits/chosen": -2.5296249389648438, + "logits/rejected": -1.8617126941680908, + "logps/chosen": -668.227294921875, + "logps/rejected": -2145.092529296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009939193725586, + "rewards/margins": 15.004812240600586, + "rewards/rejected": -21.014751434326172, + "step": 41920 + }, + { + "epoch": 2.5, + "learning_rate": 4.1080401492895575e-07, + "logits/chosen": -2.5605382919311523, + "logits/rejected": -1.9778000116348267, + "logps/chosen": -678.643798828125, + "logps/rejected": -2062.38623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0694732666015625, + "rewards/margins": 14.127981185913086, + "rewards/rejected": -20.19745445251465, + "step": 41930 + }, + { + "epoch": 2.5, + "learning_rate": 4.0985185621307293e-07, + "logits/chosen": -2.5331454277038574, + "logits/rejected": -1.8573232889175415, + "logps/chosen": -723.7398681640625, + "logps/rejected": -2124.60986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.535793304443359, + "rewards/margins": 14.264376640319824, + "rewards/rejected": -20.800172805786133, + "step": 41940 + }, + { + "epoch": 2.5, + "learning_rate": 4.0890070369448006e-07, + "logits/chosen": -2.588787317276001, + "logits/rejected": -1.910825490951538, + "logps/chosen": -659.8246459960938, + "logps/rejected": -2119.85595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.83551549911499, + "rewards/margins": 14.93153190612793, + "rewards/rejected": -20.767047882080078, + "step": 41950 + }, + { + "epoch": 2.5, + "learning_rate": 4.079505578310616e-07, + "logits/chosen": -2.5928568840026855, + "logits/rejected": -1.8754746913909912, + "logps/chosen": -686.5358276367188, + "logps/rejected": -2106.252197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1861114501953125, + "rewards/margins": 14.430511474609375, + "rewards/rejected": -20.616622924804688, + "step": 41960 + }, + { + "epoch": 2.5, + "learning_rate": 4.0700141908021793e-07, + "logits/chosen": -2.5680229663848877, + "logits/rejected": -1.7276016473770142, + "logps/chosen": -688.4574584960938, + "logps/rejected": -2225.481689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076161861419678, + "rewards/margins": 15.732818603515625, + "rewards/rejected": -21.808982849121094, + "step": 41970 + }, + { + "epoch": 2.5, + "learning_rate": 4.060532878988646e-07, + "logits/chosen": -2.5206682682037354, + "logits/rejected": -1.726746916770935, + "logps/chosen": -682.021240234375, + "logps/rejected": -2159.364990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098508358001709, + "rewards/margins": 15.046762466430664, + "rewards/rejected": -21.1452693939209, + "step": 41980 + }, + { + "epoch": 2.5, + "learning_rate": 4.051061647434315e-07, + "logits/chosen": -2.5480523109436035, + "logits/rejected": -1.8150146007537842, + "logps/chosen": -668.4762573242188, + "logps/rejected": -2140.173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004542350769043, + "rewards/margins": 14.948640823364258, + "rewards/rejected": -20.953182220458984, + "step": 41990 + }, + { + "epoch": 2.5, + "learning_rate": 4.041600500698642e-07, + "logits/chosen": -2.578770875930786, + "logits/rejected": -1.8580152988433838, + "logps/chosen": -670.1262817382812, + "logps/rejected": -2083.762451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014847755432129, + "rewards/margins": 14.386212348937988, + "rewards/rejected": -20.401060104370117, + "step": 42000 + }, + { + "epoch": 2.5, + "eval_logits/chosen": -2.488325357437134, + "eval_logits/rejected": -2.0592870712280273, + "eval_logps/chosen": -730.76416015625, + "eval_logps/rejected": -2000.848388671875, + "eval_loss": 2.8207336072227918e-05, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -6.63689661026001, + "eval_rewards/margins": 12.91610336303711, + "eval_rewards/rejected": -19.553001403808594, + "eval_runtime": 3.9014, + "eval_samples_per_second": 1.282, + "eval_steps_per_second": 0.256, + "step": 42000 + }, + { + "epoch": 2.51, + "learning_rate": 4.0321494433362225e-07, + "logits/chosen": -2.5607917308807373, + "logits/rejected": -1.9343971014022827, + "logps/chosen": -675.4789428710938, + "logps/rejected": -2153.573486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085306167602539, + "rewards/margins": 15.01629638671875, + "rewards/rejected": -21.10160255432129, + "step": 42010 + }, + { + "epoch": 2.51, + "learning_rate": 4.022708479896789e-07, + "logits/chosen": -2.5745739936828613, + "logits/rejected": -1.980685830116272, + "logps/chosen": -671.9363403320312, + "logps/rejected": -2248.65283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9728803634643555, + "rewards/margins": 16.06320571899414, + "rewards/rejected": -22.036087036132812, + "step": 42020 + }, + { + "epoch": 2.51, + "learning_rate": 4.013277614925229e-07, + "logits/chosen": -2.512087345123291, + "logits/rejected": -1.7915420532226562, + "logps/chosen": -694.6151123046875, + "logps/rejected": -2261.036865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.209036827087402, + "rewards/margins": 15.9509916305542, + "rewards/rejected": -22.1600284576416, + "step": 42030 + }, + { + "epoch": 2.51, + "learning_rate": 4.003856852961557e-07, + "logits/chosen": -2.5078890323638916, + "logits/rejected": -1.8848358392715454, + "logps/chosen": -679.7755737304688, + "logps/rejected": -2150.86767578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149945259094238, + "rewards/margins": 14.90929889678955, + "rewards/rejected": -21.059246063232422, + "step": 42040 + }, + { + "epoch": 2.51, + "learning_rate": 3.994446198540933e-07, + "logits/chosen": -2.5482072830200195, + "logits/rejected": -1.9019193649291992, + "logps/chosen": -681.3031005859375, + "logps/rejected": -2069.681884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128972053527832, + "rewards/margins": 14.125677108764648, + "rewards/rejected": -20.254648208618164, + "step": 42050 + }, + { + "epoch": 2.51, + "learning_rate": 3.985045656193631e-07, + "logits/chosen": -2.467419385910034, + "logits/rejected": -1.8222122192382812, + "logps/chosen": -693.3245849609375, + "logps/rejected": -2174.041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2248687744140625, + "rewards/margins": 15.083608627319336, + "rewards/rejected": -21.308475494384766, + "step": 42060 + }, + { + "epoch": 2.51, + "learning_rate": 3.975655230445094e-07, + "logits/chosen": -2.561519145965576, + "logits/rejected": -1.8790801763534546, + "logps/chosen": -668.747802734375, + "logps/rejected": -2138.02099609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.927594184875488, + "rewards/margins": 15.01159381866455, + "rewards/rejected": -20.939189910888672, + "step": 42070 + }, + { + "epoch": 2.51, + "learning_rate": 3.9662749258158466e-07, + "logits/chosen": -2.5133204460144043, + "logits/rejected": -1.8651880025863647, + "logps/chosen": -662.7083740234375, + "logps/rejected": -2209.578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9342732429504395, + "rewards/margins": 15.722152709960938, + "rewards/rejected": -21.65642547607422, + "step": 42080 + }, + { + "epoch": 2.51, + "learning_rate": 3.9569047468215967e-07, + "logits/chosen": -2.5372135639190674, + "logits/rejected": -1.9438565969467163, + "logps/chosen": -660.9811401367188, + "logps/rejected": -2189.788818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.898501396179199, + "rewards/margins": 15.560046195983887, + "rewards/rejected": -21.458547592163086, + "step": 42090 + }, + { + "epoch": 2.51, + "learning_rate": 3.9475446979731214e-07, + "logits/chosen": -2.4959423542022705, + "logits/rejected": -1.7327264547348022, + "logps/chosen": -684.81884765625, + "logps/rejected": -2106.60400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.168558597564697, + "rewards/margins": 14.45952320098877, + "rewards/rejected": -20.628082275390625, + "step": 42100 + }, + { + "epoch": 2.51, + "learning_rate": 3.9381947837763696e-07, + "logits/chosen": -2.5100412368774414, + "logits/rejected": -1.8350574970245361, + "logps/chosen": -683.7547607421875, + "logps/rejected": -2020.8101806640625, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075535297393799, + "rewards/margins": 13.694819450378418, + "rewards/rejected": -19.770355224609375, + "step": 42110 + }, + { + "epoch": 2.51, + "learning_rate": 3.9288550087323687e-07, + "logits/chosen": -2.5123705863952637, + "logits/rejected": -1.9087188243865967, + "logps/chosen": -680.1853637695312, + "logps/rejected": -2109.27685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.131197929382324, + "rewards/margins": 14.520484924316406, + "rewards/rejected": -20.651683807373047, + "step": 42120 + }, + { + "epoch": 2.51, + "learning_rate": 3.91952537733731e-07, + "logits/chosen": -2.561730146408081, + "logits/rejected": -1.9021574258804321, + "logps/chosen": -672.5549926757812, + "logps/rejected": -2224.48681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0263776779174805, + "rewards/margins": 15.76249885559082, + "rewards/rejected": -21.78887367248535, + "step": 42130 + }, + { + "epoch": 2.51, + "learning_rate": 3.910205894082461e-07, + "logits/chosen": -2.539097547531128, + "logits/rejected": -1.7649405002593994, + "logps/chosen": -684.0809936523438, + "logps/rejected": -2111.172119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1367597579956055, + "rewards/margins": 14.521440505981445, + "rewards/rejected": -20.658199310302734, + "step": 42140 + }, + { + "epoch": 2.51, + "learning_rate": 3.900896563454226e-07, + "logits/chosen": -2.5647168159484863, + "logits/rejected": -1.796866774559021, + "logps/chosen": -661.5699462890625, + "logps/rejected": -2125.77783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914206027984619, + "rewards/margins": 14.898981094360352, + "rewards/rejected": -20.81318473815918, + "step": 42150 + }, + { + "epoch": 2.51, + "learning_rate": 3.8915973899341154e-07, + "logits/chosen": -2.4965925216674805, + "logits/rejected": -1.8519483804702759, + "logps/chosen": -672.4778442382812, + "logps/rejected": -2042.421142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.065593242645264, + "rewards/margins": 13.916913986206055, + "rewards/rejected": -19.982507705688477, + "step": 42160 + }, + { + "epoch": 2.51, + "learning_rate": 3.882308377998753e-07, + "logits/chosen": -2.509289026260376, + "logits/rejected": -1.8065643310546875, + "logps/chosen": -704.0732421875, + "logps/rejected": -2082.55712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.338688850402832, + "rewards/margins": 14.053731918334961, + "rewards/rejected": -20.39242172241211, + "step": 42170 + }, + { + "epoch": 2.52, + "learning_rate": 3.873029532119868e-07, + "logits/chosen": -2.541114330291748, + "logits/rejected": -1.7964999675750732, + "logps/chosen": -680.0362548828125, + "logps/rejected": -2090.647705078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.052260398864746, + "rewards/margins": 14.414196968078613, + "rewards/rejected": -20.46645736694336, + "step": 42180 + }, + { + "epoch": 2.52, + "learning_rate": 3.8637608567642954e-07, + "logits/chosen": -2.534682035446167, + "logits/rejected": -1.8219964504241943, + "logps/chosen": -668.2068481445312, + "logps/rejected": -2157.946044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981810569763184, + "rewards/margins": 15.143438339233398, + "rewards/rejected": -21.125247955322266, + "step": 42190 + }, + { + "epoch": 2.52, + "learning_rate": 3.854502356393977e-07, + "logits/chosen": -2.546729564666748, + "logits/rejected": -1.8524621725082397, + "logps/chosen": -657.0057983398438, + "logps/rejected": -2164.60400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865413188934326, + "rewards/margins": 15.342005729675293, + "rewards/rejected": -21.207422256469727, + "step": 42200 + }, + { + "epoch": 2.52, + "learning_rate": 3.845254035465951e-07, + "logits/chosen": -2.547370195388794, + "logits/rejected": -1.7879562377929688, + "logps/chosen": -672.2354125976562, + "logps/rejected": -2145.5732421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.016470909118652, + "rewards/margins": 14.992304801940918, + "rewards/rejected": -21.008777618408203, + "step": 42210 + }, + { + "epoch": 2.52, + "learning_rate": 3.8360158984323714e-07, + "logits/chosen": -2.569371461868286, + "logits/rejected": -1.8907206058502197, + "logps/chosen": -677.2481079101562, + "logps/rejected": -2161.435546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.071313381195068, + "rewards/margins": 15.096545219421387, + "rewards/rejected": -21.167858123779297, + "step": 42220 + }, + { + "epoch": 2.52, + "learning_rate": 3.8267879497404655e-07, + "logits/chosen": -2.5619616508483887, + "logits/rejected": -1.9746217727661133, + "logps/chosen": -688.2042846679688, + "logps/rejected": -2225.044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11981201171875, + "rewards/margins": 15.69444465637207, + "rewards/rejected": -21.81425666809082, + "step": 42230 + }, + { + "epoch": 2.52, + "learning_rate": 3.8175701938325677e-07, + "logits/chosen": -2.5074820518493652, + "logits/rejected": -1.8163135051727295, + "logps/chosen": -675.7034912109375, + "logps/rejected": -2056.123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06587028503418, + "rewards/margins": 14.057699203491211, + "rewards/rejected": -20.12356948852539, + "step": 42240 + }, + { + "epoch": 2.52, + "learning_rate": 3.8083626351461096e-07, + "logits/chosen": -2.5085809230804443, + "logits/rejected": -1.7826576232910156, + "logps/chosen": -671.2238159179688, + "logps/rejected": -2120.400146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.065907001495361, + "rewards/margins": 14.690423965454102, + "rewards/rejected": -20.756328582763672, + "step": 42250 + }, + { + "epoch": 2.52, + "learning_rate": 3.7991652781136093e-07, + "logits/chosen": -2.525747776031494, + "logits/rejected": -1.7117736339569092, + "logps/chosen": -668.4039916992188, + "logps/rejected": -2166.609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.944070339202881, + "rewards/margins": 15.281646728515625, + "rewards/rejected": -21.2257137298584, + "step": 42260 + }, + { + "epoch": 2.52, + "learning_rate": 3.7899781271626747e-07, + "logits/chosen": -2.563704252243042, + "logits/rejected": -1.8276948928833008, + "logps/chosen": -659.5574340820312, + "logps/rejected": -2161.01123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.920351982116699, + "rewards/margins": 15.246770858764648, + "rewards/rejected": -21.167123794555664, + "step": 42270 + }, + { + "epoch": 2.52, + "learning_rate": 3.780801186715996e-07, + "logits/chosen": -2.5215771198272705, + "logits/rejected": -1.9230962991714478, + "logps/chosen": -655.546875, + "logps/rejected": -2040.8092041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895138740539551, + "rewards/margins": 14.079660415649414, + "rewards/rejected": -19.974796295166016, + "step": 42280 + }, + { + "epoch": 2.52, + "learning_rate": 3.771634461191359e-07, + "logits/chosen": -2.585216999053955, + "logits/rejected": -1.8546091318130493, + "logps/chosen": -653.7403564453125, + "logps/rejected": -2144.426025390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.863303184509277, + "rewards/margins": 15.151090621948242, + "rewards/rejected": -21.014394760131836, + "step": 42290 + }, + { + "epoch": 2.52, + "learning_rate": 3.76247795500162e-07, + "logits/chosen": -2.5439906120300293, + "logits/rejected": -1.8743298053741455, + "logps/chosen": -659.5541381835938, + "logps/rejected": -2151.869873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.860300540924072, + "rewards/margins": 15.209081649780273, + "rewards/rejected": -21.069381713867188, + "step": 42300 + }, + { + "epoch": 2.52, + "learning_rate": 3.753331672554722e-07, + "logits/chosen": -2.480710744857788, + "logits/rejected": -1.8473106622695923, + "logps/chosen": -676.9384765625, + "logps/rejected": -2178.566162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10324764251709, + "rewards/margins": 15.255467414855957, + "rewards/rejected": -21.358715057373047, + "step": 42310 + }, + { + "epoch": 2.52, + "learning_rate": 3.744195618253693e-07, + "logits/chosen": -2.5440125465393066, + "logits/rejected": -1.9386427402496338, + "logps/chosen": -673.4140014648438, + "logps/rejected": -2200.13134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.03539514541626, + "rewards/margins": 15.515963554382324, + "rewards/rejected": -21.55135726928711, + "step": 42320 + }, + { + "epoch": 2.52, + "learning_rate": 3.73506979649661e-07, + "logits/chosen": -2.5109658241271973, + "logits/rejected": -1.7783441543579102, + "logps/chosen": -677.0964965820312, + "logps/rejected": -2111.73974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099194526672363, + "rewards/margins": 14.585795402526855, + "rewards/rejected": -20.684988021850586, + "step": 42330 + }, + { + "epoch": 2.52, + "learning_rate": 3.725954211676666e-07, + "logits/chosen": -2.4858479499816895, + "logits/rejected": -1.7615665197372437, + "logps/chosen": -667.9063720703125, + "logps/rejected": -2166.933837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.928255558013916, + "rewards/margins": 15.305188179016113, + "rewards/rejected": -21.233442306518555, + "step": 42340 + }, + { + "epoch": 2.53, + "learning_rate": 3.716848868182085e-07, + "logits/chosen": -2.5153355598449707, + "logits/rejected": -1.844878911972046, + "logps/chosen": -702.3790893554688, + "logps/rejected": -2156.45166015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308177471160889, + "rewards/margins": 14.819494247436523, + "rewards/rejected": -21.127674102783203, + "step": 42350 + }, + { + "epoch": 2.53, + "learning_rate": 3.707753770396197e-07, + "logits/chosen": -2.542536497116089, + "logits/rejected": -1.8796279430389404, + "logps/chosen": -681.790283203125, + "logps/rejected": -2043.794921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13308572769165, + "rewards/margins": 13.874364852905273, + "rewards/rejected": -20.007450103759766, + "step": 42360 + }, + { + "epoch": 2.53, + "learning_rate": 3.698668922697363e-07, + "logits/chosen": -2.5347886085510254, + "logits/rejected": -1.9031622409820557, + "logps/chosen": -664.5105590820312, + "logps/rejected": -2050.216552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.939157962799072, + "rewards/margins": 14.130297660827637, + "rewards/rejected": -20.069454193115234, + "step": 42370 + }, + { + "epoch": 2.53, + "learning_rate": 3.689594329459045e-07, + "logits/chosen": -2.571730136871338, + "logits/rejected": -1.9387069940567017, + "logps/chosen": -675.1978759765625, + "logps/rejected": -2216.647216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011783123016357, + "rewards/margins": 15.703450202941895, + "rewards/rejected": -21.71523094177246, + "step": 42380 + }, + { + "epoch": 2.53, + "learning_rate": 3.6805299950497366e-07, + "logits/chosen": -2.502190351486206, + "logits/rejected": -1.8665111064910889, + "logps/chosen": -675.1910400390625, + "logps/rejected": -2062.555908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0900702476501465, + "rewards/margins": 14.09355354309082, + "rewards/rejected": -20.183622360229492, + "step": 42390 + }, + { + "epoch": 2.53, + "learning_rate": 3.671475923833029e-07, + "logits/chosen": -2.6105782985687256, + "logits/rejected": -2.008732557296753, + "logps/chosen": -709.301513671875, + "logps/rejected": -2083.15771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.294754981994629, + "rewards/margins": 14.100560188293457, + "rewards/rejected": -20.395313262939453, + "step": 42400 + }, + { + "epoch": 2.53, + "learning_rate": 3.6624321201675334e-07, + "logits/chosen": -2.587904453277588, + "logits/rejected": -1.9232141971588135, + "logps/chosen": -679.3438110351562, + "logps/rejected": -2204.344482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088616847991943, + "rewards/margins": 15.507716178894043, + "rewards/rejected": -21.596332550048828, + "step": 42410 + }, + { + "epoch": 2.53, + "learning_rate": 3.653398588406937e-07, + "logits/chosen": -2.5095691680908203, + "logits/rejected": -1.9512317180633545, + "logps/chosen": -695.5866088867188, + "logps/rejected": -2132.86376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.199579238891602, + "rewards/margins": 14.67218017578125, + "rewards/rejected": -20.87175941467285, + "step": 42420 + }, + { + "epoch": 2.53, + "learning_rate": 3.6443753328999994e-07, + "logits/chosen": -2.4979851245880127, + "logits/rejected": -1.8624130487442017, + "logps/chosen": -673.892822265625, + "logps/rejected": -2150.41259765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.05908203125, + "rewards/margins": 14.99451732635498, + "rewards/rejected": -21.053600311279297, + "step": 42430 + }, + { + "epoch": 2.53, + "learning_rate": 3.6353623579904905e-07, + "logits/chosen": -2.5409369468688965, + "logits/rejected": -1.947153091430664, + "logps/chosen": -676.60888671875, + "logps/rejected": -2100.20703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050106048583984, + "rewards/margins": 14.511236190795898, + "rewards/rejected": -20.561342239379883, + "step": 42440 + }, + { + "epoch": 2.53, + "learning_rate": 3.626359668017285e-07, + "logits/chosen": -2.56050705909729, + "logits/rejected": -1.8215196132659912, + "logps/chosen": -666.5372314453125, + "logps/rejected": -2118.89013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.922192573547363, + "rewards/margins": 14.813835144042969, + "rewards/rejected": -20.73602867126465, + "step": 42450 + }, + { + "epoch": 2.53, + "learning_rate": 3.6173672673142506e-07, + "logits/chosen": -2.5445706844329834, + "logits/rejected": -1.8348537683486938, + "logps/chosen": -688.4744873046875, + "logps/rejected": -2102.497802734375, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.139774322509766, + "rewards/margins": 14.44328784942627, + "rewards/rejected": -20.58306312561035, + "step": 42460 + }, + { + "epoch": 2.53, + "learning_rate": 3.6083851602103507e-07, + "logits/chosen": -2.553401470184326, + "logits/rejected": -1.934679388999939, + "logps/chosen": -682.666259765625, + "logps/rejected": -2180.35009765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.071780681610107, + "rewards/margins": 15.284765243530273, + "rewards/rejected": -21.35654640197754, + "step": 42470 + }, + { + "epoch": 2.53, + "learning_rate": 3.5994133510295517e-07, + "logits/chosen": -2.511472225189209, + "logits/rejected": -1.7702802419662476, + "logps/chosen": -651.0765380859375, + "logps/rejected": -2140.45068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.753665924072266, + "rewards/margins": 15.210392951965332, + "rewards/rejected": -20.964059829711914, + "step": 42480 + }, + { + "epoch": 2.53, + "learning_rate": 3.590451844090903e-07, + "logits/chosen": -2.533801555633545, + "logits/rejected": -1.8350780010223389, + "logps/chosen": -685.8434448242188, + "logps/rejected": -2046.7203369140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.129049301147461, + "rewards/margins": 13.895075798034668, + "rewards/rejected": -20.024127960205078, + "step": 42490 + }, + { + "epoch": 2.53, + "learning_rate": 3.5815006437084604e-07, + "logits/chosen": -2.5326647758483887, + "logits/rejected": -1.9672014713287354, + "logps/chosen": -689.5415649414062, + "logps/rejected": -2105.2646484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174215793609619, + "rewards/margins": 14.431445121765137, + "rewards/rejected": -20.60565948486328, + "step": 42500 + }, + { + "epoch": 2.53, + "learning_rate": 3.572559754191332e-07, + "logits/chosen": -2.5708184242248535, + "logits/rejected": -1.8721933364868164, + "logps/chosen": -674.13232421875, + "logps/rejected": -2173.53125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9961442947387695, + "rewards/margins": 15.302452087402344, + "rewards/rejected": -21.29859733581543, + "step": 42510 + }, + { + "epoch": 2.54, + "learning_rate": 3.563629179843667e-07, + "logits/chosen": -2.540271282196045, + "logits/rejected": -1.7680728435516357, + "logps/chosen": -679.657958984375, + "logps/rejected": -2145.37255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025543212890625, + "rewards/margins": 14.985750198364258, + "rewards/rejected": -21.011295318603516, + "step": 42520 + }, + { + "epoch": 2.54, + "learning_rate": 3.5547089249646396e-07, + "logits/chosen": -2.4791626930236816, + "logits/rejected": -1.8334630727767944, + "logps/chosen": -655.7330322265625, + "logps/rejected": -2182.594970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.81198787689209, + "rewards/margins": 15.568567276000977, + "rewards/rejected": -21.38055419921875, + "step": 42530 + }, + { + "epoch": 2.54, + "learning_rate": 3.545798993848465e-07, + "logits/chosen": -2.545685291290283, + "logits/rejected": -1.8699709177017212, + "logps/chosen": -680.96826171875, + "logps/rejected": -2171.48291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163500785827637, + "rewards/margins": 15.123054504394531, + "rewards/rejected": -21.286556243896484, + "step": 42540 + }, + { + "epoch": 2.54, + "learning_rate": 3.5368993907843793e-07, + "logits/chosen": -2.5644755363464355, + "logits/rejected": -1.8923680782318115, + "logps/chosen": -674.9183349609375, + "logps/rejected": -2089.763916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.05203914642334, + "rewards/margins": 14.409555435180664, + "rewards/rejected": -20.461593627929688, + "step": 42550 + }, + { + "epoch": 2.54, + "learning_rate": 3.5280101200566517e-07, + "logits/chosen": -2.5342094898223877, + "logits/rejected": -1.9081065654754639, + "logps/chosen": -688.1568603515625, + "logps/rejected": -2109.68701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216904640197754, + "rewards/margins": 14.445358276367188, + "rewards/rejected": -20.662263870239258, + "step": 42560 + }, + { + "epoch": 2.54, + "learning_rate": 3.51913118594458e-07, + "logits/chosen": -2.595510482788086, + "logits/rejected": -1.9508062601089478, + "logps/chosen": -668.1697998046875, + "logps/rejected": -2160.669189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956933498382568, + "rewards/margins": 15.21051025390625, + "rewards/rejected": -21.167444229125977, + "step": 42570 + }, + { + "epoch": 2.54, + "learning_rate": 3.5102625927224827e-07, + "logits/chosen": -2.5063469409942627, + "logits/rejected": -1.8765029907226562, + "logps/chosen": -697.3021240234375, + "logps/rejected": -2207.54541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223868370056152, + "rewards/margins": 15.402885437011719, + "rewards/rejected": -21.626754760742188, + "step": 42580 + }, + { + "epoch": 2.54, + "learning_rate": 3.5014043446597037e-07, + "logits/chosen": -2.559542655944824, + "logits/rejected": -1.7669941186904907, + "logps/chosen": -667.0044555664062, + "logps/rejected": -2083.27001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.970376014709473, + "rewards/margins": 14.422281265258789, + "rewards/rejected": -20.392654418945312, + "step": 42590 + }, + { + "epoch": 2.54, + "learning_rate": 3.492556446020587e-07, + "logits/chosen": -2.5092220306396484, + "logits/rejected": -1.7312473058700562, + "logps/chosen": -688.35009765625, + "logps/rejected": -2168.57177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229752063751221, + "rewards/margins": 15.02044677734375, + "rewards/rejected": -21.250194549560547, + "step": 42600 + }, + { + "epoch": 2.54, + "learning_rate": 3.483718901064537e-07, + "logits/chosen": -2.5655357837677, + "logits/rejected": -1.9585611820220947, + "logps/chosen": -670.1060791015625, + "logps/rejected": -2077.520751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.928145885467529, + "rewards/margins": 14.407136917114258, + "rewards/rejected": -20.335283279418945, + "step": 42610 + }, + { + "epoch": 2.54, + "learning_rate": 3.474891714045919e-07, + "logits/chosen": -2.4800899028778076, + "logits/rejected": -1.8274787664413452, + "logps/chosen": -682.31005859375, + "logps/rejected": -2155.754150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173080921173096, + "rewards/margins": 14.933334350585938, + "rewards/rejected": -21.106416702270508, + "step": 42620 + }, + { + "epoch": 2.54, + "learning_rate": 3.466074889214169e-07, + "logits/chosen": -2.5458502769470215, + "logits/rejected": -1.978600263595581, + "logps/chosen": -671.6137084960938, + "logps/rejected": -2147.077880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.030228614807129, + "rewards/margins": 14.987627983093262, + "rewards/rejected": -21.01785659790039, + "step": 42630 + }, + { + "epoch": 2.54, + "learning_rate": 3.457268430813679e-07, + "logits/chosen": -2.5134103298187256, + "logits/rejected": -1.858399748802185, + "logps/chosen": -672.3219604492188, + "logps/rejected": -2145.236083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.048104763031006, + "rewards/margins": 14.954595565795898, + "rewards/rejected": -21.002702713012695, + "step": 42640 + }, + { + "epoch": 2.54, + "learning_rate": 3.4484723430839e-07, + "logits/chosen": -2.5367488861083984, + "logits/rejected": -1.8402131795883179, + "logps/chosen": -677.1158447265625, + "logps/rejected": -2112.520751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.084343433380127, + "rewards/margins": 14.608108520507812, + "rewards/rejected": -20.69245147705078, + "step": 42650 + }, + { + "epoch": 2.54, + "learning_rate": 3.4396866302592593e-07, + "logits/chosen": -2.5591490268707275, + "logits/rejected": -1.9660899639129639, + "logps/chosen": -661.5247192382812, + "logps/rejected": -2100.6611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.901612281799316, + "rewards/margins": 14.663432121276855, + "rewards/rejected": -20.56504249572754, + "step": 42660 + }, + { + "epoch": 2.54, + "learning_rate": 3.4309112965692006e-07, + "logits/chosen": -2.6202330589294434, + "logits/rejected": -1.9310182332992554, + "logps/chosen": -695.272705078125, + "logps/rejected": -2083.5791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262884140014648, + "rewards/margins": 14.117701530456543, + "rewards/rejected": -20.380584716796875, + "step": 42670 + }, + { + "epoch": 2.55, + "learning_rate": 3.422146346238173e-07, + "logits/chosen": -2.537501573562622, + "logits/rejected": -1.8258960247039795, + "logps/chosen": -690.8477172851562, + "logps/rejected": -2101.587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166714191436768, + "rewards/margins": 14.388916015625, + "rewards/rejected": -20.55562973022461, + "step": 42680 + }, + { + "epoch": 2.55, + "learning_rate": 3.413391783485606e-07, + "logits/chosen": -2.554379940032959, + "logits/rejected": -1.9704253673553467, + "logps/chosen": -688.27294921875, + "logps/rejected": -2137.482666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2741193771362305, + "rewards/margins": 14.672235488891602, + "rewards/rejected": -20.946353912353516, + "step": 42690 + }, + { + "epoch": 2.55, + "learning_rate": 3.4046476125259743e-07, + "logits/chosen": -2.5228559970855713, + "logits/rejected": -1.8744380474090576, + "logps/chosen": -676.64794921875, + "logps/rejected": -2224.28515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.09534215927124, + "rewards/margins": 15.699854850769043, + "rewards/rejected": -21.795198440551758, + "step": 42700 + }, + { + "epoch": 2.55, + "learning_rate": 3.395913837568693e-07, + "logits/chosen": -2.5434608459472656, + "logits/rejected": -1.934761643409729, + "logps/chosen": -663.0567626953125, + "logps/rejected": -2123.33447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930596351623535, + "rewards/margins": 14.880365371704102, + "rewards/rejected": -20.810962677001953, + "step": 42710 + }, + { + "epoch": 2.55, + "learning_rate": 3.3871904628182267e-07, + "logits/chosen": -2.5374972820281982, + "logits/rejected": -1.854233980178833, + "logps/chosen": -679.3184814453125, + "logps/rejected": -2143.97900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.02978515625, + "rewards/margins": 14.969975471496582, + "rewards/rejected": -20.999759674072266, + "step": 42720 + }, + { + "epoch": 2.55, + "learning_rate": 3.378477492473986e-07, + "logits/chosen": -2.5010390281677246, + "logits/rejected": -1.7969138622283936, + "logps/chosen": -668.9170532226562, + "logps/rejected": -2090.643310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.972380638122559, + "rewards/margins": 14.488192558288574, + "rewards/rejected": -20.460573196411133, + "step": 42730 + }, + { + "epoch": 2.55, + "learning_rate": 3.3697749307304156e-07, + "logits/chosen": -2.547548294067383, + "logits/rejected": -1.8759605884552002, + "logps/chosen": -688.720947265625, + "logps/rejected": -2191.296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.199646472930908, + "rewards/margins": 15.270971298217773, + "rewards/rejected": -21.470617294311523, + "step": 42740 + }, + { + "epoch": 2.55, + "learning_rate": 3.361082781776906e-07, + "logits/chosen": -2.5555758476257324, + "logits/rejected": -1.8621604442596436, + "logps/chosen": -659.7650756835938, + "logps/rejected": -2116.18115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910068988800049, + "rewards/margins": 14.791757583618164, + "rewards/rejected": -20.701824188232422, + "step": 42750 + }, + { + "epoch": 2.55, + "learning_rate": 3.3524010497978793e-07, + "logits/chosen": -2.5220999717712402, + "logits/rejected": -1.9079545736312866, + "logps/chosen": -673.8246459960938, + "logps/rejected": -2137.45166015625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.048910617828369, + "rewards/margins": 14.881036758422852, + "rewards/rejected": -20.929946899414062, + "step": 42760 + }, + { + "epoch": 2.55, + "learning_rate": 3.343729738972706e-07, + "logits/chosen": -2.549398899078369, + "logits/rejected": -1.7779515981674194, + "logps/chosen": -678.02001953125, + "logps/rejected": -2174.039794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0667619705200195, + "rewards/margins": 15.228462219238281, + "rewards/rejected": -21.295223236083984, + "step": 42770 + }, + { + "epoch": 2.55, + "learning_rate": 3.335068853475762e-07, + "logits/chosen": -2.5496368408203125, + "logits/rejected": -1.8221992254257202, + "logps/chosen": -703.4022827148438, + "logps/rejected": -2153.361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.32608699798584, + "rewards/margins": 14.763463973999023, + "rewards/rejected": -21.089553833007812, + "step": 42780 + }, + { + "epoch": 2.55, + "learning_rate": 3.326418397476394e-07, + "logits/chosen": -2.5798020362854004, + "logits/rejected": -1.895896315574646, + "logps/chosen": -688.6338500976562, + "logps/rejected": -2063.8447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179116249084473, + "rewards/margins": 14.018648147583008, + "rewards/rejected": -20.197763442993164, + "step": 42790 + }, + { + "epoch": 2.55, + "learning_rate": 3.317778375138933e-07, + "logits/chosen": -2.5825657844543457, + "logits/rejected": -1.8642256259918213, + "logps/chosen": -683.3881225585938, + "logps/rejected": -2014.9078369140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.123284816741943, + "rewards/margins": 13.585668563842773, + "rewards/rejected": -19.708955764770508, + "step": 42800 + }, + { + "epoch": 2.55, + "learning_rate": 3.309148790622688e-07, + "logits/chosen": -2.566603899002075, + "logits/rejected": -1.9801509380340576, + "logps/chosen": -662.87451171875, + "logps/rejected": -1985.108154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.946787357330322, + "rewards/margins": 13.46742057800293, + "rewards/rejected": -19.414209365844727, + "step": 42810 + }, + { + "epoch": 2.55, + "learning_rate": 3.3005296480819376e-07, + "logits/chosen": -2.5443813800811768, + "logits/rejected": -1.8617826700210571, + "logps/chosen": -674.6171875, + "logps/rejected": -2129.45556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0618109703063965, + "rewards/margins": 14.783699035644531, + "rewards/rejected": -20.845508575439453, + "step": 42820 + }, + { + "epoch": 2.55, + "learning_rate": 3.291920951665936e-07, + "logits/chosen": -2.5685782432556152, + "logits/rejected": -1.8886492252349854, + "logps/chosen": -683.7181396484375, + "logps/rejected": -2138.95556640625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.129151344299316, + "rewards/margins": 14.8096923828125, + "rewards/rejected": -20.938844680786133, + "step": 42830 + }, + { + "epoch": 2.55, + "learning_rate": 3.2833227055189126e-07, + "logits/chosen": -2.573035478591919, + "logits/rejected": -1.9615806341171265, + "logps/chosen": -653.6937255859375, + "logps/rejected": -2236.139892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872474670410156, + "rewards/margins": 16.03866958618164, + "rewards/rejected": -21.911142349243164, + "step": 42840 + }, + { + "epoch": 2.56, + "learning_rate": 3.2747349137800615e-07, + "logits/chosen": -2.5331504344940186, + "logits/rejected": -1.9148271083831787, + "logps/chosen": -692.5384521484375, + "logps/rejected": -2170.46337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.20050048828125, + "rewards/margins": 15.059941291809082, + "rewards/rejected": -21.260440826416016, + "step": 42850 + }, + { + "epoch": 2.56, + "learning_rate": 3.266157580583548e-07, + "logits/chosen": -2.581366539001465, + "logits/rejected": -1.9673774242401123, + "logps/chosen": -664.8687133789062, + "logps/rejected": -2123.81298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971001148223877, + "rewards/margins": 14.830877304077148, + "rewards/rejected": -20.801876068115234, + "step": 42860 + }, + { + "epoch": 2.56, + "learning_rate": 3.2575907100584976e-07, + "logits/chosen": -2.518261194229126, + "logits/rejected": -1.875769853591919, + "logps/chosen": -668.1173706054688, + "logps/rejected": -2147.413330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0286173820495605, + "rewards/margins": 15.004159927368164, + "rewards/rejected": -21.032777786254883, + "step": 42870 + }, + { + "epoch": 2.56, + "learning_rate": 3.249034306328999e-07, + "logits/chosen": -2.497748613357544, + "logits/rejected": -1.845918893814087, + "logps/chosen": -705.6256713867188, + "logps/rejected": -2163.38427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3589863777160645, + "rewards/margins": 14.846888542175293, + "rewards/rejected": -21.205875396728516, + "step": 42880 + }, + { + "epoch": 2.56, + "learning_rate": 3.240488373514111e-07, + "logits/chosen": -2.5504794120788574, + "logits/rejected": -1.935394287109375, + "logps/chosen": -684.293212890625, + "logps/rejected": -2242.32568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106916904449463, + "rewards/margins": 15.882118225097656, + "rewards/rejected": -21.989036560058594, + "step": 42890 + }, + { + "epoch": 2.56, + "learning_rate": 3.2319529157278427e-07, + "logits/chosen": -2.5779292583465576, + "logits/rejected": -1.9428770542144775, + "logps/chosen": -680.9351806640625, + "logps/rejected": -2063.81689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149292945861816, + "rewards/margins": 14.059873580932617, + "rewards/rejected": -20.20916748046875, + "step": 42900 + }, + { + "epoch": 2.56, + "learning_rate": 3.2234279370791597e-07, + "logits/chosen": -2.5141379833221436, + "logits/rejected": -1.8514817953109741, + "logps/chosen": -680.1712036132812, + "logps/rejected": -2033.791259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11202335357666, + "rewards/margins": 13.790575981140137, + "rewards/rejected": -19.902597427368164, + "step": 42910 + }, + { + "epoch": 2.56, + "learning_rate": 3.2149134416719933e-07, + "logits/chosen": -2.5909740924835205, + "logits/rejected": -1.9238910675048828, + "logps/chosen": -669.8184814453125, + "logps/rejected": -2209.70263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0026068687438965, + "rewards/margins": 15.636584281921387, + "rewards/rejected": -21.639192581176758, + "step": 42920 + }, + { + "epoch": 2.56, + "learning_rate": 3.2064094336052176e-07, + "logits/chosen": -2.5345652103424072, + "logits/rejected": -1.8343029022216797, + "logps/chosen": -687.8270263671875, + "logps/rejected": -2142.080810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125842094421387, + "rewards/margins": 14.84478759765625, + "rewards/rejected": -20.97062873840332, + "step": 42930 + }, + { + "epoch": 2.56, + "learning_rate": 3.1979159169726633e-07, + "logits/chosen": -2.5462374687194824, + "logits/rejected": -1.8550636768341064, + "logps/chosen": -660.7280883789062, + "logps/rejected": -2160.110595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.916969299316406, + "rewards/margins": 15.232810974121094, + "rewards/rejected": -21.1497802734375, + "step": 42940 + }, + { + "epoch": 2.56, + "learning_rate": 3.189432895863112e-07, + "logits/chosen": -2.5479767322540283, + "logits/rejected": -1.9814157485961914, + "logps/chosen": -666.7677001953125, + "logps/rejected": -2298.159912109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.961241722106934, + "rewards/margins": 16.57448959350586, + "rewards/rejected": -22.53573226928711, + "step": 42950 + }, + { + "epoch": 2.56, + "learning_rate": 3.1809603743602783e-07, + "logits/chosen": -2.5591607093811035, + "logits/rejected": -1.9941303730010986, + "logps/chosen": -680.2266235351562, + "logps/rejected": -2091.49365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.062686920166016, + "rewards/margins": 14.405412673950195, + "rewards/rejected": -20.468097686767578, + "step": 42960 + }, + { + "epoch": 2.56, + "learning_rate": 3.1724983565428525e-07, + "logits/chosen": -2.5594985485076904, + "logits/rejected": -1.8431237936019897, + "logps/chosen": -677.8245239257812, + "logps/rejected": -2085.64453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.082740783691406, + "rewards/margins": 14.331201553344727, + "rewards/rejected": -20.413944244384766, + "step": 42970 + }, + { + "epoch": 2.56, + "learning_rate": 3.164046846484425e-07, + "logits/chosen": -2.5352675914764404, + "logits/rejected": -1.8090488910675049, + "logps/chosen": -681.6437377929688, + "logps/rejected": -2156.441162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.157219886779785, + "rewards/margins": 14.965472221374512, + "rewards/rejected": -21.122692108154297, + "step": 42980 + }, + { + "epoch": 2.56, + "learning_rate": 3.1556058482535817e-07, + "logits/chosen": -2.508016586303711, + "logits/rejected": -1.9310028553009033, + "logps/chosen": -675.9422607421875, + "logps/rejected": -2140.16650390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.033352851867676, + "rewards/margins": 14.921426773071289, + "rewards/rejected": -20.95477867126465, + "step": 42990 + }, + { + "epoch": 2.56, + "learning_rate": 3.147175365913793e-07, + "logits/chosen": -2.537635087966919, + "logits/rejected": -1.8764913082122803, + "logps/chosen": -668.0074462890625, + "logps/rejected": -2070.64501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935111045837402, + "rewards/margins": 14.335365295410156, + "rewards/rejected": -20.27047348022461, + "step": 43000 + }, + { + "epoch": 2.56, + "learning_rate": 3.1387554035235165e-07, + "logits/chosen": -2.560467481613159, + "logits/rejected": -1.8558552265167236, + "logps/chosen": -677.5531005859375, + "logps/rejected": -2182.7080078125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041201591491699, + "rewards/margins": 15.33820629119873, + "rewards/rejected": -21.379405975341797, + "step": 43010 + }, + { + "epoch": 2.57, + "learning_rate": 3.1303459651361027e-07, + "logits/chosen": -2.5992960929870605, + "logits/rejected": -1.8201364278793335, + "logps/chosen": -696.6963500976562, + "logps/rejected": -2206.546142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.22497034072876, + "rewards/margins": 15.397415161132812, + "rewards/rejected": -21.622386932373047, + "step": 43020 + }, + { + "epoch": 2.57, + "learning_rate": 3.121947054799862e-07, + "logits/chosen": -2.548570156097412, + "logits/rejected": -1.7931077480316162, + "logps/chosen": -676.5667724609375, + "logps/rejected": -2204.78466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.038107395172119, + "rewards/margins": 15.557001113891602, + "rewards/rejected": -21.595109939575195, + "step": 43030 + }, + { + "epoch": 2.57, + "learning_rate": 3.113558676558032e-07, + "logits/chosen": -2.55365252494812, + "logits/rejected": -1.9208526611328125, + "logps/chosen": -676.748779296875, + "logps/rejected": -2170.701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.042106628417969, + "rewards/margins": 15.221799850463867, + "rewards/rejected": -21.263906478881836, + "step": 43040 + }, + { + "epoch": 2.57, + "learning_rate": 3.105180834448776e-07, + "logits/chosen": -2.5554308891296387, + "logits/rejected": -1.8920732736587524, + "logps/chosen": -700.3391723632812, + "logps/rejected": -2140.22998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.308506488800049, + "rewards/margins": 14.659051895141602, + "rewards/rejected": -20.96755599975586, + "step": 43050 + }, + { + "epoch": 2.57, + "learning_rate": 3.0968135325051856e-07, + "logits/chosen": -2.549333095550537, + "logits/rejected": -1.8732315301895142, + "logps/chosen": -678.3470458984375, + "logps/rejected": -2155.25634765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1013689041137695, + "rewards/margins": 14.999798774719238, + "rewards/rejected": -21.101165771484375, + "step": 43060 + }, + { + "epoch": 2.57, + "learning_rate": 3.0884567747552827e-07, + "logits/chosen": -2.572211503982544, + "logits/rejected": -1.9172155857086182, + "logps/chosen": -687.9012451171875, + "logps/rejected": -2106.89892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.189769744873047, + "rewards/margins": 14.429913520812988, + "rewards/rejected": -20.61968231201172, + "step": 43070 + }, + { + "epoch": 2.57, + "learning_rate": 3.080110565222008e-07, + "logits/chosen": -2.5243873596191406, + "logits/rejected": -1.836735486984253, + "logps/chosen": -692.4917602539062, + "logps/rejected": -2090.811767578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.184443473815918, + "rewards/margins": 14.273880004882812, + "rewards/rejected": -20.458324432373047, + "step": 43080 + }, + { + "epoch": 2.57, + "learning_rate": 3.071774907923228e-07, + "logits/chosen": -2.4712939262390137, + "logits/rejected": -1.771399736404419, + "logps/chosen": -668.1177978515625, + "logps/rejected": -2083.76953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0194807052612305, + "rewards/margins": 14.387140274047852, + "rewards/rejected": -20.406618118286133, + "step": 43090 + }, + { + "epoch": 2.57, + "learning_rate": 3.0634498068717267e-07, + "logits/chosen": -2.544491767883301, + "logits/rejected": -1.7962095737457275, + "logps/chosen": -667.5520629882812, + "logps/rejected": -2186.73583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.009136199951172, + "rewards/margins": 15.419087409973145, + "rewards/rejected": -21.42822265625, + "step": 43100 + }, + { + "epoch": 2.57, + "learning_rate": 3.05513526607521e-07, + "logits/chosen": -2.603963851928711, + "logits/rejected": -1.857848882675171, + "logps/chosen": -678.069091796875, + "logps/rejected": -2186.983154296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022498607635498, + "rewards/margins": 15.42051887512207, + "rewards/rejected": -21.443017959594727, + "step": 43110 + }, + { + "epoch": 2.57, + "learning_rate": 3.046831289536298e-07, + "logits/chosen": -2.534224510192871, + "logits/rejected": -1.9389148950576782, + "logps/chosen": -703.4140014648438, + "logps/rejected": -2151.136962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.313352108001709, + "rewards/margins": 14.751106262207031, + "rewards/rejected": -21.064456939697266, + "step": 43120 + }, + { + "epoch": 2.57, + "learning_rate": 3.0385378812525207e-07, + "logits/chosen": -2.5559685230255127, + "logits/rejected": -1.8743622303009033, + "logps/chosen": -693.9612426757812, + "logps/rejected": -2136.00390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.263823986053467, + "rewards/margins": 14.65832805633545, + "rewards/rejected": -20.92215347290039, + "step": 43130 + }, + { + "epoch": 2.57, + "learning_rate": 3.0302550452163294e-07, + "logits/chosen": -2.5112967491149902, + "logits/rejected": -1.887817144393921, + "logps/chosen": -656.4869384765625, + "logps/rejected": -2023.0006103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.89965295791626, + "rewards/margins": 13.894183158874512, + "rewards/rejected": -19.79383659362793, + "step": 43140 + }, + { + "epoch": 2.57, + "learning_rate": 3.021982785415076e-07, + "logits/chosen": -2.595947504043579, + "logits/rejected": -1.9624824523925781, + "logps/chosen": -660.5987548828125, + "logps/rejected": -2123.57666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.898858070373535, + "rewards/margins": 14.883096694946289, + "rewards/rejected": -20.78195571899414, + "step": 43150 + }, + { + "epoch": 2.57, + "learning_rate": 3.013721105831033e-07, + "logits/chosen": -2.5113353729248047, + "logits/rejected": -1.8371502161026, + "logps/chosen": -697.7833251953125, + "logps/rejected": -2089.82421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.252183437347412, + "rewards/margins": 14.209024429321289, + "rewards/rejected": -20.461210250854492, + "step": 43160 + }, + { + "epoch": 2.57, + "learning_rate": 3.0054700104413666e-07, + "logits/chosen": -2.53835129737854, + "logits/rejected": -1.940403699874878, + "logps/chosen": -681.0126953125, + "logps/rejected": -2129.109619140625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095268726348877, + "rewards/margins": 14.756362915039062, + "rewards/rejected": -20.85163116455078, + "step": 43170 + }, + { + "epoch": 2.57, + "learning_rate": 2.9972295032181576e-07, + "logits/chosen": -2.53467059135437, + "logits/rejected": -1.896324872970581, + "logps/chosen": -662.340576171875, + "logps/rejected": -2132.258544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.872785568237305, + "rewards/margins": 15.005815505981445, + "rewards/rejected": -20.878602981567383, + "step": 43180 + }, + { + "epoch": 2.58, + "learning_rate": 2.9889995881283856e-07, + "logits/chosen": -2.530057191848755, + "logits/rejected": -1.8605544567108154, + "logps/chosen": -675.1300659179688, + "logps/rejected": -2028.7122802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0774760246276855, + "rewards/margins": 13.768592834472656, + "rewards/rejected": -19.846067428588867, + "step": 43190 + }, + { + "epoch": 2.58, + "learning_rate": 2.980780269133937e-07, + "logits/chosen": -2.4790520668029785, + "logits/rejected": -1.835584044456482, + "logps/chosen": -684.4954833984375, + "logps/rejected": -2121.682861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.203097343444824, + "rewards/margins": 14.573928833007812, + "rewards/rejected": -20.777027130126953, + "step": 43200 + }, + { + "epoch": 2.58, + "learning_rate": 2.9725715501915746e-07, + "logits/chosen": -2.5618317127227783, + "logits/rejected": -1.8960689306259155, + "logps/chosen": -684.5155029296875, + "logps/rejected": -2194.4931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.102415084838867, + "rewards/margins": 15.402185440063477, + "rewards/rejected": -21.50459861755371, + "step": 43210 + }, + { + "epoch": 2.58, + "learning_rate": 2.964373435252996e-07, + "logits/chosen": -2.5370750427246094, + "logits/rejected": -1.8902698755264282, + "logps/chosen": -684.8563842773438, + "logps/rejected": -2144.566162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148340702056885, + "rewards/margins": 14.859911918640137, + "rewards/rejected": -21.008251190185547, + "step": 43220 + }, + { + "epoch": 2.58, + "learning_rate": 2.956185928264757e-07, + "logits/chosen": -2.5488767623901367, + "logits/rejected": -1.885664701461792, + "logps/chosen": -683.4825439453125, + "logps/rejected": -2132.630859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.183382987976074, + "rewards/margins": 14.712038040161133, + "rewards/rejected": -20.895423889160156, + "step": 43230 + }, + { + "epoch": 2.58, + "learning_rate": 2.948009033168339e-07, + "logits/chosen": -2.5476839542388916, + "logits/rejected": -1.8858016729354858, + "logps/chosen": -699.3642578125, + "logps/rejected": -2175.73779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25946569442749, + "rewards/margins": 15.050440788269043, + "rewards/rejected": -21.309906005859375, + "step": 43240 + }, + { + "epoch": 2.58, + "learning_rate": 2.939842753900085e-07, + "logits/chosen": -2.565464496612549, + "logits/rejected": -1.8829195499420166, + "logps/chosen": -677.1176147460938, + "logps/rejected": -2237.48486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.058618068695068, + "rewards/margins": 15.870912551879883, + "rewards/rejected": -21.929529190063477, + "step": 43250 + }, + { + "epoch": 2.58, + "learning_rate": 2.9316870943912554e-07, + "logits/chosen": -2.588291883468628, + "logits/rejected": -2.0408012866973877, + "logps/chosen": -680.0743408203125, + "logps/rejected": -2186.537109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.092430591583252, + "rewards/margins": 15.340042114257812, + "rewards/rejected": -21.432472229003906, + "step": 43260 + }, + { + "epoch": 2.58, + "learning_rate": 2.923542058567966e-07, + "logits/chosen": -2.5564980506896973, + "logits/rejected": -1.8554083108901978, + "logps/chosen": -697.0520629882812, + "logps/rejected": -2122.38330078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.261134147644043, + "rewards/margins": 14.511802673339844, + "rewards/rejected": -20.772937774658203, + "step": 43270 + }, + { + "epoch": 2.58, + "learning_rate": 2.91540765035126e-07, + "logits/chosen": -2.4984240531921387, + "logits/rejected": -1.8350121974945068, + "logps/chosen": -655.7050170898438, + "logps/rejected": -2028.668212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.857572555541992, + "rewards/margins": 13.9985933303833, + "rewards/rejected": -19.856164932250977, + "step": 43280 + }, + { + "epoch": 2.58, + "learning_rate": 2.9072838736570243e-07, + "logits/chosen": -2.526671886444092, + "logits/rejected": -1.8949333429336548, + "logps/chosen": -674.4698486328125, + "logps/rejected": -2156.47607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117147445678711, + "rewards/margins": 14.99125862121582, + "rewards/rejected": -21.108407974243164, + "step": 43290 + }, + { + "epoch": 2.58, + "learning_rate": 2.899170732396048e-07, + "logits/chosen": -2.5692310333251953, + "logits/rejected": -1.9288513660430908, + "logps/chosen": -673.2752685546875, + "logps/rejected": -2090.037841796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040065765380859, + "rewards/margins": 14.406824111938477, + "rewards/rejected": -20.446887969970703, + "step": 43300 + }, + { + "epoch": 2.58, + "learning_rate": 2.8910682304740006e-07, + "logits/chosen": -2.5655970573425293, + "logits/rejected": -1.9280529022216797, + "logps/chosen": -661.4610595703125, + "logps/rejected": -2142.878662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.983691692352295, + "rewards/margins": 15.008875846862793, + "rewards/rejected": -20.992568969726562, + "step": 43310 + }, + { + "epoch": 2.58, + "learning_rate": 2.8829763717914266e-07, + "logits/chosen": -2.5772640705108643, + "logits/rejected": -1.881370186805725, + "logps/chosen": -667.884765625, + "logps/rejected": -2044.776123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023702144622803, + "rewards/margins": 13.994158744812012, + "rewards/rejected": -20.017858505249023, + "step": 43320 + }, + { + "epoch": 2.58, + "learning_rate": 2.8748951602437457e-07, + "logits/chosen": -2.5182082653045654, + "logits/rejected": -1.7910182476043701, + "logps/chosen": -695.7083740234375, + "logps/rejected": -2192.3681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2264251708984375, + "rewards/margins": 15.247854232788086, + "rewards/rejected": -21.474279403686523, + "step": 43330 + }, + { + "epoch": 2.58, + "learning_rate": 2.866824599721249e-07, + "logits/chosen": -2.569013833999634, + "logits/rejected": -1.881450891494751, + "logps/chosen": -668.176513671875, + "logps/rejected": -2160.7939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99445104598999, + "rewards/margins": 15.164172172546387, + "rewards/rejected": -21.15862464904785, + "step": 43340 + }, + { + "epoch": 2.58, + "learning_rate": 2.8587646941091116e-07, + "logits/chosen": -2.5242905616760254, + "logits/rejected": -1.8690420389175415, + "logps/chosen": -691.7935791015625, + "logps/rejected": -2107.240478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.252623081207275, + "rewards/margins": 14.378820419311523, + "rewards/rejected": -20.63144302368164, + "step": 43350 + }, + { + "epoch": 2.59, + "learning_rate": 2.850715447287367e-07, + "logits/chosen": -2.549304962158203, + "logits/rejected": -1.8751332759857178, + "logps/chosen": -658.8277587890625, + "logps/rejected": -2103.418212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8866496086120605, + "rewards/margins": 14.714141845703125, + "rewards/rejected": -20.600791931152344, + "step": 43360 + }, + { + "epoch": 2.59, + "learning_rate": 2.842676863130922e-07, + "logits/chosen": -2.5244438648223877, + "logits/rejected": -1.8065474033355713, + "logps/chosen": -673.2145385742188, + "logps/rejected": -2165.06103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.968230247497559, + "rewards/margins": 15.236910820007324, + "rewards/rejected": -21.20513916015625, + "step": 43370 + }, + { + "epoch": 2.59, + "learning_rate": 2.834648945509552e-07, + "logits/chosen": -2.551081895828247, + "logits/rejected": -1.771485686302185, + "logps/chosen": -666.9126586914062, + "logps/rejected": -2071.011962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012345314025879, + "rewards/margins": 14.264440536499023, + "rewards/rejected": -20.27678871154785, + "step": 43380 + }, + { + "epoch": 2.59, + "learning_rate": 2.826631698287899e-07, + "logits/chosen": -2.5971367359161377, + "logits/rejected": -1.9733960628509521, + "logps/chosen": -672.1779174804688, + "logps/rejected": -2116.155517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994187831878662, + "rewards/margins": 14.739178657531738, + "rewards/rejected": -20.73336410522461, + "step": 43390 + }, + { + "epoch": 2.59, + "learning_rate": 2.8186251253254636e-07, + "logits/chosen": -2.5481345653533936, + "logits/rejected": -1.8638505935668945, + "logps/chosen": -681.1910400390625, + "logps/rejected": -2186.45068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101096153259277, + "rewards/margins": 15.3339262008667, + "rewards/rejected": -21.435022354125977, + "step": 43400 + }, + { + "epoch": 2.59, + "learning_rate": 2.810629230476611e-07, + "logits/chosen": -2.52878737449646, + "logits/rejected": -1.892134428024292, + "logps/chosen": -676.7836303710938, + "logps/rejected": -2165.013427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.082050323486328, + "rewards/margins": 15.111993789672852, + "rewards/rejected": -21.194042205810547, + "step": 43410 + }, + { + "epoch": 2.59, + "learning_rate": 2.802644017590561e-07, + "logits/chosen": -2.537935733795166, + "logits/rejected": -1.7702763080596924, + "logps/chosen": -660.318603515625, + "logps/rejected": -2116.064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.932346343994141, + "rewards/margins": 14.778096199035645, + "rewards/rejected": -20.7104434967041, + "step": 43420 + }, + { + "epoch": 2.59, + "learning_rate": 2.794669490511401e-07, + "logits/chosen": -2.5497422218322754, + "logits/rejected": -1.8641936779022217, + "logps/chosen": -696.9022216796875, + "logps/rejected": -2167.45947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.198629379272461, + "rewards/margins": 15.040185928344727, + "rewards/rejected": -21.238815307617188, + "step": 43430 + }, + { + "epoch": 2.59, + "learning_rate": 2.786705653078062e-07, + "logits/chosen": -2.532102108001709, + "logits/rejected": -1.837161660194397, + "logps/chosen": -666.665771484375, + "logps/rejected": -2250.9794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9856061935424805, + "rewards/margins": 16.078046798706055, + "rewards/rejected": -22.06365394592285, + "step": 43440 + }, + { + "epoch": 2.59, + "learning_rate": 2.7787525091243413e-07, + "logits/chosen": -2.516885995864868, + "logits/rejected": -1.8780620098114014, + "logps/chosen": -685.5119018554688, + "logps/rejected": -2131.955322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.180842876434326, + "rewards/margins": 14.702505111694336, + "rewards/rejected": -20.88334846496582, + "step": 43450 + }, + { + "epoch": 2.59, + "learning_rate": 2.770810062478876e-07, + "logits/chosen": -2.5373730659484863, + "logits/rejected": -1.8705781698226929, + "logps/chosen": -680.6566162109375, + "logps/rejected": -2119.752685546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1299333572387695, + "rewards/margins": 14.62666130065918, + "rewards/rejected": -20.756595611572266, + "step": 43460 + }, + { + "epoch": 2.59, + "learning_rate": 2.76287831696517e-07, + "logits/chosen": -2.5600414276123047, + "logits/rejected": -1.8750470876693726, + "logps/chosen": -701.73095703125, + "logps/rejected": -2141.3203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.330010890960693, + "rewards/margins": 14.631070137023926, + "rewards/rejected": -20.96108055114746, + "step": 43470 + }, + { + "epoch": 2.59, + "learning_rate": 2.7549572764015517e-07, + "logits/chosen": -2.535656213760376, + "logits/rejected": -1.9196052551269531, + "logps/chosen": -683.2463989257812, + "logps/rejected": -2174.86279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115756034851074, + "rewards/margins": 15.19452953338623, + "rewards/rejected": -21.310285568237305, + "step": 43480 + }, + { + "epoch": 2.59, + "learning_rate": 2.7470469446012225e-07, + "logits/chosen": -2.515747308731079, + "logits/rejected": -1.7759008407592773, + "logps/chosen": -684.2420043945312, + "logps/rejected": -2117.91064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.167482376098633, + "rewards/margins": 14.568554878234863, + "rewards/rejected": -20.73603630065918, + "step": 43490 + }, + { + "epoch": 2.59, + "learning_rate": 2.7391473253722017e-07, + "logits/chosen": -2.5583229064941406, + "logits/rejected": -1.9646389484405518, + "logps/chosen": -692.3055419921875, + "logps/rejected": -2193.02294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.241490364074707, + "rewards/margins": 15.241912841796875, + "rewards/rejected": -21.483402252197266, + "step": 43500 + }, + { + "epoch": 2.59, + "learning_rate": 2.731258422517385e-07, + "logits/chosen": -2.529717206954956, + "logits/rejected": -1.8187291622161865, + "logps/chosen": -695.785888671875, + "logps/rejected": -2141.46923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.258998870849609, + "rewards/margins": 14.70501708984375, + "rewards/rejected": -20.96401596069336, + "step": 43510 + }, + { + "epoch": 2.6, + "learning_rate": 2.7233802398344696e-07, + "logits/chosen": -2.560774564743042, + "logits/rejected": -1.9041640758514404, + "logps/chosen": -653.8675537109375, + "logps/rejected": -2119.681396484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.875884056091309, + "rewards/margins": 14.87891960144043, + "rewards/rejected": -20.754804611206055, + "step": 43520 + }, + { + "epoch": 2.6, + "learning_rate": 2.7155127811160336e-07, + "logits/chosen": -2.5494375228881836, + "logits/rejected": -1.8660112619400024, + "logps/chosen": -677.9654541015625, + "logps/rejected": -2193.196533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025216579437256, + "rewards/margins": 15.458259582519531, + "rewards/rejected": -21.483474731445312, + "step": 43530 + }, + { + "epoch": 2.6, + "learning_rate": 2.7076560501494486e-07, + "logits/chosen": -2.532541036605835, + "logits/rejected": -1.866878867149353, + "logps/chosen": -697.6578369140625, + "logps/rejected": -2132.09228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.298453330993652, + "rewards/margins": 14.577032089233398, + "rewards/rejected": -20.875484466552734, + "step": 43540 + }, + { + "epoch": 2.6, + "learning_rate": 2.699810050716969e-07, + "logits/chosen": -2.5223355293273926, + "logits/rejected": -1.8068993091583252, + "logps/chosen": -678.67138671875, + "logps/rejected": -2049.01611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069387912750244, + "rewards/margins": 13.992576599121094, + "rewards/rejected": -20.06196403503418, + "step": 43550 + }, + { + "epoch": 2.6, + "learning_rate": 2.6919747865956413e-07, + "logits/chosen": -2.5758984088897705, + "logits/rejected": -1.9495518207550049, + "logps/chosen": -657.3486938476562, + "logps/rejected": -2050.566162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.886526107788086, + "rewards/margins": 14.174118041992188, + "rewards/rejected": -20.06064224243164, + "step": 43560 + }, + { + "epoch": 2.6, + "learning_rate": 2.684150261557364e-07, + "logits/chosen": -2.5589585304260254, + "logits/rejected": -1.8184421062469482, + "logps/chosen": -667.0135498046875, + "logps/rejected": -2183.88818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.961116313934326, + "rewards/margins": 15.432809829711914, + "rewards/rejected": -21.39392852783203, + "step": 43570 + }, + { + "epoch": 2.6, + "learning_rate": 2.6763364793688685e-07, + "logits/chosen": -2.540605068206787, + "logits/rejected": -1.9615854024887085, + "logps/chosen": -660.1180419921875, + "logps/rejected": -2099.17919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894336700439453, + "rewards/margins": 14.663113594055176, + "rewards/rejected": -20.557453155517578, + "step": 43580 + }, + { + "epoch": 2.6, + "learning_rate": 2.668533443791707e-07, + "logits/chosen": -2.566080331802368, + "logits/rejected": -1.8845055103302002, + "logps/chosen": -688.0859375, + "logps/rejected": -2046.5257568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.205933570861816, + "rewards/margins": 13.830090522766113, + "rewards/rejected": -20.036022186279297, + "step": 43590 + }, + { + "epoch": 2.6, + "learning_rate": 2.660741158582261e-07, + "logits/chosen": -2.5017807483673096, + "logits/rejected": -1.8502733707427979, + "logps/chosen": -675.8678588867188, + "logps/rejected": -2176.177001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.030211448669434, + "rewards/margins": 15.282707214355469, + "rewards/rejected": -21.312915802001953, + "step": 43600 + }, + { + "epoch": 2.6, + "learning_rate": 2.6529596274917384e-07, + "logits/chosen": -2.5673325061798096, + "logits/rejected": -1.9223772287368774, + "logps/chosen": -692.1470947265625, + "logps/rejected": -1946.404541015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158687591552734, + "rewards/margins": 12.853963851928711, + "rewards/rejected": -19.012651443481445, + "step": 43610 + }, + { + "epoch": 2.6, + "learning_rate": 2.645188854266162e-07, + "logits/chosen": -2.50749135017395, + "logits/rejected": -1.8000433444976807, + "logps/chosen": -687.2020263671875, + "logps/rejected": -2130.85595703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.140256881713867, + "rewards/margins": 14.726911544799805, + "rewards/rejected": -20.867168426513672, + "step": 43620 + }, + { + "epoch": 2.6, + "learning_rate": 2.637428842646389e-07, + "logits/chosen": -2.5364766120910645, + "logits/rejected": -1.885171890258789, + "logps/chosen": -676.960205078125, + "logps/rejected": -2113.789306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001902103424072, + "rewards/margins": 14.703015327453613, + "rewards/rejected": -20.70491600036621, + "step": 43630 + }, + { + "epoch": 2.6, + "learning_rate": 2.629679596368087e-07, + "logits/chosen": -2.5199637413024902, + "logits/rejected": -1.8609262704849243, + "logps/chosen": -671.8983154296875, + "logps/rejected": -1997.3880615234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.999728202819824, + "rewards/margins": 13.539959907531738, + "rewards/rejected": -19.539688110351562, + "step": 43640 + }, + { + "epoch": 2.6, + "learning_rate": 2.621941119161739e-07, + "logits/chosen": -2.511395215988159, + "logits/rejected": -1.7274303436279297, + "logps/chosen": -687.4722290039062, + "logps/rejected": -2147.615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128517150878906, + "rewards/margins": 14.909143447875977, + "rewards/rejected": -21.037662506103516, + "step": 43650 + }, + { + "epoch": 2.6, + "learning_rate": 2.6142134147526533e-07, + "logits/chosen": -2.5729598999023438, + "logits/rejected": -1.9233009815216064, + "logps/chosen": -684.6629028320312, + "logps/rejected": -2140.094970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117138862609863, + "rewards/margins": 14.847668647766113, + "rewards/rejected": -20.96480941772461, + "step": 43660 + }, + { + "epoch": 2.6, + "learning_rate": 2.6064964868609435e-07, + "logits/chosen": -2.562119960784912, + "logits/rejected": -1.8464187383651733, + "logps/chosen": -681.1934204101562, + "logps/rejected": -2177.54345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13309383392334, + "rewards/margins": 15.202458381652832, + "rewards/rejected": -21.335552215576172, + "step": 43670 + }, + { + "epoch": 2.6, + "learning_rate": 2.598790339201537e-07, + "logits/chosen": -2.5352063179016113, + "logits/rejected": -1.8532028198242188, + "logps/chosen": -668.2425537109375, + "logps/rejected": -2141.65087890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997504711151123, + "rewards/margins": 14.972749710083008, + "rewards/rejected": -20.97025489807129, + "step": 43680 + }, + { + "epoch": 2.61, + "learning_rate": 2.591094975484179e-07, + "logits/chosen": -2.508899450302124, + "logits/rejected": -1.8498777151107788, + "logps/chosen": -681.271240234375, + "logps/rejected": -2170.5576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1407928466796875, + "rewards/margins": 15.117283821105957, + "rewards/rejected": -21.25807762145996, + "step": 43690 + }, + { + "epoch": 2.61, + "learning_rate": 2.5834103994134136e-07, + "logits/chosen": -2.562079906463623, + "logits/rejected": -1.9471886157989502, + "logps/chosen": -683.3792114257812, + "logps/rejected": -2068.908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151249408721924, + "rewards/margins": 14.100885391235352, + "rewards/rejected": -20.252134323120117, + "step": 43700 + }, + { + "epoch": 2.61, + "learning_rate": 2.575736614688595e-07, + "logits/chosen": -2.5619382858276367, + "logits/rejected": -1.9179134368896484, + "logps/chosen": -675.89453125, + "logps/rejected": -2233.81787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0457048416137695, + "rewards/margins": 15.863311767578125, + "rewards/rejected": -21.909015655517578, + "step": 43710 + }, + { + "epoch": 2.61, + "learning_rate": 2.5680736250038836e-07, + "logits/chosen": -2.5704755783081055, + "logits/rejected": -1.946160912513733, + "logps/chosen": -675.9750366210938, + "logps/rejected": -2201.642578125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.04416561126709, + "rewards/margins": 15.532623291015625, + "rewards/rejected": -21.57678985595703, + "step": 43720 + }, + { + "epoch": 2.61, + "learning_rate": 2.5604214340482436e-07, + "logits/chosen": -2.5219638347625732, + "logits/rejected": -1.8609148263931274, + "logps/chosen": -672.7902221679688, + "logps/rejected": -2142.684326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014512062072754, + "rewards/margins": 14.956754684448242, + "rewards/rejected": -20.971267700195312, + "step": 43730 + }, + { + "epoch": 2.61, + "learning_rate": 2.552780045505446e-07, + "logits/chosen": -2.5149009227752686, + "logits/rejected": -1.8144075870513916, + "logps/chosen": -666.1499633789062, + "logps/rejected": -2133.299072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.975118160247803, + "rewards/margins": 14.912347793579102, + "rewards/rejected": -20.887462615966797, + "step": 43740 + }, + { + "epoch": 2.61, + "learning_rate": 2.545149463054036e-07, + "logits/chosen": -2.557084560394287, + "logits/rejected": -1.9437341690063477, + "logps/chosen": -677.4676513671875, + "logps/rejected": -2182.9375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.057971000671387, + "rewards/margins": 15.328313827514648, + "rewards/rejected": -21.386287689208984, + "step": 43750 + }, + { + "epoch": 2.61, + "learning_rate": 2.537529690367399e-07, + "logits/chosen": -2.5547072887420654, + "logits/rejected": -1.8641163110733032, + "logps/chosen": -701.3267822265625, + "logps/rejected": -2147.87158203125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.278835773468018, + "rewards/margins": 14.76011848449707, + "rewards/rejected": -21.038955688476562, + "step": 43760 + }, + { + "epoch": 2.61, + "learning_rate": 2.529920731113672e-07, + "logits/chosen": -2.5098328590393066, + "logits/rejected": -1.9295438528060913, + "logps/chosen": -698.7689208984375, + "logps/rejected": -2143.849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.319578647613525, + "rewards/margins": 14.650609970092773, + "rewards/rejected": -20.97018814086914, + "step": 43770 + }, + { + "epoch": 2.61, + "learning_rate": 2.5223225889558277e-07, + "logits/chosen": -2.5369439125061035, + "logits/rejected": -2.0146782398223877, + "logps/chosen": -666.2467041015625, + "logps/rejected": -2144.83203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.015988349914551, + "rewards/margins": 14.981036186218262, + "rewards/rejected": -20.997024536132812, + "step": 43780 + }, + { + "epoch": 2.61, + "learning_rate": 2.514735267551596e-07, + "logits/chosen": -2.5540120601654053, + "logits/rejected": -1.9329379796981812, + "logps/chosen": -686.006591796875, + "logps/rejected": -2140.322998046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159303188323975, + "rewards/margins": 14.793255805969238, + "rewards/rejected": -20.952556610107422, + "step": 43790 + }, + { + "epoch": 2.61, + "learning_rate": 2.507158770553528e-07, + "logits/chosen": -2.5562243461608887, + "logits/rejected": -1.9396816492080688, + "logps/chosen": -693.6673583984375, + "logps/rejected": -2133.864501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.228621006011963, + "rewards/margins": 14.680135726928711, + "rewards/rejected": -20.90875816345215, + "step": 43800 + }, + { + "epoch": 2.61, + "learning_rate": 2.49959310160893e-07, + "logits/chosen": -2.5424365997314453, + "logits/rejected": -1.8433481454849243, + "logps/chosen": -685.4205932617188, + "logps/rejected": -2091.57958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.130174160003662, + "rewards/margins": 14.338308334350586, + "rewards/rejected": -20.46848487854004, + "step": 43810 + }, + { + "epoch": 2.61, + "learning_rate": 2.4920382643599363e-07, + "logits/chosen": -2.5474772453308105, + "logits/rejected": -1.7877212762832642, + "logps/chosen": -693.7890625, + "logps/rejected": -2147.317138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220323085784912, + "rewards/margins": 14.81959342956543, + "rewards/rejected": -21.039915084838867, + "step": 43820 + }, + { + "epoch": 2.61, + "learning_rate": 2.484494262443429e-07, + "logits/chosen": -2.5805270671844482, + "logits/rejected": -1.9364516735076904, + "logps/chosen": -660.5382690429688, + "logps/rejected": -2189.22265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.886005401611328, + "rewards/margins": 15.561366081237793, + "rewards/rejected": -21.447370529174805, + "step": 43830 + }, + { + "epoch": 2.61, + "learning_rate": 2.476961099491099e-07, + "logits/chosen": -2.5095763206481934, + "logits/rejected": -1.8431379795074463, + "logps/chosen": -670.5327758789062, + "logps/rejected": -2116.866943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.989325523376465, + "rewards/margins": 14.732879638671875, + "rewards/rejected": -20.722206115722656, + "step": 43840 + }, + { + "epoch": 2.61, + "learning_rate": 2.4694387791294073e-07, + "logits/chosen": -2.5408339500427246, + "logits/rejected": -1.9052143096923828, + "logps/chosen": -661.5115966796875, + "logps/rejected": -2086.118408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.94037389755249, + "rewards/margins": 14.481849670410156, + "rewards/rejected": -20.422225952148438, + "step": 43850 + }, + { + "epoch": 2.62, + "learning_rate": 2.4619273049796e-07, + "logits/chosen": -2.492274761199951, + "logits/rejected": -1.675675392150879, + "logps/chosen": -683.8228759765625, + "logps/rejected": -2027.8736572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1057281494140625, + "rewards/margins": 13.743690490722656, + "rewards/rejected": -19.849422454833984, + "step": 43860 + }, + { + "epoch": 2.62, + "learning_rate": 2.454426680657701e-07, + "logits/chosen": -2.5430243015289307, + "logits/rejected": -1.8514482975006104, + "logps/chosen": -675.2879638671875, + "logps/rejected": -2134.45751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068929672241211, + "rewards/margins": 14.833783149719238, + "rewards/rejected": -20.902713775634766, + "step": 43870 + }, + { + "epoch": 2.62, + "learning_rate": 2.446936909774511e-07, + "logits/chosen": -2.5331053733825684, + "logits/rejected": -1.7878711223602295, + "logps/chosen": -681.8941040039062, + "logps/rejected": -2226.260498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107563495635986, + "rewards/margins": 15.716543197631836, + "rewards/rejected": -21.824106216430664, + "step": 43880 + }, + { + "epoch": 2.62, + "learning_rate": 2.439457995935604e-07, + "logits/chosen": -2.529655694961548, + "logits/rejected": -1.849977731704712, + "logps/chosen": -687.7484130859375, + "logps/rejected": -2102.41845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.131040573120117, + "rewards/margins": 14.444639205932617, + "rewards/rejected": -20.575679779052734, + "step": 43890 + }, + { + "epoch": 2.62, + "learning_rate": 2.4319899427413307e-07, + "logits/chosen": -2.5253217220306396, + "logits/rejected": -1.7711273431777954, + "logps/chosen": -659.1385498046875, + "logps/rejected": -2080.21240234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.878170967102051, + "rewards/margins": 14.49272346496582, + "rewards/rejected": -20.370891571044922, + "step": 43900 + }, + { + "epoch": 2.62, + "learning_rate": 2.4245327537868125e-07, + "logits/chosen": -2.5571348667144775, + "logits/rejected": -1.9250379800796509, + "logps/chosen": -681.4091796875, + "logps/rejected": -2186.81494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.124737739562988, + "rewards/margins": 15.300130844116211, + "rewards/rejected": -21.424867630004883, + "step": 43910 + }, + { + "epoch": 2.62, + "learning_rate": 2.417086432661939e-07, + "logits/chosen": -2.5338428020477295, + "logits/rejected": -1.8651021718978882, + "logps/chosen": -656.3678588867188, + "logps/rejected": -2175.630126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.894047260284424, + "rewards/margins": 15.41291618347168, + "rewards/rejected": -21.306964874267578, + "step": 43920 + }, + { + "epoch": 2.62, + "learning_rate": 2.409650982951367e-07, + "logits/chosen": -2.538728713989258, + "logits/rejected": -1.8126493692398071, + "logps/chosen": -660.3666381835938, + "logps/rejected": -2208.97998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903162956237793, + "rewards/margins": 15.731348037719727, + "rewards/rejected": -21.634510040283203, + "step": 43930 + }, + { + "epoch": 2.62, + "learning_rate": 2.402226408234523e-07, + "logits/chosen": -2.5079145431518555, + "logits/rejected": -1.9217504262924194, + "logps/chosen": -670.4319458007812, + "logps/rejected": -2167.84423828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001666069030762, + "rewards/margins": 15.227567672729492, + "rewards/rejected": -21.229236602783203, + "step": 43940 + }, + { + "epoch": 2.62, + "learning_rate": 2.394812712085598e-07, + "logits/chosen": -2.556056261062622, + "logits/rejected": -1.9563144445419312, + "logps/chosen": -686.4887084960938, + "logps/rejected": -2132.386474609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148682594299316, + "rewards/margins": 14.748876571655273, + "rewards/rejected": -20.89756202697754, + "step": 43950 + }, + { + "epoch": 2.62, + "learning_rate": 2.3874098980735477e-07, + "logits/chosen": -2.5344173908233643, + "logits/rejected": -1.8350805044174194, + "logps/chosen": -704.6154174804688, + "logps/rejected": -2164.51708984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3057541847229, + "rewards/margins": 14.897201538085938, + "rewards/rejected": -21.202957153320312, + "step": 43960 + }, + { + "epoch": 2.62, + "learning_rate": 2.380017969762083e-07, + "logits/chosen": -2.5238680839538574, + "logits/rejected": -1.85552179813385, + "logps/chosen": -683.0340576171875, + "logps/rejected": -2144.90478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13840913772583, + "rewards/margins": 14.857562065124512, + "rewards/rejected": -20.995975494384766, + "step": 43970 + }, + { + "epoch": 2.62, + "learning_rate": 2.3726369307096765e-07, + "logits/chosen": -2.606393337249756, + "logits/rejected": -2.0021116733551025, + "logps/chosen": -722.7989501953125, + "logps/rejected": -2169.78759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.504502773284912, + "rewards/margins": 14.732261657714844, + "rewards/rejected": -21.236764907836914, + "step": 43980 + }, + { + "epoch": 2.62, + "learning_rate": 2.3652667844695664e-07, + "logits/chosen": -2.5306529998779297, + "logits/rejected": -1.9280483722686768, + "logps/chosen": -685.6576538085938, + "logps/rejected": -2082.950927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158915042877197, + "rewards/margins": 14.22564697265625, + "rewards/rejected": -20.384563446044922, + "step": 43990 + }, + { + "epoch": 2.62, + "learning_rate": 2.3579075345897372e-07, + "logits/chosen": -2.533174514770508, + "logits/rejected": -1.882086157798767, + "logps/chosen": -681.0252685546875, + "logps/rejected": -2100.16796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07619571685791, + "rewards/margins": 14.488273620605469, + "rewards/rejected": -20.56446647644043, + "step": 44000 + }, + { + "epoch": 2.62, + "learning_rate": 2.3505591846129356e-07, + "logits/chosen": -2.510244369506836, + "logits/rejected": -1.8118194341659546, + "logps/chosen": -666.9215087890625, + "logps/rejected": -2129.709716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.950585842132568, + "rewards/margins": 14.903169631958008, + "rewards/rejected": -20.853755950927734, + "step": 44010 + }, + { + "epoch": 2.62, + "learning_rate": 2.3432217380766454e-07, + "logits/chosen": -2.5211098194122314, + "logits/rejected": -1.921063780784607, + "logps/chosen": -683.354248046875, + "logps/rejected": -2107.342041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.060769081115723, + "rewards/margins": 14.574470520019531, + "rewards/rejected": -20.635238647460938, + "step": 44020 + }, + { + "epoch": 2.63, + "learning_rate": 2.33589519851313e-07, + "logits/chosen": -2.492428779602051, + "logits/rejected": -1.8621432781219482, + "logps/chosen": -682.3558959960938, + "logps/rejected": -2199.873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.135482311248779, + "rewards/margins": 15.430198669433594, + "rewards/rejected": -21.565683364868164, + "step": 44030 + }, + { + "epoch": 2.63, + "learning_rate": 2.3285795694493686e-07, + "logits/chosen": -2.543793201446533, + "logits/rejected": -1.8027108907699585, + "logps/chosen": -692.3094482421875, + "logps/rejected": -2108.6123046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2548136711120605, + "rewards/margins": 14.386927604675293, + "rewards/rejected": -20.641742706298828, + "step": 44040 + }, + { + "epoch": 2.63, + "learning_rate": 2.3212748544071213e-07, + "logits/chosen": -2.560563564300537, + "logits/rejected": -1.887393593788147, + "logps/chosen": -667.9370727539062, + "logps/rejected": -2148.27197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.983885765075684, + "rewards/margins": 15.049123764038086, + "rewards/rejected": -21.033008575439453, + "step": 44050 + }, + { + "epoch": 2.63, + "learning_rate": 2.3139810569028614e-07, + "logits/chosen": -2.58461332321167, + "logits/rejected": -1.8888040781021118, + "logps/chosen": -694.3236694335938, + "logps/rejected": -2129.982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.226531982421875, + "rewards/margins": 14.634753227233887, + "rewards/rejected": -20.861284255981445, + "step": 44060 + }, + { + "epoch": 2.63, + "learning_rate": 2.3066981804478416e-07, + "logits/chosen": -2.5422046184539795, + "logits/rejected": -1.8649542331695557, + "logps/chosen": -687.2705688476562, + "logps/rejected": -2137.89892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2087554931640625, + "rewards/margins": 14.72668170928955, + "rewards/rejected": -20.93543815612793, + "step": 44070 + }, + { + "epoch": 2.63, + "learning_rate": 2.2994262285480184e-07, + "logits/chosen": -2.5571742057800293, + "logits/rejected": -1.9086748361587524, + "logps/chosen": -684.5209350585938, + "logps/rejected": -2097.958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.108360290527344, + "rewards/margins": 14.439010620117188, + "rewards/rejected": -20.5473690032959, + "step": 44080 + }, + { + "epoch": 2.63, + "learning_rate": 2.2921652047041305e-07, + "logits/chosen": -2.5600192546844482, + "logits/rejected": -1.7745411396026611, + "logps/chosen": -678.7566528320312, + "logps/rejected": -2182.705322265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.134227752685547, + "rewards/margins": 15.259759902954102, + "rewards/rejected": -21.393985748291016, + "step": 44090 + }, + { + "epoch": 2.63, + "learning_rate": 2.2849151124116148e-07, + "logits/chosen": -2.5928969383239746, + "logits/rejected": -1.8476699590682983, + "logps/chosen": -701.3411865234375, + "logps/rejected": -2089.87255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.234593868255615, + "rewards/margins": 14.233221054077148, + "rewards/rejected": -20.467815399169922, + "step": 44100 + }, + { + "epoch": 2.63, + "learning_rate": 2.277675955160677e-07, + "logits/chosen": -2.5495500564575195, + "logits/rejected": -1.7613856792449951, + "logps/chosen": -664.7073974609375, + "logps/rejected": -2104.217041015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965691566467285, + "rewards/margins": 14.634040832519531, + "rewards/rejected": -20.5997314453125, + "step": 44110 + }, + { + "epoch": 2.63, + "learning_rate": 2.270447736436246e-07, + "logits/chosen": -2.526334285736084, + "logits/rejected": -1.8431228399276733, + "logps/chosen": -694.0440673828125, + "logps/rejected": -2120.92529296875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.261017799377441, + "rewards/margins": 14.506746292114258, + "rewards/rejected": -20.767763137817383, + "step": 44120 + }, + { + "epoch": 2.63, + "learning_rate": 2.2632304597179827e-07, + "logits/chosen": -2.525639533996582, + "logits/rejected": -1.8823432922363281, + "logps/chosen": -691.1519775390625, + "logps/rejected": -2149.857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193012714385986, + "rewards/margins": 14.857144355773926, + "rewards/rejected": -21.05015754699707, + "step": 44130 + }, + { + "epoch": 2.63, + "learning_rate": 2.2560241284802858e-07, + "logits/chosen": -2.556870937347412, + "logits/rejected": -1.8375122547149658, + "logps/chosen": -679.71337890625, + "logps/rejected": -2209.651611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085842132568359, + "rewards/margins": 15.573678970336914, + "rewards/rejected": -21.65951919555664, + "step": 44140 + }, + { + "epoch": 2.63, + "learning_rate": 2.2488287461922832e-07, + "logits/chosen": -2.550276279449463, + "logits/rejected": -1.8910239934921265, + "logps/chosen": -699.2559204101562, + "logps/rejected": -2181.013427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.315288543701172, + "rewards/margins": 15.061140060424805, + "rewards/rejected": -21.376426696777344, + "step": 44150 + }, + { + "epoch": 2.63, + "learning_rate": 2.2416443163178342e-07, + "logits/chosen": -2.5557873249053955, + "logits/rejected": -1.9258455038070679, + "logps/chosen": -664.8626708984375, + "logps/rejected": -2153.662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914478778839111, + "rewards/margins": 15.178277969360352, + "rewards/rejected": -21.092754364013672, + "step": 44160 + }, + { + "epoch": 2.63, + "learning_rate": 2.2344708423155198e-07, + "logits/chosen": -2.541090488433838, + "logits/rejected": -1.8036470413208008, + "logps/chosen": -690.3538818359375, + "logps/rejected": -2263.553466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.209784984588623, + "rewards/margins": 15.988975524902344, + "rewards/rejected": -22.198760986328125, + "step": 44170 + }, + { + "epoch": 2.63, + "learning_rate": 2.2273083276386525e-07, + "logits/chosen": -2.563621759414673, + "logits/rejected": -1.9628524780273438, + "logps/chosen": -661.7813110351562, + "logps/rejected": -2140.49072265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914398193359375, + "rewards/margins": 15.039767265319824, + "rewards/rejected": -20.954166412353516, + "step": 44180 + }, + { + "epoch": 2.64, + "learning_rate": 2.2201567757352631e-07, + "logits/chosen": -2.5335307121276855, + "logits/rejected": -1.936256766319275, + "logps/chosen": -658.7298583984375, + "logps/rejected": -2154.97216796875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.882620811462402, + "rewards/margins": 15.21857738494873, + "rewards/rejected": -21.101198196411133, + "step": 44190 + }, + { + "epoch": 2.64, + "learning_rate": 2.2130161900481146e-07, + "logits/chosen": -2.5000901222229004, + "logits/rejected": -1.8574527502059937, + "logps/chosen": -669.2509155273438, + "logps/rejected": -2115.163818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982776641845703, + "rewards/margins": 14.716526985168457, + "rewards/rejected": -20.69930648803711, + "step": 44200 + }, + { + "epoch": 2.64, + "learning_rate": 2.2058865740146817e-07, + "logits/chosen": -2.532754898071289, + "logits/rejected": -1.753395676612854, + "logps/chosen": -682.6295166015625, + "logps/rejected": -2102.72802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039119720458984, + "rewards/margins": 14.542097091674805, + "rewards/rejected": -20.58121681213379, + "step": 44210 + }, + { + "epoch": 2.64, + "learning_rate": 2.1987679310671582e-07, + "logits/chosen": -2.554064989089966, + "logits/rejected": -1.7970161437988281, + "logps/chosen": -701.1690063476562, + "logps/rejected": -2198.55908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3091559410095215, + "rewards/margins": 15.224069595336914, + "rewards/rejected": -21.53322410583496, + "step": 44220 + }, + { + "epoch": 2.64, + "learning_rate": 2.1916602646324643e-07, + "logits/chosen": -2.5942301750183105, + "logits/rejected": -1.8199265003204346, + "logps/chosen": -664.264892578125, + "logps/rejected": -2117.33740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994997024536133, + "rewards/margins": 14.744024276733398, + "rewards/rejected": -20.739023208618164, + "step": 44230 + }, + { + "epoch": 2.64, + "learning_rate": 2.184563578132229e-07, + "logits/chosen": -2.5502982139587402, + "logits/rejected": -1.817283272743225, + "logps/chosen": -669.9608764648438, + "logps/rejected": -2193.39013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.993613243103027, + "rewards/margins": 15.489163398742676, + "rewards/rejected": -21.482776641845703, + "step": 44240 + }, + { + "epoch": 2.64, + "learning_rate": 2.1774778749827946e-07, + "logits/chosen": -2.5105013847351074, + "logits/rejected": -1.8458240032196045, + "logps/chosen": -664.108154296875, + "logps/rejected": -2132.582275390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9065165519714355, + "rewards/margins": 14.985746383666992, + "rewards/rejected": -20.892261505126953, + "step": 44250 + }, + { + "epoch": 2.64, + "learning_rate": 2.1704031585952218e-07, + "logits/chosen": -2.5645480155944824, + "logits/rejected": -1.9585273265838623, + "logps/chosen": -678.0354614257812, + "logps/rejected": -2120.0400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050372123718262, + "rewards/margins": 14.696920394897461, + "rewards/rejected": -20.747291564941406, + "step": 44260 + }, + { + "epoch": 2.64, + "learning_rate": 2.1633394323752748e-07, + "logits/chosen": -2.545384168624878, + "logits/rejected": -1.9082529544830322, + "logps/chosen": -680.510009765625, + "logps/rejected": -2244.11865234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069199562072754, + "rewards/margins": 15.926797866821289, + "rewards/rejected": -21.995996475219727, + "step": 44270 + }, + { + "epoch": 2.64, + "learning_rate": 2.1562866997234421e-07, + "logits/chosen": -2.5217795372009277, + "logits/rejected": -1.9571654796600342, + "logps/chosen": -694.5360107421875, + "logps/rejected": -2126.610107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.155698299407959, + "rewards/margins": 14.664072036743164, + "rewards/rejected": -20.81976890563965, + "step": 44280 + }, + { + "epoch": 2.64, + "learning_rate": 2.1492449640348883e-07, + "logits/chosen": -2.534111499786377, + "logits/rejected": -1.9676488637924194, + "logps/chosen": -704.1431884765625, + "logps/rejected": -2214.67626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.360509872436523, + "rewards/margins": 15.336996078491211, + "rewards/rejected": -21.6975040435791, + "step": 44290 + }, + { + "epoch": 2.64, + "learning_rate": 2.1422142286995267e-07, + "logits/chosen": -2.5169262886047363, + "logits/rejected": -1.7960526943206787, + "logps/chosen": -700.5960693359375, + "logps/rejected": -2149.468505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.271807670593262, + "rewards/margins": 14.785314559936523, + "rewards/rejected": -21.0571231842041, + "step": 44300 + }, + { + "epoch": 2.64, + "learning_rate": 2.1351944971019362e-07, + "logits/chosen": -2.5581626892089844, + "logits/rejected": -1.8416430950164795, + "logps/chosen": -681.4512939453125, + "logps/rejected": -2165.931396484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096410274505615, + "rewards/margins": 15.101861953735352, + "rewards/rejected": -21.198272705078125, + "step": 44310 + }, + { + "epoch": 2.64, + "learning_rate": 2.1281857726214327e-07, + "logits/chosen": -2.5269198417663574, + "logits/rejected": -1.8332500457763672, + "logps/chosen": -682.1232299804688, + "logps/rejected": -2195.65771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138481140136719, + "rewards/margins": 15.387164115905762, + "rewards/rejected": -21.525646209716797, + "step": 44320 + }, + { + "epoch": 2.64, + "learning_rate": 2.1211880586319982e-07, + "logits/chosen": -2.54643177986145, + "logits/rejected": -1.7899720668792725, + "logps/chosen": -697.0426025390625, + "logps/rejected": -2244.51611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.251011848449707, + "rewards/margins": 15.745656967163086, + "rewards/rejected": -21.996671676635742, + "step": 44330 + }, + { + "epoch": 2.64, + "learning_rate": 2.1142013585023464e-07, + "logits/chosen": -2.5164237022399902, + "logits/rejected": -1.7160720825195312, + "logps/chosen": -676.9846801757812, + "logps/rejected": -2186.99267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.021052360534668, + "rewards/margins": 15.386404037475586, + "rewards/rejected": -21.407459259033203, + "step": 44340 + }, + { + "epoch": 2.64, + "learning_rate": 2.1072256755958643e-07, + "logits/chosen": -2.5667037963867188, + "logits/rejected": -1.8354028463363647, + "logps/chosen": -683.3495483398438, + "logps/rejected": -2118.294677734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106900691986084, + "rewards/margins": 14.629989624023438, + "rewards/rejected": -20.736888885498047, + "step": 44350 + }, + { + "epoch": 2.65, + "learning_rate": 2.1002610132706547e-07, + "logits/chosen": -2.5539612770080566, + "logits/rejected": -1.8277900218963623, + "logps/chosen": -682.5338745117188, + "logps/rejected": -2163.90087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.074443340301514, + "rewards/margins": 15.124300003051758, + "rewards/rejected": -21.198741912841797, + "step": 44360 + }, + { + "epoch": 2.65, + "learning_rate": 2.0933073748794996e-07, + "logits/chosen": -2.580411195755005, + "logits/rejected": -1.8476155996322632, + "logps/chosen": -670.8904418945312, + "logps/rejected": -2211.06884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.994873523712158, + "rewards/margins": 15.66712760925293, + "rewards/rejected": -21.66200065612793, + "step": 44370 + }, + { + "epoch": 2.65, + "learning_rate": 2.0863647637698797e-07, + "logits/chosen": -2.548180103302002, + "logits/rejected": -1.8867212533950806, + "logps/chosen": -677.5299072265625, + "logps/rejected": -2084.84423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.090893745422363, + "rewards/margins": 14.324551582336426, + "rewards/rejected": -20.41544532775879, + "step": 44380 + }, + { + "epoch": 2.65, + "learning_rate": 2.0794331832839715e-07, + "logits/chosen": -2.5315845012664795, + "logits/rejected": -1.8276838064193726, + "logps/chosen": -683.6823120117188, + "logps/rejected": -2070.696044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.150466442108154, + "rewards/margins": 14.110163688659668, + "rewards/rejected": -20.260631561279297, + "step": 44390 + }, + { + "epoch": 2.65, + "learning_rate": 2.072512636758639e-07, + "logits/chosen": -2.5283732414245605, + "logits/rejected": -1.8336451053619385, + "logps/chosen": -689.1137084960938, + "logps/rejected": -2157.31201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.169010639190674, + "rewards/margins": 14.94856071472168, + "rewards/rejected": -21.117572784423828, + "step": 44400 + }, + { + "epoch": 2.65, + "learning_rate": 2.0656031275254283e-07, + "logits/chosen": -2.5966579914093018, + "logits/rejected": -1.9269568920135498, + "logps/chosen": -694.7669677734375, + "logps/rejected": -2161.505615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.180539131164551, + "rewards/margins": 14.994715690612793, + "rewards/rejected": -21.175256729125977, + "step": 44410 + }, + { + "epoch": 2.65, + "learning_rate": 2.058704658910579e-07, + "logits/chosen": -2.4940617084503174, + "logits/rejected": -1.901330590248108, + "logps/chosen": -700.5225830078125, + "logps/rejected": -2113.16357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.310585975646973, + "rewards/margins": 14.390207290649414, + "rewards/rejected": -20.700790405273438, + "step": 44420 + }, + { + "epoch": 2.65, + "learning_rate": 2.051817234235015e-07, + "logits/chosen": -2.544847249984741, + "logits/rejected": -1.9591470956802368, + "logps/chosen": -681.3679809570312, + "logps/rejected": -2134.88427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051067352294922, + "rewards/margins": 14.856239318847656, + "rewards/rejected": -20.907304763793945, + "step": 44430 + }, + { + "epoch": 2.65, + "learning_rate": 2.0449408568143398e-07, + "logits/chosen": -2.5197532176971436, + "logits/rejected": -1.782468557357788, + "logps/chosen": -650.1136474609375, + "logps/rejected": -2083.357666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.809666633605957, + "rewards/margins": 14.596872329711914, + "rewards/rejected": -20.406538009643555, + "step": 44440 + }, + { + "epoch": 2.65, + "learning_rate": 2.0380755299588494e-07, + "logits/chosen": -2.520458221435547, + "logits/rejected": -1.7939449548721313, + "logps/chosen": -676.0679931640625, + "logps/rejected": -2132.24609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.055963039398193, + "rewards/margins": 14.827265739440918, + "rewards/rejected": -20.883228302001953, + "step": 44450 + }, + { + "epoch": 2.65, + "learning_rate": 2.0312212569735035e-07, + "logits/chosen": -2.5713043212890625, + "logits/rejected": -1.9562606811523438, + "logps/chosen": -665.33984375, + "logps/rejected": -2083.159423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891644477844238, + "rewards/margins": 14.49193286895752, + "rewards/rejected": -20.383575439453125, + "step": 44460 + }, + { + "epoch": 2.65, + "learning_rate": 2.0243780411579484e-07, + "logits/chosen": -2.5398077964782715, + "logits/rejected": -1.8558826446533203, + "logps/chosen": -669.9906005859375, + "logps/rejected": -2146.11083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.979140758514404, + "rewards/margins": 15.033010482788086, + "rewards/rejected": -21.012147903442383, + "step": 44470 + }, + { + "epoch": 2.65, + "learning_rate": 2.0175458858065155e-07, + "logits/chosen": -2.5533242225646973, + "logits/rejected": -1.8425769805908203, + "logps/chosen": -686.9268188476562, + "logps/rejected": -2228.193603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153645992279053, + "rewards/margins": 15.680402755737305, + "rewards/rejected": -21.834049224853516, + "step": 44480 + }, + { + "epoch": 2.65, + "learning_rate": 2.0107247942081963e-07, + "logits/chosen": -2.552149534225464, + "logits/rejected": -1.870370626449585, + "logps/chosen": -684.998046875, + "logps/rejected": -2101.7294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.122720241546631, + "rewards/margins": 14.450871467590332, + "rewards/rejected": -20.573593139648438, + "step": 44490 + }, + { + "epoch": 2.65, + "learning_rate": 2.00391476964667e-07, + "logits/chosen": -2.5847420692443848, + "logits/rejected": -1.8532317876815796, + "logps/chosen": -687.6377563476562, + "logps/rejected": -2191.31787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173498630523682, + "rewards/margins": 15.295252799987793, + "rewards/rejected": -21.468753814697266, + "step": 44500 + }, + { + "epoch": 2.65, + "learning_rate": 1.99711581540028e-07, + "logits/chosen": -2.5225558280944824, + "logits/rejected": -1.8589636087417603, + "logps/chosen": -675.1585693359375, + "logps/rejected": -2160.4736328125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023730754852295, + "rewards/margins": 15.133509635925293, + "rewards/rejected": -21.157238006591797, + "step": 44510 + }, + { + "epoch": 2.65, + "learning_rate": 1.990327934742045e-07, + "logits/chosen": -2.5686182975769043, + "logits/rejected": -1.899306058883667, + "logps/chosen": -665.5423583984375, + "logps/rejected": -2159.916748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.973382472991943, + "rewards/margins": 15.17663288116455, + "rewards/rejected": -21.150014877319336, + "step": 44520 + }, + { + "epoch": 2.66, + "learning_rate": 1.9835511309396482e-07, + "logits/chosen": -2.492652416229248, + "logits/rejected": -1.8457190990447998, + "logps/chosen": -692.5197143554688, + "logps/rejected": -2113.442138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1967692375183105, + "rewards/margins": 14.495641708374023, + "rewards/rejected": -20.69240951538086, + "step": 44530 + }, + { + "epoch": 2.66, + "learning_rate": 1.976785407255441e-07, + "logits/chosen": -2.5066068172454834, + "logits/rejected": -1.8815542459487915, + "logps/chosen": -689.8091430664062, + "logps/rejected": -2159.62451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166796684265137, + "rewards/margins": 14.986139297485352, + "rewards/rejected": -21.152935028076172, + "step": 44540 + }, + { + "epoch": 2.66, + "learning_rate": 1.9700307669464515e-07, + "logits/chosen": -2.5416362285614014, + "logits/rejected": -1.9220339059829712, + "logps/chosen": -694.8510131835938, + "logps/rejected": -2095.67626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.248193264007568, + "rewards/margins": 14.263226509094238, + "rewards/rejected": -20.511417388916016, + "step": 44550 + }, + { + "epoch": 2.66, + "learning_rate": 1.9632872132643478e-07, + "logits/chosen": -2.51253604888916, + "logits/rejected": -1.737658143043518, + "logps/chosen": -675.0152587890625, + "logps/rejected": -2160.76953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0061259269714355, + "rewards/margins": 15.151944160461426, + "rewards/rejected": -21.15806770324707, + "step": 44560 + }, + { + "epoch": 2.66, + "learning_rate": 1.9565547494554943e-07, + "logits/chosen": -2.5225987434387207, + "logits/rejected": -1.7898855209350586, + "logps/chosen": -683.181884765625, + "logps/rejected": -2103.81201171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068061351776123, + "rewards/margins": 14.536938667297363, + "rewards/rejected": -20.604999542236328, + "step": 44570 + }, + { + "epoch": 2.66, + "learning_rate": 1.949833378760882e-07, + "logits/chosen": -2.5695321559906006, + "logits/rejected": -1.9038679599761963, + "logps/chosen": -679.20166015625, + "logps/rejected": -2189.41943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.120704650878906, + "rewards/margins": 15.333246231079102, + "rewards/rejected": -21.453948974609375, + "step": 44580 + }, + { + "epoch": 2.66, + "learning_rate": 1.9431231044161975e-07, + "logits/chosen": -2.572122812271118, + "logits/rejected": -1.8189646005630493, + "logps/chosen": -671.766845703125, + "logps/rejected": -2178.947998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9960198402404785, + "rewards/margins": 15.342913627624512, + "rewards/rejected": -21.33893394470215, + "step": 44590 + }, + { + "epoch": 2.66, + "learning_rate": 1.9364239296517456e-07, + "logits/chosen": -2.563720703125, + "logits/rejected": -1.8535493612289429, + "logps/chosen": -659.9681396484375, + "logps/rejected": -2115.97216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.870976448059082, + "rewards/margins": 14.842872619628906, + "rewards/rejected": -20.713848114013672, + "step": 44600 + }, + { + "epoch": 2.66, + "learning_rate": 1.92973585769253e-07, + "logits/chosen": -2.546078681945801, + "logits/rejected": -1.744943380355835, + "logps/chosen": -701.1492309570312, + "logps/rejected": -2189.98681640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.273900032043457, + "rewards/margins": 15.180974006652832, + "rewards/rejected": -21.45487403869629, + "step": 44610 + }, + { + "epoch": 2.66, + "learning_rate": 1.9230588917581698e-07, + "logits/chosen": -2.5391945838928223, + "logits/rejected": -1.8020919561386108, + "logps/chosen": -694.517822265625, + "logps/rejected": -2222.83935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.231357097625732, + "rewards/margins": 15.543451309204102, + "rewards/rejected": -21.774808883666992, + "step": 44620 + }, + { + "epoch": 2.66, + "learning_rate": 1.9163930350629745e-07, + "logits/chosen": -2.5420446395874023, + "logits/rejected": -1.8474130630493164, + "logps/chosen": -690.0831298828125, + "logps/rejected": -2178.471923828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.192633152008057, + "rewards/margins": 15.153950691223145, + "rewards/rejected": -21.34658432006836, + "step": 44630 + }, + { + "epoch": 2.66, + "learning_rate": 1.9097382908158713e-07, + "logits/chosen": -2.5847086906433105, + "logits/rejected": -1.9838554859161377, + "logps/chosen": -676.5794677734375, + "logps/rejected": -2103.48193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019449710845947, + "rewards/margins": 14.571932792663574, + "rewards/rejected": -20.59138298034668, + "step": 44640 + }, + { + "epoch": 2.66, + "learning_rate": 1.903094662220456e-07, + "logits/chosen": -2.5232343673706055, + "logits/rejected": -1.8593761920928955, + "logps/chosen": -682.880126953125, + "logps/rejected": -2172.12890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066157341003418, + "rewards/margins": 15.209733963012695, + "rewards/rejected": -21.27589225769043, + "step": 44650 + }, + { + "epoch": 2.66, + "learning_rate": 1.8964621524749844e-07, + "logits/chosen": -2.521489381790161, + "logits/rejected": -1.8502460718154907, + "logps/chosen": -676.634521484375, + "logps/rejected": -2087.23779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110325813293457, + "rewards/margins": 14.328788757324219, + "rewards/rejected": -20.43911361694336, + "step": 44660 + }, + { + "epoch": 2.66, + "learning_rate": 1.8898407647723327e-07, + "logits/chosen": -2.504671573638916, + "logits/rejected": -1.8123939037322998, + "logps/chosen": -691.1763305664062, + "logps/rejected": -2149.61279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193833351135254, + "rewards/margins": 14.844736099243164, + "rewards/rejected": -21.0385684967041, + "step": 44670 + }, + { + "epoch": 2.66, + "learning_rate": 1.8832305023000458e-07, + "logits/chosen": -2.5519254207611084, + "logits/rejected": -1.9015159606933594, + "logps/chosen": -698.730712890625, + "logps/rejected": -2167.486572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2761125564575195, + "rewards/margins": 14.960345268249512, + "rewards/rejected": -21.236454010009766, + "step": 44680 + }, + { + "epoch": 2.66, + "learning_rate": 1.8766313682402949e-07, + "logits/chosen": -2.524135112762451, + "logits/rejected": -1.8432340621948242, + "logps/chosen": -700.8872680664062, + "logps/rejected": -2138.392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.276895046234131, + "rewards/margins": 14.657712936401367, + "rewards/rejected": -20.934606552124023, + "step": 44690 + }, + { + "epoch": 2.67, + "learning_rate": 1.8700433657699162e-07, + "logits/chosen": -2.511951446533203, + "logits/rejected": -1.8199949264526367, + "logps/chosen": -671.8721923828125, + "logps/rejected": -2142.79345703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.067221641540527, + "rewards/margins": 14.928800582885742, + "rewards/rejected": -20.996021270751953, + "step": 44700 + }, + { + "epoch": 2.67, + "learning_rate": 1.8634664980603623e-07, + "logits/chosen": -2.5747997760772705, + "logits/rejected": -1.8813406229019165, + "logps/chosen": -664.52587890625, + "logps/rejected": -2147.76416015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935168266296387, + "rewards/margins": 15.099626541137695, + "rewards/rejected": -21.0347957611084, + "step": 44710 + }, + { + "epoch": 2.67, + "learning_rate": 1.8569007682777417e-07, + "logits/chosen": -2.5785727500915527, + "logits/rejected": -1.8947765827178955, + "logps/chosen": -689.1798095703125, + "logps/rejected": -2135.440673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172406196594238, + "rewards/margins": 14.726323127746582, + "rewards/rejected": -20.898731231689453, + "step": 44720 + }, + { + "epoch": 2.67, + "learning_rate": 1.8503461795827958e-07, + "logits/chosen": -2.5224416255950928, + "logits/rejected": -1.855172872543335, + "logps/chosen": -688.805908203125, + "logps/rejected": -2047.9765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177419185638428, + "rewards/margins": 13.866888046264648, + "rewards/rejected": -20.044307708740234, + "step": 44730 + }, + { + "epoch": 2.67, + "learning_rate": 1.8438027351309034e-07, + "logits/chosen": -2.5823543071746826, + "logits/rejected": -1.8437902927398682, + "logps/chosen": -686.99755859375, + "logps/rejected": -2151.1083984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.161993026733398, + "rewards/margins": 14.90600299835205, + "rewards/rejected": -21.067995071411133, + "step": 44740 + }, + { + "epoch": 2.67, + "learning_rate": 1.8372704380720806e-07, + "logits/chosen": -2.548576831817627, + "logits/rejected": -1.8540292978286743, + "logps/chosen": -662.9678955078125, + "logps/rejected": -2149.880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.938958644866943, + "rewards/margins": 15.116172790527344, + "rewards/rejected": -21.055131912231445, + "step": 44750 + }, + { + "epoch": 2.67, + "learning_rate": 1.8307492915509705e-07, + "logits/chosen": -2.5322771072387695, + "logits/rejected": -1.8504669666290283, + "logps/chosen": -682.9413452148438, + "logps/rejected": -2030.1005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1377763748168945, + "rewards/margins": 13.727335929870605, + "rewards/rejected": -19.8651123046875, + "step": 44760 + }, + { + "epoch": 2.67, + "learning_rate": 1.8242392987068592e-07, + "logits/chosen": -2.561950922012329, + "logits/rejected": -1.8917529582977295, + "logps/chosen": -691.7089233398438, + "logps/rejected": -2181.234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.240292549133301, + "rewards/margins": 15.140670776367188, + "rewards/rejected": -21.380964279174805, + "step": 44770 + }, + { + "epoch": 2.67, + "learning_rate": 1.8177404626736505e-07, + "logits/chosen": -2.533979654312134, + "logits/rejected": -1.8383433818817139, + "logps/chosen": -678.947998046875, + "logps/rejected": -2140.453857421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.089865207672119, + "rewards/margins": 14.871790885925293, + "rewards/rejected": -20.961652755737305, + "step": 44780 + }, + { + "epoch": 2.67, + "learning_rate": 1.8112527865798896e-07, + "logits/chosen": -2.566288709640503, + "logits/rejected": -1.8730313777923584, + "logps/chosen": -650.0435791015625, + "logps/rejected": -2083.911376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.751354217529297, + "rewards/margins": 14.63859748840332, + "rewards/rejected": -20.389949798583984, + "step": 44790 + }, + { + "epoch": 2.67, + "learning_rate": 1.8047762735487473e-07, + "logits/chosen": -2.5087890625, + "logits/rejected": -1.8581342697143555, + "logps/chosen": -681.784423828125, + "logps/rejected": -2068.56298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.082884311676025, + "rewards/margins": 14.158404350280762, + "rewards/rejected": -20.241289138793945, + "step": 44800 + }, + { + "epoch": 2.67, + "learning_rate": 1.798310926698002e-07, + "logits/chosen": -2.487480640411377, + "logits/rejected": -1.851768136024475, + "logps/chosen": -675.0366821289062, + "logps/rejected": -2176.193603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.022080421447754, + "rewards/margins": 15.304367065429688, + "rewards/rejected": -21.326448440551758, + "step": 44810 + }, + { + "epoch": 2.67, + "learning_rate": 1.7918567491400862e-07, + "logits/chosen": -2.546905517578125, + "logits/rejected": -1.8659833669662476, + "logps/chosen": -677.9242553710938, + "logps/rejected": -2153.603759765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.091195583343506, + "rewards/margins": 15.003021240234375, + "rewards/rejected": -21.09421730041504, + "step": 44820 + }, + { + "epoch": 2.67, + "learning_rate": 1.7854137439820312e-07, + "logits/chosen": -2.5598273277282715, + "logits/rejected": -1.952286720275879, + "logps/chosen": -681.439208984375, + "logps/rejected": -2172.66552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101664066314697, + "rewards/margins": 15.177070617675781, + "rewards/rejected": -21.278736114501953, + "step": 44830 + }, + { + "epoch": 2.67, + "learning_rate": 1.7789819143255087e-07, + "logits/chosen": -2.560939073562622, + "logits/rejected": -1.9171314239501953, + "logps/chosen": -707.0374755859375, + "logps/rejected": -2015.1715087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.373701095581055, + "rewards/margins": 13.342394828796387, + "rewards/rejected": -19.716093063354492, + "step": 44840 + }, + { + "epoch": 2.67, + "learning_rate": 1.7725612632667895e-07, + "logits/chosen": -2.61564302444458, + "logits/rejected": -1.9607927799224854, + "logps/chosen": -684.4390869140625, + "logps/rejected": -2155.59716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.07667350769043, + "rewards/margins": 15.044140815734863, + "rewards/rejected": -21.12081527709961, + "step": 44850 + }, + { + "epoch": 2.68, + "learning_rate": 1.7661517938967866e-07, + "logits/chosen": -2.5173697471618652, + "logits/rejected": -1.858808159828186, + "logps/chosen": -673.8782348632812, + "logps/rejected": -2041.552001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087123870849609, + "rewards/margins": 13.885080337524414, + "rewards/rejected": -19.972204208374023, + "step": 44860 + }, + { + "epoch": 2.68, + "learning_rate": 1.7597535093010127e-07, + "logits/chosen": -2.567563533782959, + "logits/rejected": -1.8546149730682373, + "logps/chosen": -664.6624755859375, + "logps/rejected": -2149.05517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940337181091309, + "rewards/margins": 15.109575271606445, + "rewards/rejected": -21.049911499023438, + "step": 44870 + }, + { + "epoch": 2.68, + "learning_rate": 1.7533664125596038e-07, + "logits/chosen": -2.5892601013183594, + "logits/rejected": -1.7894054651260376, + "logps/chosen": -663.0722045898438, + "logps/rejected": -2194.15673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.849111557006836, + "rewards/margins": 15.635729789733887, + "rewards/rejected": -21.484838485717773, + "step": 44880 + }, + { + "epoch": 2.68, + "learning_rate": 1.746990506747312e-07, + "logits/chosen": -2.5164220333099365, + "logits/rejected": -1.8233057260513306, + "logps/chosen": -674.8914794921875, + "logps/rejected": -2181.331298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039699077606201, + "rewards/margins": 15.32502269744873, + "rewards/rejected": -21.36472511291504, + "step": 44890 + }, + { + "epoch": 2.68, + "learning_rate": 1.740625794933487e-07, + "logits/chosen": -2.5564632415771484, + "logits/rejected": -1.859349250793457, + "logps/chosen": -681.4334716796875, + "logps/rejected": -2227.231689453125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148540496826172, + "rewards/margins": 15.676717758178711, + "rewards/rejected": -21.825260162353516, + "step": 44900 + }, + { + "epoch": 2.68, + "learning_rate": 1.7342722801821143e-07, + "logits/chosen": -2.508639097213745, + "logits/rejected": -1.8529956340789795, + "logps/chosen": -679.30029296875, + "logps/rejected": -2114.10986328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0930705070495605, + "rewards/margins": 14.603856086730957, + "rewards/rejected": -20.69692611694336, + "step": 44910 + }, + { + "epoch": 2.68, + "learning_rate": 1.7279299655517611e-07, + "logits/chosen": -2.5326619148254395, + "logits/rejected": -1.9061237573623657, + "logps/chosen": -699.7101440429688, + "logps/rejected": -2107.065185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.299792766571045, + "rewards/margins": 14.331342697143555, + "rewards/rejected": -20.631135940551758, + "step": 44920 + }, + { + "epoch": 2.68, + "learning_rate": 1.7215988540956325e-07, + "logits/chosen": -2.5407960414886475, + "logits/rejected": -1.816619873046875, + "logps/chosen": -661.88232421875, + "logps/rejected": -2143.81884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.929099082946777, + "rewards/margins": 15.06367301940918, + "rewards/rejected": -20.99277114868164, + "step": 44930 + }, + { + "epoch": 2.68, + "learning_rate": 1.7152789488615124e-07, + "logits/chosen": -2.56432843208313, + "logits/rejected": -1.7859035730361938, + "logps/chosen": -679.9788818359375, + "logps/rejected": -2109.705322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0499982833862305, + "rewards/margins": 14.592679977416992, + "rewards/rejected": -20.64267921447754, + "step": 44940 + }, + { + "epoch": 2.68, + "learning_rate": 1.7089702528918172e-07, + "logits/chosen": -2.5193052291870117, + "logits/rejected": -1.8001104593276978, + "logps/chosen": -650.0623168945312, + "logps/rejected": -2050.365478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.842581748962402, + "rewards/margins": 14.214471817016602, + "rewards/rejected": -20.057056427001953, + "step": 44950 + }, + { + "epoch": 2.68, + "learning_rate": 1.7026727692235373e-07, + "logits/chosen": -2.5558345317840576, + "logits/rejected": -1.9059593677520752, + "logps/chosen": -690.203857421875, + "logps/rejected": -2039.3414306640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.15134334564209, + "rewards/margins": 13.791951179504395, + "rewards/rejected": -19.943296432495117, + "step": 44960 + }, + { + "epoch": 2.68, + "learning_rate": 1.6963865008882975e-07, + "logits/chosen": -2.5398991107940674, + "logits/rejected": -1.8662294149398804, + "logps/chosen": -667.0247192382812, + "logps/rejected": -2116.71728515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9895243644714355, + "rewards/margins": 14.73320484161377, + "rewards/rejected": -20.722728729248047, + "step": 44970 + }, + { + "epoch": 2.68, + "learning_rate": 1.6901114509122934e-07, + "logits/chosen": -2.5570380687713623, + "logits/rejected": -1.9820833206176758, + "logps/chosen": -668.4039306640625, + "logps/rejected": -2141.2177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9807209968566895, + "rewards/margins": 14.986920356750488, + "rewards/rejected": -20.967641830444336, + "step": 44980 + }, + { + "epoch": 2.68, + "learning_rate": 1.6838476223163396e-07, + "logits/chosen": -2.510603189468384, + "logits/rejected": -1.8385009765625, + "logps/chosen": -696.9086303710938, + "logps/rejected": -2127.270751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2450995445251465, + "rewards/margins": 14.5747709274292, + "rewards/rejected": -20.819869995117188, + "step": 44990 + }, + { + "epoch": 2.68, + "learning_rate": 1.6775950181158462e-07, + "logits/chosen": -2.520603656768799, + "logits/rejected": -1.8222354650497437, + "logps/chosen": -677.8258056640625, + "logps/rejected": -2052.482666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.112728595733643, + "rewards/margins": 13.970514297485352, + "rewards/rejected": -20.083242416381836, + "step": 45000 + }, + { + "epoch": 2.68, + "learning_rate": 1.671353641320811e-07, + "logits/chosen": -2.6069798469543457, + "logits/rejected": -2.021754026412964, + "logps/chosen": -652.4564208984375, + "logps/rejected": -2175.354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.843060493469238, + "rewards/margins": 15.471817016601562, + "rewards/rejected": -21.314876556396484, + "step": 45010 + }, + { + "epoch": 2.68, + "learning_rate": 1.665123494935836e-07, + "logits/chosen": -2.5383641719818115, + "logits/rejected": -1.9013326168060303, + "logps/chosen": -690.4483032226562, + "logps/rejected": -2144.873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153481483459473, + "rewards/margins": 14.85533332824707, + "rewards/rejected": -21.00881576538086, + "step": 45020 + }, + { + "epoch": 2.69, + "learning_rate": 1.6589045819601134e-07, + "logits/chosen": -2.544349193572998, + "logits/rejected": -1.8475080728530884, + "logps/chosen": -667.9837036132812, + "logps/rejected": -2088.54931640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958503723144531, + "rewards/margins": 14.484275817871094, + "rewards/rejected": -20.442777633666992, + "step": 45030 + }, + { + "epoch": 2.69, + "learning_rate": 1.652696905387427e-07, + "logits/chosen": -2.5101170539855957, + "logits/rejected": -1.842728853225708, + "logps/chosen": -684.5381469726562, + "logps/rejected": -2044.5504150390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166825294494629, + "rewards/margins": 13.850227355957031, + "rewards/rejected": -20.017051696777344, + "step": 45040 + }, + { + "epoch": 2.69, + "learning_rate": 1.6465004682061525e-07, + "logits/chosen": -2.5577926635742188, + "logits/rejected": -1.897010087966919, + "logps/chosen": -688.574951171875, + "logps/rejected": -2210.42724609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.176199436187744, + "rewards/margins": 15.459724426269531, + "rewards/rejected": -21.63592529296875, + "step": 45050 + }, + { + "epoch": 2.69, + "learning_rate": 1.640315273399254e-07, + "logits/chosen": -2.552337646484375, + "logits/rejected": -1.8794142007827759, + "logps/chosen": -667.5966796875, + "logps/rejected": -2040.5404052734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967341423034668, + "rewards/margins": 14.002337455749512, + "rewards/rejected": -19.96967887878418, + "step": 45060 + }, + { + "epoch": 2.69, + "learning_rate": 1.634141323944291e-07, + "logits/chosen": -2.5649795532226562, + "logits/rejected": -1.948102593421936, + "logps/chosen": -679.4765014648438, + "logps/rejected": -2197.02099609375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079475402832031, + "rewards/margins": 15.44188117980957, + "rewards/rejected": -21.5213565826416, + "step": 45070 + }, + { + "epoch": 2.69, + "learning_rate": 1.6279786228133892e-07, + "logits/chosen": -2.575916051864624, + "logits/rejected": -1.8767306804656982, + "logps/chosen": -659.2984619140625, + "logps/rejected": -2180.89599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.860335826873779, + "rewards/margins": 15.513273239135742, + "rewards/rejected": -21.37360954284668, + "step": 45080 + }, + { + "epoch": 2.69, + "learning_rate": 1.621827172973281e-07, + "logits/chosen": -2.504744052886963, + "logits/rejected": -1.817151665687561, + "logps/chosen": -665.2913818359375, + "logps/rejected": -2013.6064453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9801836013793945, + "rewards/margins": 13.723859786987305, + "rewards/rejected": -19.704044342041016, + "step": 45090 + }, + { + "epoch": 2.69, + "learning_rate": 1.6156869773852752e-07, + "logits/chosen": -2.5367274284362793, + "logits/rejected": -1.821298599243164, + "logps/chosen": -682.6070556640625, + "logps/rejected": -2096.31689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085451602935791, + "rewards/margins": 14.4373140335083, + "rewards/rejected": -20.52276611328125, + "step": 45100 + }, + { + "epoch": 2.69, + "learning_rate": 1.6095580390052628e-07, + "logits/chosen": -2.5410523414611816, + "logits/rejected": -1.8751916885375977, + "logps/chosen": -674.8438110351562, + "logps/rejected": -2093.772216796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047679901123047, + "rewards/margins": 14.45722770690918, + "rewards/rejected": -20.504907608032227, + "step": 45110 + }, + { + "epoch": 2.69, + "learning_rate": 1.603440360783709e-07, + "logits/chosen": -2.5636610984802246, + "logits/rejected": -1.8832439184188843, + "logps/chosen": -697.7642822265625, + "logps/rejected": -2091.584716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.290453910827637, + "rewards/margins": 14.172548294067383, + "rewards/rejected": -20.463003158569336, + "step": 45120 + }, + { + "epoch": 2.69, + "learning_rate": 1.5973339456656688e-07, + "logits/chosen": -2.567690849304199, + "logits/rejected": -1.922174096107483, + "logps/chosen": -660.7579956054688, + "logps/rejected": -2070.82421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910442352294922, + "rewards/margins": 14.350893020629883, + "rewards/rejected": -20.261335372924805, + "step": 45130 + }, + { + "epoch": 2.69, + "learning_rate": 1.591238796590769e-07, + "logits/chosen": -2.537562608718872, + "logits/rejected": -1.8040130138397217, + "logps/chosen": -685.9291381835938, + "logps/rejected": -2104.975341796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.199491500854492, + "rewards/margins": 14.415946960449219, + "rewards/rejected": -20.615436553955078, + "step": 45140 + }, + { + "epoch": 2.69, + "learning_rate": 1.5851549164932118e-07, + "logits/chosen": -2.5511319637298584, + "logits/rejected": -1.838977575302124, + "logps/chosen": -668.8968505859375, + "logps/rejected": -2215.84716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99481201171875, + "rewards/margins": 15.714335441589355, + "rewards/rejected": -21.709148406982422, + "step": 45150 + }, + { + "epoch": 2.69, + "learning_rate": 1.5790823083017853e-07, + "logits/chosen": -2.500429630279541, + "logits/rejected": -1.8543983697891235, + "logps/chosen": -698.613037109375, + "logps/rejected": -2161.53564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3122735023498535, + "rewards/margins": 14.862378120422363, + "rewards/rejected": -21.174652099609375, + "step": 45160 + }, + { + "epoch": 2.69, + "learning_rate": 1.573020974939829e-07, + "logits/chosen": -2.5212178230285645, + "logits/rejected": -1.8317573070526123, + "logps/chosen": -679.7884521484375, + "logps/rejected": -2098.534423828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1416707038879395, + "rewards/margins": 14.411256790161133, + "rewards/rejected": -20.552927017211914, + "step": 45170 + }, + { + "epoch": 2.69, + "learning_rate": 1.5669709193252835e-07, + "logits/chosen": -2.5045480728149414, + "logits/rejected": -1.8814947605133057, + "logps/chosen": -669.9766845703125, + "logps/rejected": -2134.123779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.013563632965088, + "rewards/margins": 14.891202926635742, + "rewards/rejected": -20.904766082763672, + "step": 45180 + }, + { + "epoch": 2.69, + "learning_rate": 1.5609321443706305e-07, + "logits/chosen": -2.542151927947998, + "logits/rejected": -1.8516380786895752, + "logps/chosen": -695.4591674804688, + "logps/rejected": -2154.693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249016761779785, + "rewards/margins": 14.847086906433105, + "rewards/rejected": -21.09610366821289, + "step": 45190 + }, + { + "epoch": 2.7, + "learning_rate": 1.5549046529829527e-07, + "logits/chosen": -2.5550549030303955, + "logits/rejected": -1.9351491928100586, + "logps/chosen": -681.2216186523438, + "logps/rejected": -2158.906494140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.157112121582031, + "rewards/margins": 14.976722717285156, + "rewards/rejected": -21.13383674621582, + "step": 45200 + }, + { + "epoch": 2.7, + "learning_rate": 1.5488884480638677e-07, + "logits/chosen": -2.5211780071258545, + "logits/rejected": -1.9078891277313232, + "logps/chosen": -673.8458251953125, + "logps/rejected": -2093.60791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00410795211792, + "rewards/margins": 14.48827075958252, + "rewards/rejected": -20.492380142211914, + "step": 45210 + }, + { + "epoch": 2.7, + "learning_rate": 1.5428835325095953e-07, + "logits/chosen": -2.5736489295959473, + "logits/rejected": -1.7931236028671265, + "logps/chosen": -676.8211669921875, + "logps/rejected": -2080.201904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019282817840576, + "rewards/margins": 14.34858512878418, + "rewards/rejected": -20.367870330810547, + "step": 45220 + }, + { + "epoch": 2.7, + "learning_rate": 1.5368899092108812e-07, + "logits/chosen": -2.4686474800109863, + "logits/rejected": -1.8748083114624023, + "logps/chosen": -686.8697509765625, + "logps/rejected": -2091.95849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.192002773284912, + "rewards/margins": 14.28154182434082, + "rewards/rejected": -20.473546981811523, + "step": 45230 + }, + { + "epoch": 2.7, + "learning_rate": 1.5309075810530732e-07, + "logits/chosen": -2.5344927310943604, + "logits/rejected": -1.851148247718811, + "logps/chosen": -672.4651489257812, + "logps/rejected": -2197.12158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039787292480469, + "rewards/margins": 15.490821838378906, + "rewards/rejected": -21.530607223510742, + "step": 45240 + }, + { + "epoch": 2.7, + "learning_rate": 1.5249365509160535e-07, + "logits/chosen": -2.5325751304626465, + "logits/rejected": -1.813248634338379, + "logps/chosen": -676.1532592773438, + "logps/rejected": -2087.783935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028704643249512, + "rewards/margins": 14.411623001098633, + "rewards/rejected": -20.44032859802246, + "step": 45250 + }, + { + "epoch": 2.7, + "learning_rate": 1.5189768216742783e-07, + "logits/chosen": -2.5554816722869873, + "logits/rejected": -1.9124311208724976, + "logps/chosen": -684.1528930664062, + "logps/rejected": -2163.60693359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.149857521057129, + "rewards/margins": 15.044021606445312, + "rewards/rejected": -21.193878173828125, + "step": 45260 + }, + { + "epoch": 2.7, + "learning_rate": 1.5130283961967614e-07, + "logits/chosen": -2.5538768768310547, + "logits/rejected": -1.8997745513916016, + "logps/chosen": -689.7227783203125, + "logps/rejected": -2107.154052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.17332124710083, + "rewards/margins": 14.448860168457031, + "rewards/rejected": -20.622180938720703, + "step": 45270 + }, + { + "epoch": 2.7, + "learning_rate": 1.5070912773470752e-07, + "logits/chosen": -2.5436816215515137, + "logits/rejected": -1.796079397201538, + "logps/chosen": -701.1512451171875, + "logps/rejected": -2277.13525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.322087287902832, + "rewards/margins": 16.00208282470703, + "rewards/rejected": -22.324169158935547, + "step": 45280 + }, + { + "epoch": 2.7, + "learning_rate": 1.5011654679833481e-07, + "logits/chosen": -2.4987971782684326, + "logits/rejected": -1.8135499954223633, + "logps/chosen": -691.3982543945312, + "logps/rejected": -2265.284423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.213127136230469, + "rewards/margins": 16.007492065429688, + "rewards/rejected": -22.220619201660156, + "step": 45290 + }, + { + "epoch": 2.7, + "learning_rate": 1.4952509709582673e-07, + "logits/chosen": -2.5451457500457764, + "logits/rejected": -1.8441162109375, + "logps/chosen": -675.3445434570312, + "logps/rejected": -2175.759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.064024925231934, + "rewards/margins": 15.254000663757324, + "rewards/rejected": -21.318029403686523, + "step": 45300 + }, + { + "epoch": 2.7, + "learning_rate": 1.4893477891190666e-07, + "logits/chosen": -2.587584972381592, + "logits/rejected": -1.7383091449737549, + "logps/chosen": -679.2412719726562, + "logps/rejected": -2170.779541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.091210842132568, + "rewards/margins": 15.180826187133789, + "rewards/rejected": -21.272035598754883, + "step": 45310 + }, + { + "epoch": 2.7, + "learning_rate": 1.483455925307542e-07, + "logits/chosen": -2.537116527557373, + "logits/rejected": -1.9081318378448486, + "logps/chosen": -666.8443603515625, + "logps/rejected": -2122.99365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.017245292663574, + "rewards/margins": 14.77186393737793, + "rewards/rejected": -20.789108276367188, + "step": 45320 + }, + { + "epoch": 2.7, + "learning_rate": 1.4775753823600359e-07, + "logits/chosen": -2.5034146308898926, + "logits/rejected": -1.9013805389404297, + "logps/chosen": -702.5718994140625, + "logps/rejected": -2047.6724853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309025764465332, + "rewards/margins": 13.730146408081055, + "rewards/rejected": -20.039173126220703, + "step": 45330 + }, + { + "epoch": 2.7, + "learning_rate": 1.471706163107439e-07, + "logits/chosen": -2.5725767612457275, + "logits/rejected": -1.882869005203247, + "logps/chosen": -680.0263671875, + "logps/rejected": -2140.4384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1176557540893555, + "rewards/margins": 14.856199264526367, + "rewards/rejected": -20.973857879638672, + "step": 45340 + }, + { + "epoch": 2.7, + "learning_rate": 1.4658482703751948e-07, + "logits/chosen": -2.5069825649261475, + "logits/rejected": -1.833905577659607, + "logps/chosen": -667.067626953125, + "logps/rejected": -2147.11279296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964405059814453, + "rewards/margins": 15.069767951965332, + "rewards/rejected": -21.03417205810547, + "step": 45350 + }, + { + "epoch": 2.7, + "learning_rate": 1.460001706983294e-07, + "logits/chosen": -2.543168544769287, + "logits/rejected": -1.8575029373168945, + "logps/chosen": -666.6058349609375, + "logps/rejected": -2007.212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012343406677246, + "rewards/margins": 13.635416030883789, + "rewards/rejected": -19.647756576538086, + "step": 45360 + }, + { + "epoch": 2.71, + "learning_rate": 1.4541664757462715e-07, + "logits/chosen": -2.5365071296691895, + "logits/rejected": -1.8847548961639404, + "logps/chosen": -674.6585693359375, + "logps/rejected": -2195.968017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.076910495758057, + "rewards/margins": 15.441635131835938, + "rewards/rejected": -21.518543243408203, + "step": 45370 + }, + { + "epoch": 2.71, + "learning_rate": 1.4483425794732082e-07, + "logits/chosen": -2.550748109817505, + "logits/rejected": -1.8923794031143188, + "logps/chosen": -679.0648193359375, + "logps/rejected": -2230.04833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.046305179595947, + "rewards/margins": 15.814414978027344, + "rewards/rejected": -21.860721588134766, + "step": 45380 + }, + { + "epoch": 2.71, + "learning_rate": 1.442530020967725e-07, + "logits/chosen": -2.534752368927002, + "logits/rejected": -1.7965103387832642, + "logps/chosen": -657.3313598632812, + "logps/rejected": -2021.3984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.856756687164307, + "rewards/margins": 13.915254592895508, + "rewards/rejected": -19.772008895874023, + "step": 45390 + }, + { + "epoch": 2.71, + "learning_rate": 1.4367288030279896e-07, + "logits/chosen": -2.531407356262207, + "logits/rejected": -1.748626470565796, + "logps/chosen": -663.9037475585938, + "logps/rejected": -2133.7353515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960546970367432, + "rewards/margins": 14.943777084350586, + "rewards/rejected": -20.90432357788086, + "step": 45400 + }, + { + "epoch": 2.71, + "learning_rate": 1.4309389284467095e-07, + "logits/chosen": -2.547577381134033, + "logits/rejected": -1.874463677406311, + "logps/chosen": -660.5790405273438, + "logps/rejected": -2127.25048828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.936125755310059, + "rewards/margins": 14.89924144744873, + "rewards/rejected": -20.835369110107422, + "step": 45410 + }, + { + "epoch": 2.71, + "learning_rate": 1.4251604000111275e-07, + "logits/chosen": -2.527170419692993, + "logits/rejected": -1.9672565460205078, + "logps/chosen": -682.4559326171875, + "logps/rejected": -2187.46142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115174293518066, + "rewards/margins": 15.336337089538574, + "rewards/rejected": -21.45151138305664, + "step": 45420 + }, + { + "epoch": 2.71, + "learning_rate": 1.4193932205030318e-07, + "logits/chosen": -2.55413818359375, + "logits/rejected": -1.8452472686767578, + "logps/chosen": -678.1260375976562, + "logps/rejected": -2110.28271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.072638511657715, + "rewards/margins": 14.579548835754395, + "rewards/rejected": -20.652185440063477, + "step": 45430 + }, + { + "epoch": 2.71, + "learning_rate": 1.413637392698733e-07, + "logits/chosen": -2.507350206375122, + "logits/rejected": -1.7791202068328857, + "logps/chosen": -683.46533203125, + "logps/rejected": -2092.18505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.11217737197876, + "rewards/margins": 14.380165100097656, + "rewards/rejected": -20.492341995239258, + "step": 45440 + }, + { + "epoch": 2.71, + "learning_rate": 1.4078929193691e-07, + "logits/chosen": -2.4859323501586914, + "logits/rejected": -1.782536268234253, + "logps/chosen": -693.6903076171875, + "logps/rejected": -2124.379638671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.247523307800293, + "rewards/margins": 14.556551933288574, + "rewards/rejected": -20.804075241088867, + "step": 45450 + }, + { + "epoch": 2.71, + "learning_rate": 1.402159803279507e-07, + "logits/chosen": -2.566918134689331, + "logits/rejected": -1.7921994924545288, + "logps/chosen": -692.4954833984375, + "logps/rejected": -2175.30517578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.245924949645996, + "rewards/margins": 15.070116996765137, + "rewards/rejected": -21.3160400390625, + "step": 45460 + }, + { + "epoch": 2.71, + "learning_rate": 1.3964380471898907e-07, + "logits/chosen": -2.525813579559326, + "logits/rejected": -1.8919849395751953, + "logps/chosen": -661.8478393554688, + "logps/rejected": -2140.285400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903847694396973, + "rewards/margins": 15.065702438354492, + "rewards/rejected": -20.96955108642578, + "step": 45470 + }, + { + "epoch": 2.71, + "learning_rate": 1.3907276538546898e-07, + "logits/chosen": -2.506251335144043, + "logits/rejected": -1.729026198387146, + "logps/chosen": -661.2122192382812, + "logps/rejected": -2069.43505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910597801208496, + "rewards/margins": 14.358332633972168, + "rewards/rejected": -20.268930435180664, + "step": 45480 + }, + { + "epoch": 2.71, + "learning_rate": 1.3850286260229022e-07, + "logits/chosen": -2.520857095718384, + "logits/rejected": -1.8185861110687256, + "logps/chosen": -668.2059326171875, + "logps/rejected": -2155.04052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.008720397949219, + "rewards/margins": 15.106264114379883, + "rewards/rejected": -21.114986419677734, + "step": 45490 + }, + { + "epoch": 2.71, + "learning_rate": 1.379340966438028e-07, + "logits/chosen": -2.576631784439087, + "logits/rejected": -1.8797212839126587, + "logps/chosen": -695.5106201171875, + "logps/rejected": -2192.8056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.230842113494873, + "rewards/margins": 15.2573823928833, + "rewards/rejected": -21.488224029541016, + "step": 45500 + }, + { + "epoch": 2.71, + "learning_rate": 1.3736646778381159e-07, + "logits/chosen": -2.504257917404175, + "logits/rejected": -1.8754537105560303, + "logps/chosen": -696.7337036132812, + "logps/rejected": -2180.570068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2515764236450195, + "rewards/margins": 15.111581802368164, + "rewards/rejected": -21.363155364990234, + "step": 45510 + }, + { + "epoch": 2.71, + "learning_rate": 1.367999762955724e-07, + "logits/chosen": -2.5590431690216064, + "logits/rejected": -1.7995601892471313, + "logps/chosen": -678.6055908203125, + "logps/rejected": -2230.35546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023952960968018, + "rewards/margins": 15.83863353729248, + "rewards/rejected": -21.86258888244629, + "step": 45520 + }, + { + "epoch": 2.71, + "learning_rate": 1.3623462245179465e-07, + "logits/chosen": -2.5049099922180176, + "logits/rejected": -1.8982166051864624, + "logps/chosen": -707.3319091796875, + "logps/rejected": -2181.81982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.384312629699707, + "rewards/margins": 14.988703727722168, + "rewards/rejected": -21.373016357421875, + "step": 45530 + }, + { + "epoch": 2.72, + "learning_rate": 1.3567040652463946e-07, + "logits/chosen": -2.528928756713867, + "logits/rejected": -1.8385883569717407, + "logps/chosen": -681.6852416992188, + "logps/rejected": -2152.29638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095150470733643, + "rewards/margins": 14.986471176147461, + "rewards/rejected": -21.081623077392578, + "step": 45540 + }, + { + "epoch": 2.72, + "learning_rate": 1.351073287857202e-07, + "logits/chosen": -2.5461277961730957, + "logits/rejected": -1.7877569198608398, + "logps/chosen": -658.3231201171875, + "logps/rejected": -2092.102783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.857592582702637, + "rewards/margins": 14.617724418640137, + "rewards/rejected": -20.475318908691406, + "step": 45550 + }, + { + "epoch": 2.72, + "learning_rate": 1.3454538950610274e-07, + "logits/chosen": -2.559471607208252, + "logits/rejected": -1.8801647424697876, + "logps/chosen": -675.89599609375, + "logps/rejected": -2179.3837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0343017578125, + "rewards/margins": 15.316554069519043, + "rewards/rejected": -21.35085678100586, + "step": 45560 + }, + { + "epoch": 2.72, + "learning_rate": 1.339845889563049e-07, + "logits/chosen": -2.511197566986084, + "logits/rejected": -1.7583669424057007, + "logps/chosen": -713.2626342773438, + "logps/rejected": -2094.82373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.464140892028809, + "rewards/margins": 14.048952102661133, + "rewards/rejected": -20.513092041015625, + "step": 45570 + }, + { + "epoch": 2.72, + "learning_rate": 1.334249274062954e-07, + "logits/chosen": -2.554334878921509, + "logits/rejected": -1.9108489751815796, + "logps/chosen": -664.8360595703125, + "logps/rejected": -2200.134033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93384313583374, + "rewards/margins": 15.626626968383789, + "rewards/rejected": -21.560468673706055, + "step": 45580 + }, + { + "epoch": 2.72, + "learning_rate": 1.3286640512549604e-07, + "logits/chosen": -2.551002025604248, + "logits/rejected": -1.9067704677581787, + "logps/chosen": -653.1719360351562, + "logps/rejected": -2174.933349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.82017183303833, + "rewards/margins": 15.49146842956543, + "rewards/rejected": -21.3116397857666, + "step": 45590 + }, + { + "epoch": 2.72, + "learning_rate": 1.3230902238277887e-07, + "logits/chosen": -2.557708263397217, + "logits/rejected": -1.9606717824935913, + "logps/chosen": -687.6436157226562, + "logps/rejected": -2074.789794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.18765926361084, + "rewards/margins": 14.12434196472168, + "rewards/rejected": -20.312002182006836, + "step": 45600 + }, + { + "epoch": 2.72, + "learning_rate": 1.317527794464682e-07, + "logits/chosen": -2.570535182952881, + "logits/rejected": -1.8812892436981201, + "logps/chosen": -666.1134033203125, + "logps/rejected": -2152.520751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.015326976776123, + "rewards/margins": 15.072423934936523, + "rewards/rejected": -21.087749481201172, + "step": 45610 + }, + { + "epoch": 2.72, + "learning_rate": 1.3119767658433923e-07, + "logits/chosen": -2.565058708190918, + "logits/rejected": -1.9128179550170898, + "logps/chosen": -673.4400634765625, + "logps/rejected": -2173.495849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066021919250488, + "rewards/margins": 15.213910102844238, + "rewards/rejected": -21.279932022094727, + "step": 45620 + }, + { + "epoch": 2.72, + "learning_rate": 1.3064371406361854e-07, + "logits/chosen": -2.4987008571624756, + "logits/rejected": -1.895310401916504, + "logps/chosen": -711.4444580078125, + "logps/rejected": -2152.3359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.406195163726807, + "rewards/margins": 14.673063278198242, + "rewards/rejected": -21.079259872436523, + "step": 45630 + }, + { + "epoch": 2.72, + "learning_rate": 1.3009089215098357e-07, + "logits/chosen": -2.540306568145752, + "logits/rejected": -1.8722093105316162, + "logps/chosen": -681.3643798828125, + "logps/rejected": -2157.364990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128631114959717, + "rewards/margins": 15.00377082824707, + "rewards/rejected": -21.132402420043945, + "step": 45640 + }, + { + "epoch": 2.72, + "learning_rate": 1.295392111125632e-07, + "logits/chosen": -2.5288681983947754, + "logits/rejected": -1.8512725830078125, + "logps/chosen": -679.7206420898438, + "logps/rejected": -2066.70068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043421745300293, + "rewards/margins": 14.183753967285156, + "rewards/rejected": -20.227176666259766, + "step": 45650 + }, + { + "epoch": 2.72, + "learning_rate": 1.2898867121393627e-07, + "logits/chosen": -2.54510498046875, + "logits/rejected": -1.798043966293335, + "logps/chosen": -677.7357177734375, + "logps/rejected": -2098.1669921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073541164398193, + "rewards/margins": 14.462808609008789, + "rewards/rejected": -20.53635025024414, + "step": 45660 + }, + { + "epoch": 2.72, + "learning_rate": 1.2843927272013252e-07, + "logits/chosen": -2.554713726043701, + "logits/rejected": -1.8218265771865845, + "logps/chosen": -691.959228515625, + "logps/rejected": -2180.965576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.166695594787598, + "rewards/margins": 15.206682205200195, + "rewards/rejected": -21.37337303161621, + "step": 45670 + }, + { + "epoch": 2.72, + "learning_rate": 1.2789101589563286e-07, + "logits/chosen": -2.5559325218200684, + "logits/rejected": -1.8569297790527344, + "logps/chosen": -669.7140502929688, + "logps/rejected": -2267.120361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.998517036437988, + "rewards/margins": 16.223360061645508, + "rewards/rejected": -22.221874237060547, + "step": 45680 + }, + { + "epoch": 2.72, + "learning_rate": 1.273439010043681e-07, + "logits/chosen": -2.557796001434326, + "logits/rejected": -1.9572169780731201, + "logps/chosen": -669.4630737304688, + "logps/rejected": -2134.52001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.877072334289551, + "rewards/margins": 15.023033142089844, + "rewards/rejected": -20.900104522705078, + "step": 45690 + }, + { + "epoch": 2.73, + "learning_rate": 1.2679792830971926e-07, + "logits/chosen": -2.524294137954712, + "logits/rejected": -1.8433971405029297, + "logps/chosen": -675.5213623046875, + "logps/rejected": -2134.8720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.010740756988525, + "rewards/margins": 14.888422966003418, + "rewards/rejected": -20.8991641998291, + "step": 45700 + }, + { + "epoch": 2.73, + "learning_rate": 1.2625309807451664e-07, + "logits/chosen": -2.544485569000244, + "logits/rejected": -1.8624296188354492, + "logps/chosen": -674.6934814453125, + "logps/rejected": -2069.0224609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.011430263519287, + "rewards/margins": 14.232333183288574, + "rewards/rejected": -20.243762969970703, + "step": 45710 + }, + { + "epoch": 2.73, + "learning_rate": 1.2570941056104348e-07, + "logits/chosen": -2.547337055206299, + "logits/rejected": -1.9822756052017212, + "logps/chosen": -697.7584228515625, + "logps/rejected": -2088.097412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.319452285766602, + "rewards/margins": 14.118583679199219, + "rewards/rejected": -20.43803596496582, + "step": 45720 + }, + { + "epoch": 2.73, + "learning_rate": 1.2516686603102874e-07, + "logits/chosen": -2.5239405632019043, + "logits/rejected": -1.8349647521972656, + "logps/chosen": -685.179931640625, + "logps/rejected": -2099.727783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.16346549987793, + "rewards/margins": 14.381217956542969, + "rewards/rejected": -20.5446834564209, + "step": 45730 + }, + { + "epoch": 2.73, + "learning_rate": 1.2462546474565518e-07, + "logits/chosen": -2.55082368850708, + "logits/rejected": -1.8885371685028076, + "logps/chosen": -676.4364013671875, + "logps/rejected": -2117.314697265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041884899139404, + "rewards/margins": 14.695314407348633, + "rewards/rejected": -20.737199783325195, + "step": 45740 + }, + { + "epoch": 2.73, + "learning_rate": 1.2408520696555183e-07, + "logits/chosen": -2.5167415142059326, + "logits/rejected": -1.8560771942138672, + "logps/chosen": -651.1461181640625, + "logps/rejected": -2122.531005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859251499176025, + "rewards/margins": 14.923181533813477, + "rewards/rejected": -20.782434463500977, + "step": 45750 + }, + { + "epoch": 2.73, + "learning_rate": 1.2354609295080034e-07, + "logits/chosen": -2.563802719116211, + "logits/rejected": -1.901980996131897, + "logps/chosen": -680.4904174804688, + "logps/rejected": -2175.44482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098872184753418, + "rewards/margins": 15.2102689743042, + "rewards/rejected": -21.309141159057617, + "step": 45760 + }, + { + "epoch": 2.73, + "learning_rate": 1.2300812296092868e-07, + "logits/chosen": -2.5008957386016846, + "logits/rejected": -1.8531396389007568, + "logps/chosen": -674.255615234375, + "logps/rejected": -2147.33642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.031984806060791, + "rewards/margins": 14.989099502563477, + "rewards/rejected": -21.02108383178711, + "step": 45770 + }, + { + "epoch": 2.73, + "learning_rate": 1.224712972549172e-07, + "logits/chosen": -2.546780586242676, + "logits/rejected": -1.7241065502166748, + "logps/chosen": -666.2569580078125, + "logps/rejected": -2101.99267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.98992395401001, + "rewards/margins": 14.577792167663574, + "rewards/rejected": -20.567716598510742, + "step": 45780 + }, + { + "epoch": 2.73, + "learning_rate": 1.219356160911922e-07, + "logits/chosen": -2.5098423957824707, + "logits/rejected": -1.811449646949768, + "logps/chosen": -657.6712036132812, + "logps/rejected": -2124.26025390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.856420516967773, + "rewards/margins": 14.936243057250977, + "rewards/rejected": -20.792661666870117, + "step": 45790 + }, + { + "epoch": 2.73, + "learning_rate": 1.214010797276316e-07, + "logits/chosen": -2.5811142921447754, + "logits/rejected": -1.9120826721191406, + "logps/chosen": -685.0118408203125, + "logps/rejected": -2134.103759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.128328800201416, + "rewards/margins": 14.769253730773926, + "rewards/rejected": -20.897581100463867, + "step": 45800 + }, + { + "epoch": 2.73, + "learning_rate": 1.2086768842156065e-07, + "logits/chosen": -2.495020627975464, + "logits/rejected": -1.8250277042388916, + "logps/chosen": -661.9515380859375, + "logps/rejected": -2148.2880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.935786724090576, + "rewards/margins": 15.102787971496582, + "rewards/rejected": -21.03857421875, + "step": 45810 + }, + { + "epoch": 2.73, + "learning_rate": 1.2033544242975454e-07, + "logits/chosen": -2.5264735221862793, + "logits/rejected": -1.905159592628479, + "logps/chosen": -677.9827270507812, + "logps/rejected": -2160.37255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105383396148682, + "rewards/margins": 15.054422378540039, + "rewards/rejected": -21.159807205200195, + "step": 45820 + }, + { + "epoch": 2.73, + "learning_rate": 1.1980434200843576e-07, + "logits/chosen": -2.5258030891418457, + "logits/rejected": -1.883943796157837, + "logps/chosen": -690.3702392578125, + "logps/rejected": -2143.713134765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151909828186035, + "rewards/margins": 14.842738151550293, + "rewards/rejected": -20.994647979736328, + "step": 45830 + }, + { + "epoch": 2.73, + "learning_rate": 1.1927438741327652e-07, + "logits/chosen": -2.5797600746154785, + "logits/rejected": -1.8321069478988647, + "logps/chosen": -676.2864379882812, + "logps/rejected": -2242.115234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.04899263381958, + "rewards/margins": 15.928889274597168, + "rewards/rejected": -21.977880477905273, + "step": 45840 + }, + { + "epoch": 2.73, + "learning_rate": 1.1874557889939659e-07, + "logits/chosen": -2.5550551414489746, + "logits/rejected": -1.7928237915039062, + "logps/chosen": -681.7210693359375, + "logps/rejected": -2056.491943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.142353057861328, + "rewards/margins": 13.97247314453125, + "rewards/rejected": -20.114824295043945, + "step": 45850 + }, + { + "epoch": 2.73, + "learning_rate": 1.1821791672136484e-07, + "logits/chosen": -2.518369674682617, + "logits/rejected": -1.8469091653823853, + "logps/chosen": -668.6463623046875, + "logps/rejected": -2134.545166015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.954498291015625, + "rewards/margins": 14.944523811340332, + "rewards/rejected": -20.89902114868164, + "step": 45860 + }, + { + "epoch": 2.74, + "learning_rate": 1.1769140113319755e-07, + "logits/chosen": -2.5282981395721436, + "logits/rejected": -1.8576526641845703, + "logps/chosen": -693.5540771484375, + "logps/rejected": -2195.53271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.269179344177246, + "rewards/margins": 15.24403190612793, + "rewards/rejected": -21.513208389282227, + "step": 45870 + }, + { + "epoch": 2.74, + "learning_rate": 1.1716603238835945e-07, + "logits/chosen": -2.583599805831909, + "logits/rejected": -1.8810409307479858, + "logps/chosen": -674.4564208984375, + "logps/rejected": -2122.158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965516090393066, + "rewards/margins": 14.803683280944824, + "rewards/rejected": -20.76919937133789, + "step": 45880 + }, + { + "epoch": 2.74, + "learning_rate": 1.1664181073976294e-07, + "logits/chosen": -2.545442819595337, + "logits/rejected": -1.7679446935653687, + "logps/chosen": -703.2205810546875, + "logps/rejected": -2061.20947265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.357237339019775, + "rewards/margins": 13.821775436401367, + "rewards/rejected": -20.179012298583984, + "step": 45890 + }, + { + "epoch": 2.74, + "learning_rate": 1.1611873643976839e-07, + "logits/chosen": -2.5358479022979736, + "logits/rejected": -1.85427725315094, + "logps/chosen": -701.9027709960938, + "logps/rejected": -2151.246826171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3024797439575195, + "rewards/margins": 14.764982223510742, + "rewards/rejected": -21.067462921142578, + "step": 45900 + }, + { + "epoch": 2.74, + "learning_rate": 1.1559680974018356e-07, + "logits/chosen": -2.5075223445892334, + "logits/rejected": -1.8244116306304932, + "logps/chosen": -687.5906982421875, + "logps/rejected": -2186.69091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.182575702667236, + "rewards/margins": 15.2478609085083, + "rewards/rejected": -21.430435180664062, + "step": 45910 + }, + { + "epoch": 2.74, + "learning_rate": 1.1507603089226438e-07, + "logits/chosen": -2.533510684967041, + "logits/rejected": -1.893072485923767, + "logps/chosen": -692.1107177734375, + "logps/rejected": -2156.99609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.19066047668457, + "rewards/margins": 14.93596363067627, + "rewards/rejected": -21.126623153686523, + "step": 45920 + }, + { + "epoch": 2.74, + "learning_rate": 1.145564001467131e-07, + "logits/chosen": -2.563894748687744, + "logits/rejected": -1.8644949197769165, + "logps/chosen": -662.2108764648438, + "logps/rejected": -2083.41748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.926508903503418, + "rewards/margins": 14.478528022766113, + "rewards/rejected": -20.40503692626953, + "step": 45930 + }, + { + "epoch": 2.74, + "learning_rate": 1.1403791775368073e-07, + "logits/chosen": -2.55729603767395, + "logits/rejected": -1.8916791677474976, + "logps/chosen": -667.9281005859375, + "logps/rejected": -2073.909423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.037264823913574, + "rewards/margins": 14.266035079956055, + "rewards/rejected": -20.303298950195312, + "step": 45940 + }, + { + "epoch": 2.74, + "learning_rate": 1.1352058396276427e-07, + "logits/chosen": -2.5699708461761475, + "logits/rejected": -1.9315402507781982, + "logps/chosen": -665.7369995117188, + "logps/rejected": -2150.955322265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.92907190322876, + "rewards/margins": 15.137458801269531, + "rewards/rejected": -21.066532135009766, + "step": 45950 + }, + { + "epoch": 2.74, + "learning_rate": 1.1300439902300814e-07, + "logits/chosen": -2.5448038578033447, + "logits/rejected": -1.9523935317993164, + "logps/chosen": -671.6265869140625, + "logps/rejected": -2098.17919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0067362785339355, + "rewards/margins": 14.527804374694824, + "rewards/rejected": -20.534542083740234, + "step": 45960 + }, + { + "epoch": 2.74, + "learning_rate": 1.1248936318290438e-07, + "logits/chosen": -2.5237479209899902, + "logits/rejected": -1.9149738550186157, + "logps/chosen": -701.9244384765625, + "logps/rejected": -1990.5999755859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.311623573303223, + "rewards/margins": 13.159440994262695, + "rewards/rejected": -19.471065521240234, + "step": 45970 + }, + { + "epoch": 2.74, + "learning_rate": 1.1197547669038994e-07, + "logits/chosen": -2.511915922164917, + "logits/rejected": -1.9038501977920532, + "logps/chosen": -674.4967651367188, + "logps/rejected": -2136.378662109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.050522327423096, + "rewards/margins": 14.87285041809082, + "rewards/rejected": -20.92337417602539, + "step": 45980 + }, + { + "epoch": 2.74, + "learning_rate": 1.1146273979285138e-07, + "logits/chosen": -2.5356011390686035, + "logits/rejected": -1.9078214168548584, + "logps/chosen": -679.8086547851562, + "logps/rejected": -2134.34375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118990898132324, + "rewards/margins": 14.78045654296875, + "rewards/rejected": -20.899444580078125, + "step": 45990 + }, + { + "epoch": 2.74, + "learning_rate": 1.1095115273711876e-07, + "logits/chosen": -2.5452632904052734, + "logits/rejected": -1.8943045139312744, + "logps/chosen": -653.7567138671875, + "logps/rejected": -2127.59033203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.817490577697754, + "rewards/margins": 15.0220947265625, + "rewards/rejected": -20.839584350585938, + "step": 46000 + }, + { + "epoch": 2.74, + "learning_rate": 1.1044071576947119e-07, + "logits/chosen": -2.5290894508361816, + "logits/rejected": -1.7967426776885986, + "logps/chosen": -647.9073486328125, + "logps/rejected": -2103.31103515625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.835109710693359, + "rewards/margins": 14.748346328735352, + "rewards/rejected": -20.58345603942871, + "step": 46010 + }, + { + "epoch": 2.74, + "learning_rate": 1.0993142913563209e-07, + "logits/chosen": -2.5616250038146973, + "logits/rejected": -1.8605238199234009, + "logps/chosen": -657.0343017578125, + "logps/rejected": -2125.7939453125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.859172344207764, + "rewards/margins": 14.956802368164062, + "rewards/rejected": -20.81597328186035, + "step": 46020 + }, + { + "epoch": 2.74, + "learning_rate": 1.0942329308077316e-07, + "logits/chosen": -2.531215190887451, + "logits/rejected": -1.8350274562835693, + "logps/chosen": -677.1444091796875, + "logps/rejected": -2172.771240234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.04797887802124, + "rewards/margins": 15.223983764648438, + "rewards/rejected": -21.271963119506836, + "step": 46030 + }, + { + "epoch": 2.75, + "learning_rate": 1.089163078495098e-07, + "logits/chosen": -2.546416759490967, + "logits/rejected": -1.9327739477157593, + "logps/chosen": -689.015380859375, + "logps/rejected": -2119.75390625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179478645324707, + "rewards/margins": 14.568280220031738, + "rewards/rejected": -20.747758865356445, + "step": 46040 + }, + { + "epoch": 2.75, + "learning_rate": 1.0841047368590596e-07, + "logits/chosen": -2.5390396118164062, + "logits/rejected": -1.8590484857559204, + "logps/chosen": -680.982421875, + "logps/rejected": -2133.22998046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08862829208374, + "rewards/margins": 14.811647415161133, + "rewards/rejected": -20.90027618408203, + "step": 46050 + }, + { + "epoch": 2.75, + "learning_rate": 1.0790579083346936e-07, + "logits/chosen": -2.4850802421569824, + "logits/rejected": -1.7560558319091797, + "logps/chosen": -680.3077392578125, + "logps/rejected": -2110.39501953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125319957733154, + "rewards/margins": 14.537999153137207, + "rewards/rejected": -20.663318634033203, + "step": 46060 + }, + { + "epoch": 2.75, + "learning_rate": 1.0740225953515454e-07, + "logits/chosen": -2.5283362865448, + "logits/rejected": -1.8203229904174805, + "logps/chosen": -682.6251831054688, + "logps/rejected": -2170.256591796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.140351295471191, + "rewards/margins": 15.122477531433105, + "rewards/rejected": -21.262828826904297, + "step": 46070 + }, + { + "epoch": 2.75, + "learning_rate": 1.0689988003336121e-07, + "logits/chosen": -2.5463321208953857, + "logits/rejected": -1.866412878036499, + "logps/chosen": -694.1663208007812, + "logps/rejected": -2109.262451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229989051818848, + "rewards/margins": 14.42736530303955, + "rewards/rejected": -20.657352447509766, + "step": 46080 + }, + { + "epoch": 2.75, + "learning_rate": 1.0639865256993536e-07, + "logits/chosen": -2.542966365814209, + "logits/rejected": -1.8619747161865234, + "logps/chosen": -672.7552490234375, + "logps/rejected": -2164.949951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9879584312438965, + "rewards/margins": 15.216073989868164, + "rewards/rejected": -21.204030990600586, + "step": 46090 + }, + { + "epoch": 2.75, + "learning_rate": 1.058985773861676e-07, + "logits/chosen": -2.5394973754882812, + "logits/rejected": -1.9079868793487549, + "logps/chosen": -698.1444091796875, + "logps/rejected": -2164.40283203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.291393756866455, + "rewards/margins": 14.914507865905762, + "rewards/rejected": -21.205904006958008, + "step": 46100 + }, + { + "epoch": 2.75, + "learning_rate": 1.0539965472279424e-07, + "logits/chosen": -2.503844738006592, + "logits/rejected": -1.7831947803497314, + "logps/chosen": -684.8026733398438, + "logps/rejected": -2063.3427734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.179881572723389, + "rewards/margins": 14.0205717086792, + "rewards/rejected": -20.20045280456543, + "step": 46110 + }, + { + "epoch": 2.75, + "learning_rate": 1.0490188481999647e-07, + "logits/chosen": -2.525968074798584, + "logits/rejected": -1.7350683212280273, + "logps/chosen": -669.5885009765625, + "logps/rejected": -2220.52197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.942196846008301, + "rewards/margins": 15.818440437316895, + "rewards/rejected": -21.760637283325195, + "step": 46120 + }, + { + "epoch": 2.75, + "learning_rate": 1.0440526791740097e-07, + "logits/chosen": -2.5191538333892822, + "logits/rejected": -1.8971912860870361, + "logps/chosen": -691.1187744140625, + "logps/rejected": -2230.63818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.224823951721191, + "rewards/margins": 15.633378982543945, + "rewards/rejected": -21.85820198059082, + "step": 46130 + }, + { + "epoch": 2.75, + "learning_rate": 1.039098042540787e-07, + "logits/chosen": -2.522630453109741, + "logits/rejected": -1.8442668914794922, + "logps/chosen": -654.2730102539062, + "logps/rejected": -2073.60498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.877256393432617, + "rewards/margins": 14.417889595031738, + "rewards/rejected": -20.295146942138672, + "step": 46140 + }, + { + "epoch": 2.75, + "learning_rate": 1.0341549406854612e-07, + "logits/chosen": -2.5602831840515137, + "logits/rejected": -1.9445908069610596, + "logps/chosen": -697.8738403320312, + "logps/rejected": -2150.8544921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.254788398742676, + "rewards/margins": 14.82255744934082, + "rewards/rejected": -21.07734489440918, + "step": 46150 + }, + { + "epoch": 2.75, + "learning_rate": 1.0292233759876425e-07, + "logits/chosen": -2.5239977836608887, + "logits/rejected": -1.871771216392517, + "logps/chosen": -684.87646484375, + "logps/rejected": -2153.96044921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.086186408996582, + "rewards/margins": 14.999334335327148, + "rewards/rejected": -21.08552360534668, + "step": 46160 + }, + { + "epoch": 2.75, + "learning_rate": 1.0243033508213873e-07, + "logits/chosen": -2.5539422035217285, + "logits/rejected": -1.868587851524353, + "logps/chosen": -682.8366088867188, + "logps/rejected": -2132.7734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.14653205871582, + "rewards/margins": 14.734281539916992, + "rewards/rejected": -20.880815505981445, + "step": 46170 + }, + { + "epoch": 2.75, + "learning_rate": 1.0193948675551902e-07, + "logits/chosen": -2.544170379638672, + "logits/rejected": -1.845603585243225, + "logps/chosen": -673.6629638671875, + "logps/rejected": -2160.903076171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.041426181793213, + "rewards/margins": 15.123730659484863, + "rewards/rejected": -21.165157318115234, + "step": 46180 + }, + { + "epoch": 2.75, + "learning_rate": 1.0144979285519996e-07, + "logits/chosen": -2.5027976036071777, + "logits/rejected": -1.8324562311172485, + "logps/chosen": -684.7396850585938, + "logps/rejected": -2061.104736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144601821899414, + "rewards/margins": 14.0307035446167, + "rewards/rejected": -20.175304412841797, + "step": 46190 + }, + { + "epoch": 2.75, + "learning_rate": 1.0096125361691993e-07, + "logits/chosen": -2.562774181365967, + "logits/rejected": -1.8849499225616455, + "logps/chosen": -712.1607666015625, + "logps/rejected": -2150.169677734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.436521053314209, + "rewards/margins": 14.615649223327637, + "rewards/rejected": -21.05217170715332, + "step": 46200 + }, + { + "epoch": 2.76, + "learning_rate": 1.0047386927586167e-07, + "logits/chosen": -2.5781357288360596, + "logits/rejected": -1.886348009109497, + "logps/chosen": -676.5408935546875, + "logps/rejected": -2149.4541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.007774353027344, + "rewards/margins": 15.030789375305176, + "rewards/rejected": -21.038564682006836, + "step": 46210 + }, + { + "epoch": 2.76, + "learning_rate": 9.998764006665245e-08, + "logits/chosen": -2.5253520011901855, + "logits/rejected": -1.9259926080703735, + "logps/chosen": -663.3134155273438, + "logps/rejected": -2213.182861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.888674736022949, + "rewards/margins": 15.805353164672852, + "rewards/rejected": -21.69403076171875, + "step": 46220 + }, + { + "epoch": 2.76, + "learning_rate": 9.950256622336258e-08, + "logits/chosen": -2.531167507171631, + "logits/rejected": -1.9063276052474976, + "logps/chosen": -674.6814575195312, + "logps/rejected": -2212.69189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056509971618652, + "rewards/margins": 15.63524055480957, + "rewards/rejected": -21.69175148010254, + "step": 46230 + }, + { + "epoch": 2.76, + "learning_rate": 9.901864797950689e-08, + "logits/chosen": -2.5256094932556152, + "logits/rejected": -1.9261829853057861, + "logps/chosen": -695.5675048828125, + "logps/rejected": -2057.934814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249433994293213, + "rewards/margins": 13.883193969726562, + "rewards/rejected": -20.132625579833984, + "step": 46240 + }, + { + "epoch": 2.76, + "learning_rate": 9.853588556804295e-08, + "logits/chosen": -2.5307726860046387, + "logits/rejected": -1.8320178985595703, + "logps/chosen": -673.6675415039062, + "logps/rejected": -2158.190673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044233798980713, + "rewards/margins": 15.101449966430664, + "rewards/rejected": -21.14568328857422, + "step": 46250 + }, + { + "epoch": 2.76, + "learning_rate": 9.805427922137373e-08, + "logits/chosen": -2.4998269081115723, + "logits/rejected": -1.808350920677185, + "logps/chosen": -691.962158203125, + "logps/rejected": -2114.875732421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.196290969848633, + "rewards/margins": 14.514485359191895, + "rewards/rejected": -20.71077537536621, + "step": 46260 + }, + { + "epoch": 2.76, + "learning_rate": 9.757382917134322e-08, + "logits/chosen": -2.530179500579834, + "logits/rejected": -1.899774193763733, + "logps/chosen": -695.9262084960938, + "logps/rejected": -2227.66455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.241847515106201, + "rewards/margins": 15.580070495605469, + "rewards/rejected": -21.821918487548828, + "step": 46270 + }, + { + "epoch": 2.76, + "learning_rate": 9.709453564924143e-08, + "logits/chosen": -2.5052247047424316, + "logits/rejected": -1.8191239833831787, + "logps/chosen": -644.980224609375, + "logps/rejected": -2024.820068359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.766826629638672, + "rewards/margins": 14.045585632324219, + "rewards/rejected": -19.81241226196289, + "step": 46280 + }, + { + "epoch": 2.76, + "learning_rate": 9.661639888579877e-08, + "logits/chosen": -2.5932507514953613, + "logits/rejected": -1.9079902172088623, + "logps/chosen": -684.88818359375, + "logps/rejected": -2220.039794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045950889587402, + "rewards/margins": 15.702001571655273, + "rewards/rejected": -21.74795150756836, + "step": 46290 + }, + { + "epoch": 2.76, + "learning_rate": 9.613941911119168e-08, + "logits/chosen": -2.5857949256896973, + "logits/rejected": -1.8797643184661865, + "logps/chosen": -706.1104736328125, + "logps/rejected": -2090.85107421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.317006587982178, + "rewards/margins": 14.143465995788574, + "rewards/rejected": -20.460474014282227, + "step": 46300 + }, + { + "epoch": 2.76, + "learning_rate": 9.566359655503732e-08, + "logits/chosen": -2.5851781368255615, + "logits/rejected": -1.8519246578216553, + "logps/chosen": -651.7435302734375, + "logps/rejected": -2171.5205078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.807689666748047, + "rewards/margins": 15.449773788452148, + "rewards/rejected": -21.257465362548828, + "step": 46310 + }, + { + "epoch": 2.76, + "learning_rate": 9.51889314463969e-08, + "logits/chosen": -2.5752739906311035, + "logits/rejected": -1.8884693384170532, + "logps/chosen": -672.3513793945312, + "logps/rejected": -2108.660400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967018127441406, + "rewards/margins": 14.676997184753418, + "rewards/rejected": -20.644014358520508, + "step": 46320 + }, + { + "epoch": 2.76, + "learning_rate": 9.471542401377404e-08, + "logits/chosen": -2.507636547088623, + "logits/rejected": -1.7843338251113892, + "logps/chosen": -665.5675659179688, + "logps/rejected": -2201.10791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.919850826263428, + "rewards/margins": 15.642473220825195, + "rewards/rejected": -21.562320709228516, + "step": 46330 + }, + { + "epoch": 2.76, + "learning_rate": 9.424307448511555e-08, + "logits/chosen": -2.5762858390808105, + "logits/rejected": -1.812596321105957, + "logps/chosen": -692.2351684570312, + "logps/rejected": -2144.06591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.207406997680664, + "rewards/margins": 14.78520393371582, + "rewards/rejected": -20.992610931396484, + "step": 46340 + }, + { + "epoch": 2.76, + "learning_rate": 9.377188308781038e-08, + "logits/chosen": -2.5516982078552246, + "logits/rejected": -1.9249560832977295, + "logps/chosen": -691.03076171875, + "logps/rejected": -2186.92529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.156723976135254, + "rewards/margins": 15.2672119140625, + "rewards/rejected": -21.423938751220703, + "step": 46350 + }, + { + "epoch": 2.76, + "learning_rate": 9.330185004869014e-08, + "logits/chosen": -2.4883522987365723, + "logits/rejected": -1.8024438619613647, + "logps/chosen": -695.0477905273438, + "logps/rejected": -2125.315673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.244265556335449, + "rewards/margins": 14.57319450378418, + "rewards/rejected": -20.817459106445312, + "step": 46360 + }, + { + "epoch": 2.77, + "learning_rate": 9.283297559402827e-08, + "logits/chosen": -2.5339627265930176, + "logits/rejected": -1.789176344871521, + "logps/chosen": -677.370849609375, + "logps/rejected": -2150.068115234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101293087005615, + "rewards/margins": 14.968005180358887, + "rewards/rejected": -21.069297790527344, + "step": 46370 + }, + { + "epoch": 2.77, + "learning_rate": 9.236525994954142e-08, + "logits/chosen": -2.587846279144287, + "logits/rejected": -1.918290138244629, + "logps/chosen": -687.1558227539062, + "logps/rejected": -2131.129638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.165020942687988, + "rewards/margins": 14.683080673217773, + "rewards/rejected": -20.848100662231445, + "step": 46380 + }, + { + "epoch": 2.77, + "learning_rate": 9.189870334038787e-08, + "logits/chosen": -2.582157611846924, + "logits/rejected": -1.9009253978729248, + "logps/chosen": -663.9599609375, + "logps/rejected": -2225.927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918181419372559, + "rewards/margins": 15.90839672088623, + "rewards/rejected": -21.826580047607422, + "step": 46390 + }, + { + "epoch": 2.77, + "learning_rate": 9.143330599116762e-08, + "logits/chosen": -2.5285768508911133, + "logits/rejected": -1.8246240615844727, + "logps/chosen": -689.6626586914062, + "logps/rejected": -2166.63916015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173225402832031, + "rewards/margins": 15.052038192749023, + "rewards/rejected": -21.225263595581055, + "step": 46400 + }, + { + "epoch": 2.77, + "learning_rate": 9.096906812592315e-08, + "logits/chosen": -2.5781171321868896, + "logits/rejected": -1.8816592693328857, + "logps/chosen": -668.2756958007812, + "logps/rejected": -2191.48291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.938584804534912, + "rewards/margins": 15.538508415222168, + "rewards/rejected": -21.477094650268555, + "step": 46410 + }, + { + "epoch": 2.77, + "learning_rate": 9.050598996813876e-08, + "logits/chosen": -2.526416778564453, + "logits/rejected": -1.7909748554229736, + "logps/chosen": -698.7273559570312, + "logps/rejected": -2089.170654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.278692722320557, + "rewards/margins": 14.177340507507324, + "rewards/rejected": -20.456029891967773, + "step": 46420 + }, + { + "epoch": 2.77, + "learning_rate": 9.00440717407397e-08, + "logits/chosen": -2.4701247215270996, + "logits/rejected": -1.733519196510315, + "logps/chosen": -681.833984375, + "logps/rejected": -2089.4296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.10954475402832, + "rewards/margins": 14.333473205566406, + "rewards/rejected": -20.443016052246094, + "step": 46430 + }, + { + "epoch": 2.77, + "learning_rate": 8.958331366609424e-08, + "logits/chosen": -2.579963207244873, + "logits/rejected": -1.877178430557251, + "logps/chosen": -704.9564208984375, + "logps/rejected": -2097.597412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.278354644775391, + "rewards/margins": 14.253393173217773, + "rewards/rejected": -20.531749725341797, + "step": 46440 + }, + { + "epoch": 2.77, + "learning_rate": 8.912371596601049e-08, + "logits/chosen": -2.5460236072540283, + "logits/rejected": -1.913169264793396, + "logps/chosen": -663.0614013671875, + "logps/rejected": -2148.23193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.891639709472656, + "rewards/margins": 15.1329984664917, + "rewards/rejected": -21.02463722229004, + "step": 46450 + }, + { + "epoch": 2.77, + "learning_rate": 8.866527886173926e-08, + "logits/chosen": -2.549203395843506, + "logits/rejected": -1.9309781789779663, + "logps/chosen": -687.0798950195312, + "logps/rejected": -2148.70751953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177243709564209, + "rewards/margins": 14.86322021484375, + "rewards/rejected": -21.040464401245117, + "step": 46460 + }, + { + "epoch": 2.77, + "learning_rate": 8.820800257397205e-08, + "logits/chosen": -2.5465950965881348, + "logits/rejected": -1.8371044397354126, + "logps/chosen": -699.0991821289062, + "logps/rejected": -2043.3031005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.226876735687256, + "rewards/margins": 13.769844055175781, + "rewards/rejected": -19.996723175048828, + "step": 46470 + }, + { + "epoch": 2.77, + "learning_rate": 8.775188732284168e-08, + "logits/chosen": -2.5572400093078613, + "logits/rejected": -1.850885033607483, + "logps/chosen": -647.9083251953125, + "logps/rejected": -2147.587158203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.693978309631348, + "rewards/margins": 15.312365531921387, + "rewards/rejected": -21.0063419342041, + "step": 46480 + }, + { + "epoch": 2.77, + "learning_rate": 8.729693332792221e-08, + "logits/chosen": -2.525461196899414, + "logits/rejected": -1.8677085638046265, + "logps/chosen": -674.7902221679688, + "logps/rejected": -2158.50390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.031523704528809, + "rewards/margins": 15.100900650024414, + "rewards/rejected": -21.13242530822754, + "step": 46490 + }, + { + "epoch": 2.77, + "learning_rate": 8.684314080822764e-08, + "logits/chosen": -2.5573368072509766, + "logits/rejected": -1.9174625873565674, + "logps/chosen": -672.06005859375, + "logps/rejected": -2219.532958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.964483737945557, + "rewards/margins": 15.78648853302002, + "rewards/rejected": -21.750972747802734, + "step": 46500 + }, + { + "epoch": 2.77, + "learning_rate": 8.639050998221516e-08, + "logits/chosen": -2.506544828414917, + "logits/rejected": -1.8075144290924072, + "logps/chosen": -672.8405151367188, + "logps/rejected": -2192.583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.980275630950928, + "rewards/margins": 15.500837326049805, + "rewards/rejected": -21.48111343383789, + "step": 46510 + }, + { + "epoch": 2.77, + "learning_rate": 8.593904106777962e-08, + "logits/chosen": -2.5336954593658447, + "logits/rejected": -1.8586807250976562, + "logps/chosen": -677.9577026367188, + "logps/rejected": -2135.92431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.08742618560791, + "rewards/margins": 14.8314790725708, + "rewards/rejected": -20.91890525817871, + "step": 46520 + }, + { + "epoch": 2.77, + "learning_rate": 8.54887342822594e-08, + "logits/chosen": -2.53600811958313, + "logits/rejected": -1.8079860210418701, + "logps/chosen": -696.3847045898438, + "logps/rejected": -2053.21044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194714546203613, + "rewards/margins": 13.891328811645508, + "rewards/rejected": -20.086042404174805, + "step": 46530 + }, + { + "epoch": 2.78, + "learning_rate": 8.503958984243138e-08, + "logits/chosen": -2.5824413299560547, + "logits/rejected": -1.9555619955062866, + "logps/chosen": -652.0117797851562, + "logps/rejected": -2160.31201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.851190090179443, + "rewards/margins": 15.293973922729492, + "rewards/rejected": -21.145160675048828, + "step": 46540 + }, + { + "epoch": 2.78, + "learning_rate": 8.459160796451455e-08, + "logits/chosen": -2.5607192516326904, + "logits/rejected": -1.9114387035369873, + "logps/chosen": -685.5805053710938, + "logps/rejected": -2074.46533203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.183506965637207, + "rewards/margins": 14.119583129882812, + "rewards/rejected": -20.303091049194336, + "step": 46550 + }, + { + "epoch": 2.78, + "learning_rate": 8.414478886416611e-08, + "logits/chosen": -2.5250136852264404, + "logits/rejected": -1.8978891372680664, + "logps/chosen": -677.9891357421875, + "logps/rejected": -2132.08984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032467842102051, + "rewards/margins": 14.84497356414795, + "rewards/rejected": -20.877439498901367, + "step": 46560 + }, + { + "epoch": 2.78, + "learning_rate": 8.369913275648623e-08, + "logits/chosen": -2.5397074222564697, + "logits/rejected": -1.8134262561798096, + "logps/chosen": -694.9578247070312, + "logps/rejected": -2157.195556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194723129272461, + "rewards/margins": 14.934709548950195, + "rewards/rejected": -21.129430770874023, + "step": 46570 + }, + { + "epoch": 2.78, + "learning_rate": 8.325463985601273e-08, + "logits/chosen": -2.579470157623291, + "logits/rejected": -1.9139982461929321, + "logps/chosen": -679.1102294921875, + "logps/rejected": -2163.415771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099813461303711, + "rewards/margins": 15.083953857421875, + "rewards/rejected": -21.183767318725586, + "step": 46580 + }, + { + "epoch": 2.78, + "learning_rate": 8.281131037672474e-08, + "logits/chosen": -2.5540995597839355, + "logits/rejected": -1.7233469486236572, + "logps/chosen": -688.307861328125, + "logps/rejected": -2111.421142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.111412048339844, + "rewards/margins": 14.552650451660156, + "rewards/rejected": -20.664064407348633, + "step": 46590 + }, + { + "epoch": 2.78, + "learning_rate": 8.236914453204098e-08, + "logits/chosen": -2.540794849395752, + "logits/rejected": -1.861029863357544, + "logps/chosen": -681.1228637695312, + "logps/rejected": -2040.6724853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.043734073638916, + "rewards/margins": 13.918966293334961, + "rewards/rejected": -19.962697982788086, + "step": 46600 + }, + { + "epoch": 2.78, + "learning_rate": 8.192814253482034e-08, + "logits/chosen": -2.5225865840911865, + "logits/rejected": -1.9383366107940674, + "logps/chosen": -669.0055541992188, + "logps/rejected": -2213.9296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004886150360107, + "rewards/margins": 15.689691543579102, + "rewards/rejected": -21.694576263427734, + "step": 46610 + }, + { + "epoch": 2.78, + "learning_rate": 8.148830459736106e-08, + "logits/chosen": -2.5222361087799072, + "logits/rejected": -1.8572800159454346, + "logps/chosen": -668.6769409179688, + "logps/rejected": -2184.40185546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97661018371582, + "rewards/margins": 15.426119804382324, + "rewards/rejected": -21.402732849121094, + "step": 46620 + }, + { + "epoch": 2.78, + "learning_rate": 8.1049630931401e-08, + "logits/chosen": -2.5107977390289307, + "logits/rejected": -1.8544776439666748, + "logps/chosen": -674.0588989257812, + "logps/rejected": -2094.191650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0040178298950195, + "rewards/margins": 14.498089790344238, + "rewards/rejected": -20.50210952758789, + "step": 46630 + }, + { + "epoch": 2.78, + "learning_rate": 8.061212174811789e-08, + "logits/chosen": -2.5683114528656006, + "logits/rejected": -1.9157699346542358, + "logps/chosen": -676.6843872070312, + "logps/rejected": -2137.4296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.065963268280029, + "rewards/margins": 14.869255065917969, + "rewards/rejected": -20.935216903686523, + "step": 46640 + }, + { + "epoch": 2.78, + "learning_rate": 8.017577725812825e-08, + "logits/chosen": -2.5416836738586426, + "logits/rejected": -1.814659833908081, + "logps/chosen": -687.66162109375, + "logps/rejected": -2075.188720703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.091066837310791, + "rewards/margins": 14.222108840942383, + "rewards/rejected": -20.313175201416016, + "step": 46650 + }, + { + "epoch": 2.78, + "learning_rate": 7.974059767148907e-08, + "logits/chosen": -2.554205894470215, + "logits/rejected": -1.8452256917953491, + "logps/chosen": -659.8897094726562, + "logps/rejected": -2046.116455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.858086585998535, + "rewards/margins": 14.171499252319336, + "rewards/rejected": -20.029586791992188, + "step": 46660 + }, + { + "epoch": 2.78, + "learning_rate": 7.930658319769525e-08, + "logits/chosen": -2.5728988647460938, + "logits/rejected": -1.9148527383804321, + "logps/chosen": -670.3735961914062, + "logps/rejected": -2099.92626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.945324897766113, + "rewards/margins": 14.607223510742188, + "rewards/rejected": -20.552547454833984, + "step": 46670 + }, + { + "epoch": 2.78, + "learning_rate": 7.887373404568133e-08, + "logits/chosen": -2.541633129119873, + "logits/rejected": -1.8526344299316406, + "logps/chosen": -691.0956420898438, + "logps/rejected": -2131.501953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.145422458648682, + "rewards/margins": 14.727876663208008, + "rewards/rejected": -20.8732967376709, + "step": 46680 + }, + { + "epoch": 2.78, + "learning_rate": 7.844205042382064e-08, + "logits/chosen": -2.518928050994873, + "logits/rejected": -1.7622610330581665, + "logps/chosen": -706.8642578125, + "logps/rejected": -2186.861328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.310729503631592, + "rewards/margins": 15.122041702270508, + "rewards/rejected": -21.43277359008789, + "step": 46690 + }, + { + "epoch": 2.78, + "learning_rate": 7.801153253992638e-08, + "logits/chosen": -2.477043390274048, + "logits/rejected": -1.8457527160644531, + "logps/chosen": -678.7701416015625, + "logps/rejected": -2092.248291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151999473571777, + "rewards/margins": 14.319297790527344, + "rewards/rejected": -20.471294403076172, + "step": 46700 + }, + { + "epoch": 2.79, + "learning_rate": 7.758218060124916e-08, + "logits/chosen": -2.495208978652954, + "logits/rejected": -1.9116795063018799, + "logps/chosen": -660.5848388671875, + "logps/rejected": -2056.93212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915273189544678, + "rewards/margins": 14.226465225219727, + "rewards/rejected": -20.141738891601562, + "step": 46710 + }, + { + "epoch": 2.79, + "learning_rate": 7.71539948144795e-08, + "logits/chosen": -2.5221335887908936, + "logits/rejected": -1.9146016836166382, + "logps/chosen": -660.3690185546875, + "logps/rejected": -2158.855712890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.928534507751465, + "rewards/margins": 15.213091850280762, + "rewards/rejected": -21.141626358032227, + "step": 46720 + }, + { + "epoch": 2.79, + "learning_rate": 7.67269753857458e-08, + "logits/chosen": -2.578709125518799, + "logits/rejected": -1.861035704612732, + "logps/chosen": -691.591064453125, + "logps/rejected": -2075.834716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.171484470367432, + "rewards/margins": 14.152093887329102, + "rewards/rejected": -20.323575973510742, + "step": 46730 + }, + { + "epoch": 2.79, + "learning_rate": 7.630112252061534e-08, + "logits/chosen": -2.5321478843688965, + "logits/rejected": -1.8658374547958374, + "logps/chosen": -697.5310668945312, + "logps/rejected": -2136.69482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.210446357727051, + "rewards/margins": 14.708251953125, + "rewards/rejected": -20.918697357177734, + "step": 46740 + }, + { + "epoch": 2.79, + "learning_rate": 7.587643642409353e-08, + "logits/chosen": -2.497938871383667, + "logits/rejected": -1.8143365383148193, + "logps/chosen": -695.0341186523438, + "logps/rejected": -2077.098876953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.207986354827881, + "rewards/margins": 14.113385200500488, + "rewards/rejected": -20.32137107849121, + "step": 46750 + }, + { + "epoch": 2.79, + "learning_rate": 7.545291730062466e-08, + "logits/chosen": -2.562577486038208, + "logits/rejected": -1.8784573078155518, + "logps/chosen": -694.7552490234375, + "logps/rejected": -2111.567626953125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.212237358093262, + "rewards/margins": 14.461196899414062, + "rewards/rejected": -20.67343521118164, + "step": 46760 + }, + { + "epoch": 2.79, + "learning_rate": 7.503056535408975e-08, + "logits/chosen": -2.5155391693115234, + "logits/rejected": -1.7903215885162354, + "logps/chosen": -693.1727294921875, + "logps/rejected": -2168.31982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25634765625, + "rewards/margins": 14.987266540527344, + "rewards/rejected": -21.24361228942871, + "step": 46770 + }, + { + "epoch": 2.79, + "learning_rate": 7.460938078781038e-08, + "logits/chosen": -2.519888401031494, + "logits/rejected": -1.7641013860702515, + "logps/chosen": -687.4049072265625, + "logps/rejected": -2083.641845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1899824142456055, + "rewards/margins": 14.209589958190918, + "rewards/rejected": -20.399574279785156, + "step": 46780 + }, + { + "epoch": 2.79, + "learning_rate": 7.418936380454377e-08, + "logits/chosen": -2.577152729034424, + "logits/rejected": -1.913750410079956, + "logps/chosen": -700.5567626953125, + "logps/rejected": -2160.04736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.235952854156494, + "rewards/margins": 14.925664901733398, + "rewards/rejected": -21.161617279052734, + "step": 46790 + }, + { + "epoch": 2.79, + "learning_rate": 7.377051460648682e-08, + "logits/chosen": -2.5726306438446045, + "logits/rejected": -1.9131101369857788, + "logps/chosen": -684.0477294921875, + "logps/rejected": -2103.5810546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.122270107269287, + "rewards/margins": 14.475393295288086, + "rewards/rejected": -20.59766387939453, + "step": 46800 + }, + { + "epoch": 2.79, + "learning_rate": 7.335283339527294e-08, + "logits/chosen": -2.5000288486480713, + "logits/rejected": -1.751633882522583, + "logps/chosen": -675.0101318359375, + "logps/rejected": -2157.42333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.005895137786865, + "rewards/margins": 15.11933422088623, + "rewards/rejected": -21.125228881835938, + "step": 46810 + }, + { + "epoch": 2.79, + "learning_rate": 7.293632037197434e-08, + "logits/chosen": -2.5449118614196777, + "logits/rejected": -1.888602614402771, + "logps/chosen": -698.7644653320312, + "logps/rejected": -2023.073974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3017168045043945, + "rewards/margins": 13.477874755859375, + "rewards/rejected": -19.779590606689453, + "step": 46820 + }, + { + "epoch": 2.79, + "learning_rate": 7.252097573709982e-08, + "logits/chosen": -2.5182900428771973, + "logits/rejected": -1.8932138681411743, + "logps/chosen": -707.769287109375, + "logps/rejected": -2064.218994140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3232927322387695, + "rewards/margins": 13.884038925170898, + "rewards/rejected": -20.20733070373535, + "step": 46830 + }, + { + "epoch": 2.79, + "learning_rate": 7.21067996905972e-08, + "logits/chosen": -2.5832910537719727, + "logits/rejected": -1.9453634023666382, + "logps/chosen": -677.0369873046875, + "logps/rejected": -2058.08203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.061339378356934, + "rewards/margins": 14.071481704711914, + "rewards/rejected": -20.132822036743164, + "step": 46840 + }, + { + "epoch": 2.79, + "learning_rate": 7.169379243184976e-08, + "logits/chosen": -2.5275824069976807, + "logits/rejected": -1.767577886581421, + "logps/chosen": -679.9091186523438, + "logps/rejected": -2216.70068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069261074066162, + "rewards/margins": 15.657687187194824, + "rewards/rejected": -21.726947784423828, + "step": 46850 + }, + { + "epoch": 2.79, + "learning_rate": 7.128195415967987e-08, + "logits/chosen": -2.5400495529174805, + "logits/rejected": -1.8945419788360596, + "logps/chosen": -662.5357055664062, + "logps/rejected": -2267.301513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.924284934997559, + "rewards/margins": 16.294876098632812, + "rewards/rejected": -22.219160079956055, + "step": 46860 + }, + { + "epoch": 2.79, + "learning_rate": 7.087128507234642e-08, + "logits/chosen": -2.537766695022583, + "logits/rejected": -1.8042895793914795, + "logps/chosen": -664.9937744140625, + "logps/rejected": -2126.4453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.925654411315918, + "rewards/margins": 14.898709297180176, + "rewards/rejected": -20.82436180114746, + "step": 46870 + }, + { + "epoch": 2.8, + "learning_rate": 7.04617853675449e-08, + "logits/chosen": -2.510934829711914, + "logits/rejected": -1.811241865158081, + "logps/chosen": -683.1253662109375, + "logps/rejected": -2045.2548828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.154810905456543, + "rewards/margins": 13.867228507995605, + "rewards/rejected": -20.02203941345215, + "step": 46880 + }, + { + "epoch": 2.8, + "learning_rate": 7.005345524240926e-08, + "logits/chosen": -2.55890154838562, + "logits/rejected": -1.8718925714492798, + "logps/chosen": -664.2725830078125, + "logps/rejected": -2165.9228515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.956556797027588, + "rewards/margins": 15.270875930786133, + "rewards/rejected": -21.22743034362793, + "step": 46890 + }, + { + "epoch": 2.8, + "learning_rate": 6.964629489350894e-08, + "logits/chosen": -2.5450479984283447, + "logits/rejected": -1.8443864583969116, + "logps/chosen": -708.7744750976562, + "logps/rejected": -2192.045654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.381344318389893, + "rewards/margins": 15.09606647491455, + "rewards/rejected": -21.4774112701416, + "step": 46900 + }, + { + "epoch": 2.8, + "learning_rate": 6.924030451685188e-08, + "logits/chosen": -2.5803585052490234, + "logits/rejected": -1.8783537149429321, + "logps/chosen": -663.0265502929688, + "logps/rejected": -2174.274658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.899228572845459, + "rewards/margins": 15.396766662597656, + "rewards/rejected": -21.29599380493164, + "step": 46910 + }, + { + "epoch": 2.8, + "learning_rate": 6.883548430788062e-08, + "logits/chosen": -2.534604072570801, + "logits/rejected": -1.9054114818572998, + "logps/chosen": -698.0333251953125, + "logps/rejected": -2163.714599609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.245051860809326, + "rewards/margins": 14.94243049621582, + "rewards/rejected": -21.187480926513672, + "step": 46920 + }, + { + "epoch": 2.8, + "learning_rate": 6.843183446147678e-08, + "logits/chosen": -2.4721310138702393, + "logits/rejected": -1.7849397659301758, + "logps/chosen": -685.6085205078125, + "logps/rejected": -2129.851318359375, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.162440299987793, + "rewards/margins": 14.69129753112793, + "rewards/rejected": -20.853736877441406, + "step": 46930 + }, + { + "epoch": 2.8, + "learning_rate": 6.802935517195658e-08, + "logits/chosen": -2.5231547355651855, + "logits/rejected": -1.8804184198379517, + "logps/chosen": -691.2014770507812, + "logps/rejected": -2128.352294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.159139156341553, + "rewards/margins": 14.672332763671875, + "rewards/rejected": -20.831472396850586, + "step": 46940 + }, + { + "epoch": 2.8, + "learning_rate": 6.762804663307365e-08, + "logits/chosen": -2.4944472312927246, + "logits/rejected": -1.762385368347168, + "logps/chosen": -698.126708984375, + "logps/rejected": -2061.5498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194155216217041, + "rewards/margins": 13.98669719696045, + "rewards/rejected": -20.18085289001465, + "step": 46950 + }, + { + "epoch": 2.8, + "learning_rate": 6.722790903801819e-08, + "logits/chosen": -2.5208871364593506, + "logits/rejected": -1.8972753286361694, + "logps/chosen": -666.8382568359375, + "logps/rejected": -2042.1019287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004981517791748, + "rewards/margins": 13.985746383666992, + "rewards/rejected": -19.99073028564453, + "step": 46960 + }, + { + "epoch": 2.8, + "learning_rate": 6.682894257941635e-08, + "logits/chosen": -2.531975030899048, + "logits/rejected": -1.8512489795684814, + "logps/chosen": -662.7518310546875, + "logps/rejected": -2185.3759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.918298244476318, + "rewards/margins": 15.48864459991455, + "rewards/rejected": -21.40694236755371, + "step": 46970 + }, + { + "epoch": 2.8, + "learning_rate": 6.643114744933038e-08, + "logits/chosen": -2.5235393047332764, + "logits/rejected": -1.9213613271713257, + "logps/chosen": -662.2254638671875, + "logps/rejected": -2121.1357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.98894739151001, + "rewards/margins": 14.787420272827148, + "rewards/rejected": -20.7763671875, + "step": 46980 + }, + { + "epoch": 2.8, + "learning_rate": 6.603452383925901e-08, + "logits/chosen": -2.5251247882843018, + "logits/rejected": -1.8694055080413818, + "logps/chosen": -670.2806396484375, + "logps/rejected": -2094.330810546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00808048248291, + "rewards/margins": 14.507476806640625, + "rewards/rejected": -20.51555824279785, + "step": 46990 + }, + { + "epoch": 2.8, + "learning_rate": 6.563907194013702e-08, + "logits/chosen": -2.536083459854126, + "logits/rejected": -1.8015861511230469, + "logps/chosen": -688.3124389648438, + "logps/rejected": -2142.44775390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.121410369873047, + "rewards/margins": 14.864214897155762, + "rewards/rejected": -20.985626220703125, + "step": 47000 + }, + { + "epoch": 2.8, + "learning_rate": 6.524479194233463e-08, + "logits/chosen": -2.539639949798584, + "logits/rejected": -1.8069534301757812, + "logps/chosen": -665.1608276367188, + "logps/rejected": -2144.9267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9824018478393555, + "rewards/margins": 15.023954391479492, + "rewards/rejected": -21.006357192993164, + "step": 47010 + }, + { + "epoch": 2.8, + "learning_rate": 6.485168403565834e-08, + "logits/chosen": -2.5571560859680176, + "logits/rejected": -1.9241364002227783, + "logps/chosen": -700.6619873046875, + "logps/rejected": -2200.407470703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.254749298095703, + "rewards/margins": 15.307095527648926, + "rewards/rejected": -21.561845779418945, + "step": 47020 + }, + { + "epoch": 2.8, + "learning_rate": 6.445974840935065e-08, + "logits/chosen": -2.5345206260681152, + "logits/rejected": -1.8848564624786377, + "logps/chosen": -692.8067626953125, + "logps/rejected": -2074.433837890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204917907714844, + "rewards/margins": 14.100835800170898, + "rewards/rejected": -20.30575180053711, + "step": 47030 + }, + { + "epoch": 2.81, + "learning_rate": 6.406898525208843e-08, + "logits/chosen": -2.5634961128234863, + "logits/rejected": -1.9256664514541626, + "logps/chosen": -671.770751953125, + "logps/rejected": -2176.504638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019883155822754, + "rewards/margins": 15.299352645874023, + "rewards/rejected": -21.319236755371094, + "step": 47040 + }, + { + "epoch": 2.81, + "learning_rate": 6.367939475198592e-08, + "logits/chosen": -2.5507805347442627, + "logits/rejected": -1.8546031713485718, + "logps/chosen": -677.8995361328125, + "logps/rejected": -2159.44970703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0643768310546875, + "rewards/margins": 15.082345962524414, + "rewards/rejected": -21.14672088623047, + "step": 47050 + }, + { + "epoch": 2.81, + "learning_rate": 6.329097709659143e-08, + "logits/chosen": -2.52693247795105, + "logits/rejected": -1.7560161352157593, + "logps/chosen": -674.9082641601562, + "logps/rejected": -2277.20849609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.040009498596191, + "rewards/margins": 16.276203155517578, + "rewards/rejected": -22.316211700439453, + "step": 47060 + }, + { + "epoch": 2.81, + "learning_rate": 6.290373247289012e-08, + "logits/chosen": -2.5581767559051514, + "logits/rejected": -1.7821681499481201, + "logps/chosen": -681.5172729492188, + "logps/rejected": -2137.0869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.194296836853027, + "rewards/margins": 14.731167793273926, + "rewards/rejected": -20.925466537475586, + "step": 47070 + }, + { + "epoch": 2.81, + "learning_rate": 6.251766106730033e-08, + "logits/chosen": -2.5737624168395996, + "logits/rejected": -1.868833303451538, + "logps/chosen": -673.5806274414062, + "logps/rejected": -2141.82958984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056980133056641, + "rewards/margins": 14.905644416809082, + "rewards/rejected": -20.96262550354004, + "step": 47080 + }, + { + "epoch": 2.81, + "learning_rate": 6.213276306567762e-08, + "logits/chosen": -2.5217247009277344, + "logits/rejected": -1.9380607604980469, + "logps/chosen": -679.1942138671875, + "logps/rejected": -2084.87451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.049038887023926, + "rewards/margins": 14.352285385131836, + "rewards/rejected": -20.401325225830078, + "step": 47090 + }, + { + "epoch": 2.81, + "learning_rate": 6.174903865331177e-08, + "logits/chosen": -2.5282504558563232, + "logits/rejected": -1.8933216333389282, + "logps/chosen": -653.0824584960938, + "logps/rejected": -2126.1787109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.865716457366943, + "rewards/margins": 14.943493843078613, + "rewards/rejected": -20.8092098236084, + "step": 47100 + }, + { + "epoch": 2.81, + "learning_rate": 6.136648801492756e-08, + "logits/chosen": -2.565070390701294, + "logits/rejected": -1.846265435218811, + "logps/chosen": -677.4641723632812, + "logps/rejected": -2216.351806640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0560302734375, + "rewards/margins": 15.667757034301758, + "rewards/rejected": -21.72378921508789, + "step": 47110 + }, + { + "epoch": 2.81, + "learning_rate": 6.098511133468516e-08, + "logits/chosen": -2.553659677505493, + "logits/rejected": -1.7508271932601929, + "logps/chosen": -682.1256103515625, + "logps/rejected": -2072.38232421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.102184295654297, + "rewards/margins": 14.167813301086426, + "rewards/rejected": -20.26999855041504, + "step": 47120 + }, + { + "epoch": 2.81, + "learning_rate": 6.060490879617853e-08, + "logits/chosen": -2.5271661281585693, + "logits/rejected": -1.9085804224014282, + "logps/chosen": -671.64501953125, + "logps/rejected": -2092.87060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.001364707946777, + "rewards/margins": 14.502592086791992, + "rewards/rejected": -20.503955841064453, + "step": 47130 + }, + { + "epoch": 2.81, + "learning_rate": 6.022588058243823e-08, + "logits/chosen": -2.5486743450164795, + "logits/rejected": -1.8918434381484985, + "logps/chosen": -668.6968994140625, + "logps/rejected": -2112.817138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971996784210205, + "rewards/margins": 14.721391677856445, + "rewards/rejected": -20.69338607788086, + "step": 47140 + }, + { + "epoch": 2.81, + "learning_rate": 5.984802687592745e-08, + "logits/chosen": -2.527763843536377, + "logits/rejected": -1.8212769031524658, + "logps/chosen": -669.57421875, + "logps/rejected": -2192.658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9623236656188965, + "rewards/margins": 15.506696701049805, + "rewards/rejected": -21.469018936157227, + "step": 47150 + }, + { + "epoch": 2.81, + "learning_rate": 5.947134785854597e-08, + "logits/chosen": -2.5508296489715576, + "logits/rejected": -1.9466297626495361, + "logps/chosen": -666.468994140625, + "logps/rejected": -2197.472900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.984175682067871, + "rewards/margins": 15.540921211242676, + "rewards/rejected": -21.525094985961914, + "step": 47160 + }, + { + "epoch": 2.81, + "learning_rate": 5.9095843711625964e-08, + "logits/chosen": -2.514777898788452, + "logits/rejected": -1.9424779415130615, + "logps/chosen": -667.6060791015625, + "logps/rejected": -2104.600341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.98452091217041, + "rewards/margins": 14.60365104675293, + "rewards/rejected": -20.588172912597656, + "step": 47170 + }, + { + "epoch": 2.81, + "learning_rate": 5.8721514615935895e-08, + "logits/chosen": -2.5380196571350098, + "logits/rejected": -1.9046646356582642, + "logps/chosen": -673.3609619140625, + "logps/rejected": -2198.1142578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.029489040374756, + "rewards/margins": 15.508066177368164, + "rewards/rejected": -21.537555694580078, + "step": 47180 + }, + { + "epoch": 2.81, + "learning_rate": 5.8348360751677435e-08, + "logits/chosen": -2.4800820350646973, + "logits/rejected": -1.8473131656646729, + "logps/chosen": -685.9952392578125, + "logps/rejected": -2071.17919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148233890533447, + "rewards/margins": 14.126672744750977, + "rewards/rejected": -20.274906158447266, + "step": 47190 + }, + { + "epoch": 2.81, + "learning_rate": 5.7976382298487454e-08, + "logits/chosen": -2.4953689575195312, + "logits/rejected": -1.8000198602676392, + "logps/chosen": -677.9495849609375, + "logps/rejected": -2122.07568359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105116844177246, + "rewards/margins": 14.669015884399414, + "rewards/rejected": -20.774131774902344, + "step": 47200 + }, + { + "epoch": 2.82, + "learning_rate": 5.7605579435435486e-08, + "logits/chosen": -2.5113186836242676, + "logits/rejected": -1.7418453693389893, + "logps/chosen": -693.5748901367188, + "logps/rejected": -2181.266845703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1955718994140625, + "rewards/margins": 15.170636177062988, + "rewards/rejected": -21.366207122802734, + "step": 47210 + }, + { + "epoch": 2.82, + "learning_rate": 5.7235952341026524e-08, + "logits/chosen": -2.5136733055114746, + "logits/rejected": -1.7890678644180298, + "logps/chosen": -686.45361328125, + "logps/rejected": -2135.130126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1684889793396, + "rewards/margins": 14.742996215820312, + "rewards/rejected": -20.911483764648438, + "step": 47220 + }, + { + "epoch": 2.82, + "learning_rate": 5.686750119319878e-08, + "logits/chosen": -2.554689645767212, + "logits/rejected": -1.8305184841156006, + "logps/chosen": -687.0180053710938, + "logps/rejected": -2171.148681640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.108046531677246, + "rewards/margins": 15.155177116394043, + "rewards/rejected": -21.26322364807129, + "step": 47230 + }, + { + "epoch": 2.82, + "learning_rate": 5.65002261693251e-08, + "logits/chosen": -2.521134853363037, + "logits/rejected": -1.7845122814178467, + "logps/chosen": -685.08447265625, + "logps/rejected": -2181.797607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143383502960205, + "rewards/margins": 15.236211776733398, + "rewards/rejected": -21.379594802856445, + "step": 47240 + }, + { + "epoch": 2.82, + "learning_rate": 5.6134127446211275e-08, + "logits/chosen": -2.563678741455078, + "logits/rejected": -1.962903618812561, + "logps/chosen": -691.3277587890625, + "logps/rejected": -2243.520263671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.232640266418457, + "rewards/margins": 15.755285263061523, + "rewards/rejected": -21.98792839050293, + "step": 47250 + }, + { + "epoch": 2.82, + "learning_rate": 5.576920520009715e-08, + "logits/chosen": -2.569988489151001, + "logits/rejected": -1.9352738857269287, + "logps/chosen": -680.8040771484375, + "logps/rejected": -2068.232177734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.073209762573242, + "rewards/margins": 14.176905632019043, + "rewards/rejected": -20.2501163482666, + "step": 47260 + }, + { + "epoch": 2.82, + "learning_rate": 5.5405459606656666e-08, + "logits/chosen": -2.5687403678894043, + "logits/rejected": -1.868665099143982, + "logps/chosen": -674.2461547851562, + "logps/rejected": -2221.7578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997897148132324, + "rewards/margins": 15.76598072052002, + "rewards/rejected": -21.76388168334961, + "step": 47270 + }, + { + "epoch": 2.82, + "learning_rate": 5.5042890840996676e-08, + "logits/chosen": -2.5381112098693848, + "logits/rejected": -1.9319162368774414, + "logps/chosen": -671.0023193359375, + "logps/rejected": -2139.60595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.983185291290283, + "rewards/margins": 14.973075866699219, + "rewards/rejected": -20.95625877380371, + "step": 47280 + }, + { + "epoch": 2.82, + "learning_rate": 5.468149907765785e-08, + "logits/chosen": -2.539281129837036, + "logits/rejected": -1.8521482944488525, + "logps/chosen": -687.8781127929688, + "logps/rejected": -2182.696044921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.118537902832031, + "rewards/margins": 15.269830703735352, + "rewards/rejected": -21.388368606567383, + "step": 47290 + }, + { + "epoch": 2.82, + "learning_rate": 5.432128449061464e-08, + "logits/chosen": -2.554746150970459, + "logits/rejected": -1.7857024669647217, + "logps/chosen": -672.5222778320312, + "logps/rejected": -2111.75830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023922920227051, + "rewards/margins": 14.66132926940918, + "rewards/rejected": -20.685253143310547, + "step": 47300 + }, + { + "epoch": 2.82, + "learning_rate": 5.3962247253273035e-08, + "logits/chosen": -2.566312551498413, + "logits/rejected": -1.8927781581878662, + "logps/chosen": -663.545166015625, + "logps/rejected": -2039.658935546875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915989398956299, + "rewards/margins": 14.041834831237793, + "rewards/rejected": -19.95782470703125, + "step": 47310 + }, + { + "epoch": 2.82, + "learning_rate": 5.360438753847508e-08, + "logits/chosen": -2.5676960945129395, + "logits/rejected": -1.8665297031402588, + "logps/chosen": -691.0333251953125, + "logps/rejected": -2116.93798828125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.257615089416504, + "rewards/margins": 14.487396240234375, + "rewards/rejected": -20.745010375976562, + "step": 47320 + }, + { + "epoch": 2.82, + "learning_rate": 5.324770551849351e-08, + "logits/chosen": -2.571864604949951, + "logits/rejected": -1.8762840032577515, + "logps/chosen": -677.481201171875, + "logps/rejected": -2156.761962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.033608436584473, + "rewards/margins": 15.0857515335083, + "rewards/rejected": -21.119361877441406, + "step": 47330 + }, + { + "epoch": 2.82, + "learning_rate": 5.2892201365035144e-08, + "logits/chosen": -2.5574376583099365, + "logits/rejected": -1.8754304647445679, + "logps/chosen": -695.974609375, + "logps/rejected": -2250.69873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2494635581970215, + "rewards/margins": 15.815910339355469, + "rewards/rejected": -22.065372467041016, + "step": 47340 + }, + { + "epoch": 2.82, + "learning_rate": 5.253787524924031e-08, + "logits/chosen": -2.5657660961151123, + "logits/rejected": -1.797064185142517, + "logps/chosen": -662.8653564453125, + "logps/rejected": -2069.41455078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.931711196899414, + "rewards/margins": 14.327664375305176, + "rewards/rejected": -20.259374618530273, + "step": 47350 + }, + { + "epoch": 2.82, + "learning_rate": 5.218472734168062e-08, + "logits/chosen": -2.5532426834106445, + "logits/rejected": -1.9720685482025146, + "logps/chosen": -674.6867065429688, + "logps/rejected": -2181.9169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.023752212524414, + "rewards/margins": 15.347574234008789, + "rewards/rejected": -21.371326446533203, + "step": 47360 + }, + { + "epoch": 2.82, + "learning_rate": 5.18327578123623e-08, + "logits/chosen": -2.5416009426116943, + "logits/rejected": -1.885337471961975, + "logps/chosen": -680.5325927734375, + "logps/rejected": -2081.856201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098489284515381, + "rewards/margins": 14.288740158081055, + "rewards/rejected": -20.387229919433594, + "step": 47370 + }, + { + "epoch": 2.83, + "learning_rate": 5.148196683072315e-08, + "logits/chosen": -2.5502355098724365, + "logits/rejected": -1.9338178634643555, + "logps/chosen": -678.5765380859375, + "logps/rejected": -2102.2392578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.121753215789795, + "rewards/margins": 14.454063415527344, + "rewards/rejected": -20.575815200805664, + "step": 47380 + }, + { + "epoch": 2.83, + "learning_rate": 5.113235456563392e-08, + "logits/chosen": -2.5712649822235107, + "logits/rejected": -1.875290870666504, + "logps/chosen": -676.6262817382812, + "logps/rejected": -1980.990966796875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997308254241943, + "rewards/margins": 13.380887985229492, + "rewards/rejected": -19.37819480895996, + "step": 47390 + }, + { + "epoch": 2.83, + "learning_rate": 5.078392118539777e-08, + "logits/chosen": -2.5602688789367676, + "logits/rejected": -1.7996370792388916, + "logps/chosen": -664.0924072265625, + "logps/rejected": -2164.23876953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.893694877624512, + "rewards/margins": 15.280848503112793, + "rewards/rejected": -21.174543380737305, + "step": 47400 + }, + { + "epoch": 2.83, + "learning_rate": 5.043666685775106e-08, + "logits/chosen": -2.5235097408294678, + "logits/rejected": -1.773857831954956, + "logps/chosen": -666.3176879882812, + "logps/rejected": -2194.57470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985875129699707, + "rewards/margins": 15.523536682128906, + "rewards/rejected": -21.509410858154297, + "step": 47410 + }, + { + "epoch": 2.83, + "learning_rate": 5.0090591749861473e-08, + "logits/chosen": -2.5411510467529297, + "logits/rejected": -1.8447891473770142, + "logps/chosen": -685.4935913085938, + "logps/rejected": -2049.56103515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095654487609863, + "rewards/margins": 13.958871841430664, + "rewards/rejected": -20.05452537536621, + "step": 47420 + }, + { + "epoch": 2.83, + "learning_rate": 4.974569602832991e-08, + "logits/chosen": -2.580054759979248, + "logits/rejected": -1.911026954650879, + "logps/chosen": -670.1770629882812, + "logps/rejected": -2131.51904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9405741691589355, + "rewards/margins": 14.935157775878906, + "rewards/rejected": -20.875732421875, + "step": 47430 + }, + { + "epoch": 2.83, + "learning_rate": 4.940197985918882e-08, + "logits/chosen": -2.529546022415161, + "logits/rejected": -1.8089923858642578, + "logps/chosen": -676.3284912109375, + "logps/rejected": -2103.535888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.074386119842529, + "rewards/margins": 14.525541305541992, + "rewards/rejected": -20.59992790222168, + "step": 47440 + }, + { + "epoch": 2.83, + "learning_rate": 4.905944340790364e-08, + "logits/chosen": -2.5703659057617188, + "logits/rejected": -1.8957693576812744, + "logps/chosen": -665.1199340820312, + "logps/rejected": -2141.795654296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.952441215515137, + "rewards/margins": 15.021100997924805, + "rewards/rejected": -20.973543167114258, + "step": 47450 + }, + { + "epoch": 2.83, + "learning_rate": 4.8718086839370794e-08, + "logits/chosen": -2.5345606803894043, + "logits/rejected": -1.89739990234375, + "logps/chosen": -701.9666137695312, + "logps/rejected": -2134.614990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250332355499268, + "rewards/margins": 14.653836250305176, + "rewards/rejected": -20.904170989990234, + "step": 47460 + }, + { + "epoch": 2.83, + "learning_rate": 4.837791031792022e-08, + "logits/chosen": -2.5111706256866455, + "logits/rejected": -1.7772338390350342, + "logps/chosen": -670.9286499023438, + "logps/rejected": -2171.6328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.002225875854492, + "rewards/margins": 15.2686767578125, + "rewards/rejected": -21.270902633666992, + "step": 47470 + }, + { + "epoch": 2.83, + "learning_rate": 4.8038914007312035e-08, + "logits/chosen": -2.5487418174743652, + "logits/rejected": -1.8645923137664795, + "logps/chosen": -672.6256713867188, + "logps/rejected": -2155.6533203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045359134674072, + "rewards/margins": 15.075834274291992, + "rewards/rejected": -21.12119483947754, + "step": 47480 + }, + { + "epoch": 2.83, + "learning_rate": 4.7701098070739304e-08, + "logits/chosen": -2.550095319747925, + "logits/rejected": -1.8620383739471436, + "logps/chosen": -693.6041870117188, + "logps/rejected": -2129.43701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.254053115844727, + "rewards/margins": 14.602615356445312, + "rewards/rejected": -20.85666847229004, + "step": 47490 + }, + { + "epoch": 2.83, + "learning_rate": 4.736446267082667e-08, + "logits/chosen": -2.5161101818084717, + "logits/rejected": -1.9454540014266968, + "logps/chosen": -706.1748657226562, + "logps/rejected": -2144.411376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.338494300842285, + "rewards/margins": 14.664685249328613, + "rewards/rejected": -21.0031795501709, + "step": 47500 + }, + { + "epoch": 2.83, + "learning_rate": 4.702900796963061e-08, + "logits/chosen": -2.5175135135650635, + "logits/rejected": -1.8122961521148682, + "logps/chosen": -668.8933715820312, + "logps/rejected": -2121.621337890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971011638641357, + "rewards/margins": 14.808650016784668, + "rewards/rejected": -20.779661178588867, + "step": 47510 + }, + { + "epoch": 2.83, + "learning_rate": 4.66947341286389e-08, + "logits/chosen": -2.5315327644348145, + "logits/rejected": -1.8161300420761108, + "logps/chosen": -688.0343017578125, + "logps/rejected": -2086.309814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163668632507324, + "rewards/margins": 14.260505676269531, + "rewards/rejected": -20.42417335510254, + "step": 47520 + }, + { + "epoch": 2.83, + "learning_rate": 4.636164130877058e-08, + "logits/chosen": -2.583683490753174, + "logits/rejected": -1.9412953853607178, + "logps/chosen": -679.1873779296875, + "logps/rejected": -2095.97412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115659713745117, + "rewards/margins": 14.414434432983398, + "rewards/rejected": -20.530094146728516, + "step": 47530 + }, + { + "epoch": 2.83, + "learning_rate": 4.602972967037711e-08, + "logits/chosen": -2.492678642272949, + "logits/rejected": -1.888042688369751, + "logps/chosen": -686.2781982421875, + "logps/rejected": -2157.820068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172307968139648, + "rewards/margins": 14.95256519317627, + "rewards/rejected": -21.124874114990234, + "step": 47540 + }, + { + "epoch": 2.84, + "learning_rate": 4.5698999373240404e-08, + "logits/chosen": -2.536902904510498, + "logits/rejected": -1.8325836658477783, + "logps/chosen": -687.1345825195312, + "logps/rejected": -2129.013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.115371227264404, + "rewards/margins": 14.733335494995117, + "rewards/rejected": -20.84870719909668, + "step": 47550 + }, + { + "epoch": 2.84, + "learning_rate": 4.5369450576574214e-08, + "logits/chosen": -2.564908504486084, + "logits/rejected": -1.8695217370986938, + "logps/chosen": -658.3702392578125, + "logps/rejected": -2110.984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.923367023468018, + "rewards/margins": 14.738192558288574, + "rewards/rejected": -20.66156005859375, + "step": 47560 + }, + { + "epoch": 2.84, + "learning_rate": 4.504108343902302e-08, + "logits/chosen": -2.5192713737487793, + "logits/rejected": -1.8150522708892822, + "logps/chosen": -678.7591552734375, + "logps/rejected": -2076.673095703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0677900314331055, + "rewards/margins": 14.249433517456055, + "rewards/rejected": -20.317218780517578, + "step": 47570 + }, + { + "epoch": 2.84, + "learning_rate": 4.471389811866289e-08, + "logits/chosen": -2.5906484127044678, + "logits/rejected": -1.9081836938858032, + "logps/chosen": -660.6258544921875, + "logps/rejected": -2128.07666015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.912613868713379, + "rewards/margins": 14.923670768737793, + "rewards/rejected": -20.836284637451172, + "step": 47580 + }, + { + "epoch": 2.84, + "learning_rate": 4.438789477300115e-08, + "logits/chosen": -2.51710844039917, + "logits/rejected": -1.823919653892517, + "logps/chosen": -682.1995239257812, + "logps/rejected": -2118.607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.163671016693115, + "rewards/margins": 14.591218948364258, + "rewards/rejected": -20.754886627197266, + "step": 47590 + }, + { + "epoch": 2.84, + "learning_rate": 4.40630735589756e-08, + "logits/chosen": -2.5426859855651855, + "logits/rejected": -1.9016996622085571, + "logps/chosen": -686.1389770507812, + "logps/rejected": -2108.010498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132206439971924, + "rewards/margins": 14.495864868164062, + "rewards/rejected": -20.628070831298828, + "step": 47600 + }, + { + "epoch": 2.84, + "learning_rate": 4.373943463295477e-08, + "logits/chosen": -2.502950668334961, + "logits/rejected": -1.8067753314971924, + "logps/chosen": -653.8362426757812, + "logps/rejected": -2258.2373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.868977069854736, + "rewards/margins": 16.267526626586914, + "rewards/rejected": -22.13650131225586, + "step": 47610 + }, + { + "epoch": 2.84, + "learning_rate": 4.341697815073903e-08, + "logits/chosen": -2.55815052986145, + "logits/rejected": -1.7902015447616577, + "logps/chosen": -673.0187377929688, + "logps/rejected": -2210.58935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0123772621154785, + "rewards/margins": 15.645230293273926, + "rewards/rejected": -21.657608032226562, + "step": 47620 + }, + { + "epoch": 2.84, + "learning_rate": 4.309570426755866e-08, + "logits/chosen": -2.4957470893859863, + "logits/rejected": -1.8015661239624023, + "logps/chosen": -697.4530029296875, + "logps/rejected": -2155.7431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25293493270874, + "rewards/margins": 14.854543685913086, + "rewards/rejected": -21.107479095458984, + "step": 47630 + }, + { + "epoch": 2.84, + "learning_rate": 4.277561313807493e-08, + "logits/chosen": -2.517493963241577, + "logits/rejected": -1.8259299993515015, + "logps/chosen": -692.1400146484375, + "logps/rejected": -2130.933837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.226770401000977, + "rewards/margins": 14.632356643676758, + "rewards/rejected": -20.859127044677734, + "step": 47640 + }, + { + "epoch": 2.84, + "learning_rate": 4.245670491637988e-08, + "logits/chosen": -2.5154199600219727, + "logits/rejected": -1.860664963722229, + "logps/chosen": -676.83349609375, + "logps/rejected": -2135.884521484375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066385746002197, + "rewards/margins": 14.842989921569824, + "rewards/rejected": -20.909374237060547, + "step": 47650 + }, + { + "epoch": 2.84, + "learning_rate": 4.213897975599568e-08, + "logits/chosen": -2.5054843425750732, + "logits/rejected": -1.7045276165008545, + "logps/chosen": -693.5032348632812, + "logps/rejected": -2108.269287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.158959865570068, + "rewards/margins": 14.492403984069824, + "rewards/rejected": -20.651365280151367, + "step": 47660 + }, + { + "epoch": 2.84, + "learning_rate": 4.1822437809874994e-08, + "logits/chosen": -2.559051036834717, + "logits/rejected": -1.9234611988067627, + "logps/chosen": -663.4970092773438, + "logps/rejected": -2191.361328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.962947368621826, + "rewards/margins": 15.512721061706543, + "rewards/rejected": -21.475669860839844, + "step": 47670 + }, + { + "epoch": 2.84, + "learning_rate": 4.150707923040176e-08, + "logits/chosen": -2.5145649909973145, + "logits/rejected": -1.836493730545044, + "logps/chosen": -637.5252075195312, + "logps/rejected": -2078.739013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.734302997589111, + "rewards/margins": 14.611444473266602, + "rewards/rejected": -20.345745086669922, + "step": 47680 + }, + { + "epoch": 2.84, + "learning_rate": 4.11929041693887e-08, + "logits/chosen": -2.5427849292755127, + "logits/rejected": -1.8053820133209229, + "logps/chosen": -693.7364501953125, + "logps/rejected": -2144.505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.25128698348999, + "rewards/margins": 14.7566556930542, + "rewards/rejected": -21.007946014404297, + "step": 47690 + }, + { + "epoch": 2.84, + "learning_rate": 4.0879912778080956e-08, + "logits/chosen": -2.503117322921753, + "logits/rejected": -1.728816270828247, + "logps/chosen": -703.8245239257812, + "logps/rejected": -2102.326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.326975345611572, + "rewards/margins": 14.248906135559082, + "rewards/rejected": -20.57588005065918, + "step": 47700 + }, + { + "epoch": 2.84, + "learning_rate": 4.0568105207151046e-08, + "logits/chosen": -2.497243642807007, + "logits/rejected": -1.8414499759674072, + "logps/chosen": -700.8486938476562, + "logps/rejected": -2181.412841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.340514183044434, + "rewards/margins": 15.033048629760742, + "rewards/rejected": -21.37356185913086, + "step": 47710 + }, + { + "epoch": 2.85, + "learning_rate": 4.025748160670473e-08, + "logits/chosen": -2.5655412673950195, + "logits/rejected": -1.900296926498413, + "logps/chosen": -671.3773803710938, + "logps/rejected": -2067.399169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9938578605651855, + "rewards/margins": 14.258296012878418, + "rewards/rejected": -20.252155303955078, + "step": 47720 + }, + { + "epoch": 2.85, + "learning_rate": 3.994804212627462e-08, + "logits/chosen": -2.5636401176452637, + "logits/rejected": -1.9296932220458984, + "logps/chosen": -692.0986938476562, + "logps/rejected": -2211.77294921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.223373889923096, + "rewards/margins": 15.448974609375, + "rewards/rejected": -21.67234992980957, + "step": 47730 + }, + { + "epoch": 2.85, + "learning_rate": 3.963978691482628e-08, + "logits/chosen": -2.508497953414917, + "logits/rejected": -1.8899345397949219, + "logps/chosen": -678.550537109375, + "logps/rejected": -2203.274658203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125723838806152, + "rewards/margins": 15.461532592773438, + "rewards/rejected": -21.587255477905273, + "step": 47740 + }, + { + "epoch": 2.85, + "learning_rate": 3.933271612075296e-08, + "logits/chosen": -2.533334970474243, + "logits/rejected": -1.8877637386322021, + "logps/chosen": -672.4347534179688, + "logps/rejected": -2101.23974609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.995011329650879, + "rewards/margins": 14.57408332824707, + "rewards/rejected": -20.569093704223633, + "step": 47750 + }, + { + "epoch": 2.85, + "learning_rate": 3.902682989187889e-08, + "logits/chosen": -2.512589931488037, + "logits/rejected": -1.859840750694275, + "logps/chosen": -667.7904052734375, + "logps/rejected": -2067.97509765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025022983551025, + "rewards/margins": 14.20838451385498, + "rewards/rejected": -20.233409881591797, + "step": 47760 + }, + { + "epoch": 2.85, + "learning_rate": 3.8722128375457136e-08, + "logits/chosen": -2.5760319232940674, + "logits/rejected": -1.862810492515564, + "logps/chosen": -677.1986694335938, + "logps/rejected": -2179.998291015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020535945892334, + "rewards/margins": 15.335311889648438, + "rewards/rejected": -21.355846405029297, + "step": 47770 + }, + { + "epoch": 2.85, + "learning_rate": 3.841861171817174e-08, + "logits/chosen": -2.4859166145324707, + "logits/rejected": -1.8246815204620361, + "logps/chosen": -672.9573974609375, + "logps/rejected": -2095.293701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.095870018005371, + "rewards/margins": 14.415499687194824, + "rewards/rejected": -20.511371612548828, + "step": 47780 + }, + { + "epoch": 2.85, + "learning_rate": 3.8116280066134994e-08, + "logits/chosen": -2.5613465309143066, + "logits/rejected": -1.9155223369598389, + "logps/chosen": -662.9572143554688, + "logps/rejected": -2135.5224609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.882259368896484, + "rewards/margins": 15.026023864746094, + "rewards/rejected": -20.908283233642578, + "step": 47790 + }, + { + "epoch": 2.85, + "learning_rate": 3.7815133564889916e-08, + "logits/chosen": -2.611546277999878, + "logits/rejected": -1.875576376914978, + "logps/chosen": -671.8868408203125, + "logps/rejected": -2135.354248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9433088302612305, + "rewards/margins": 14.951828002929688, + "rewards/rejected": -20.895137786865234, + "step": 47800 + }, + { + "epoch": 2.85, + "learning_rate": 3.751517235940805e-08, + "logits/chosen": -2.5202412605285645, + "logits/rejected": -1.7706420421600342, + "logps/chosen": -687.0828247070312, + "logps/rejected": -2169.42138671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1380486488342285, + "rewards/margins": 15.107882499694824, + "rewards/rejected": -21.24593162536621, + "step": 47810 + }, + { + "epoch": 2.85, + "learning_rate": 3.721639659409054e-08, + "logits/chosen": -2.5662174224853516, + "logits/rejected": -1.7732973098754883, + "logps/chosen": -662.0130004882812, + "logps/rejected": -2082.178955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.934773921966553, + "rewards/margins": 14.450090408325195, + "rewards/rejected": -20.384862899780273, + "step": 47820 + }, + { + "epoch": 2.85, + "learning_rate": 3.6918806412768736e-08, + "logits/chosen": -2.478520631790161, + "logits/rejected": -1.8062931299209595, + "logps/chosen": -691.7656860351562, + "logps/rejected": -2102.6591796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2340898513793945, + "rewards/margins": 14.351289749145508, + "rewards/rejected": -20.58538055419922, + "step": 47830 + }, + { + "epoch": 2.85, + "learning_rate": 3.662240195870165e-08, + "logits/chosen": -2.549574136734009, + "logits/rejected": -1.9584376811981201, + "logps/chosen": -676.7379760742188, + "logps/rejected": -2133.9384765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0297746658325195, + "rewards/margins": 14.854937553405762, + "rewards/rejected": -20.88471031188965, + "step": 47840 + }, + { + "epoch": 2.85, + "learning_rate": 3.63271833745793e-08, + "logits/chosen": -2.517732620239258, + "logits/rejected": -1.875828504562378, + "logps/chosen": -682.2874755859375, + "logps/rejected": -2102.638916015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1290364265441895, + "rewards/margins": 14.43586254119873, + "rewards/rejected": -20.56490135192871, + "step": 47850 + }, + { + "epoch": 2.85, + "learning_rate": 3.6033150802519126e-08, + "logits/chosen": -2.55305814743042, + "logits/rejected": -1.8247458934783936, + "logps/chosen": -691.8704833984375, + "logps/rejected": -2073.18017578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1719207763671875, + "rewards/margins": 14.112444877624512, + "rewards/rejected": -20.284364700317383, + "step": 47860 + }, + { + "epoch": 2.85, + "learning_rate": 3.5740304384069e-08, + "logits/chosen": -2.5731184482574463, + "logits/rejected": -1.9510066509246826, + "logps/chosen": -647.0023193359375, + "logps/rejected": -2109.41943359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.767357349395752, + "rewards/margins": 14.891677856445312, + "rewards/rejected": -20.659038543701172, + "step": 47870 + }, + { + "epoch": 2.86, + "learning_rate": 3.544864426020478e-08, + "logits/chosen": -2.5674118995666504, + "logits/rejected": -1.9616050720214844, + "logps/chosen": -712.2982177734375, + "logps/rejected": -2164.016845703125, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.432646751403809, + "rewards/margins": 14.77087116241455, + "rewards/rejected": -21.203516006469727, + "step": 47880 + }, + { + "epoch": 2.86, + "learning_rate": 3.515817057133164e-08, + "logits/chosen": -2.5122408866882324, + "logits/rejected": -1.8520265817642212, + "logps/chosen": -695.5745239257812, + "logps/rejected": -2214.824951171875, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262725830078125, + "rewards/margins": 15.429193496704102, + "rewards/rejected": -21.69191551208496, + "step": 47890 + }, + { + "epoch": 2.86, + "learning_rate": 3.486888345728412e-08, + "logits/chosen": -2.547996759414673, + "logits/rejected": -1.8270342350006104, + "logps/chosen": -656.1243896484375, + "logps/rejected": -2139.267822265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.917409420013428, + "rewards/margins": 15.02479362487793, + "rewards/rejected": -20.942203521728516, + "step": 47900 + }, + { + "epoch": 2.86, + "learning_rate": 3.4580783057324706e-08, + "logits/chosen": -2.5542266368865967, + "logits/rejected": -1.9836204051971436, + "logps/chosen": -692.465576171875, + "logps/rejected": -2069.17529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.206921100616455, + "rewards/margins": 14.040995597839355, + "rewards/rejected": -20.24791717529297, + "step": 47910 + }, + { + "epoch": 2.86, + "learning_rate": 3.42938695101444e-08, + "logits/chosen": -2.543165922164917, + "logits/rejected": -1.9066762924194336, + "logps/chosen": -687.923828125, + "logps/rejected": -2029.6168212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.173656940460205, + "rewards/margins": 13.678509712219238, + "rewards/rejected": -19.852169036865234, + "step": 47920 + }, + { + "epoch": 2.86, + "learning_rate": 3.40081429538644e-08, + "logits/chosen": -2.544551134109497, + "logits/rejected": -1.9108400344848633, + "logps/chosen": -695.4801025390625, + "logps/rejected": -2173.6005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.301712989807129, + "rewards/margins": 14.99010181427002, + "rewards/rejected": -21.291812896728516, + "step": 47930 + }, + { + "epoch": 2.86, + "learning_rate": 3.3723603526032435e-08, + "logits/chosen": -2.5203888416290283, + "logits/rejected": -1.8678734302520752, + "logps/chosen": -678.5736694335938, + "logps/rejected": -2083.8671875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.096879482269287, + "rewards/margins": 14.306879043579102, + "rewards/rejected": -20.403757095336914, + "step": 47940 + }, + { + "epoch": 2.86, + "learning_rate": 3.344025136362672e-08, + "logits/chosen": -2.485604763031006, + "logits/rejected": -1.861669898033142, + "logps/chosen": -674.9385375976562, + "logps/rejected": -2091.67236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.088749408721924, + "rewards/margins": 14.386297225952148, + "rewards/rejected": -20.475048065185547, + "step": 47950 + }, + { + "epoch": 2.86, + "learning_rate": 3.315808660305203e-08, + "logits/chosen": -2.508643627166748, + "logits/rejected": -1.7809970378875732, + "logps/chosen": -672.435791015625, + "logps/rejected": -2047.2025146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.045732021331787, + "rewards/margins": 13.987871170043945, + "rewards/rejected": -20.03360366821289, + "step": 47960 + }, + { + "epoch": 2.86, + "learning_rate": 3.2877109380143604e-08, + "logits/chosen": -2.483832836151123, + "logits/rejected": -1.8156442642211914, + "logps/chosen": -672.6600952148438, + "logps/rejected": -2112.48486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.024083137512207, + "rewards/margins": 14.656590461730957, + "rewards/rejected": -20.680673599243164, + "step": 47970 + }, + { + "epoch": 2.86, + "learning_rate": 3.2597319830162675e-08, + "logits/chosen": -2.5489094257354736, + "logits/rejected": -1.8829145431518555, + "logps/chosen": -677.2984619140625, + "logps/rejected": -2120.223388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.039390563964844, + "rewards/margins": 14.732190132141113, + "rewards/rejected": -20.771581649780273, + "step": 47980 + }, + { + "epoch": 2.86, + "learning_rate": 3.231871808780096e-08, + "logits/chosen": -2.4919073581695557, + "logits/rejected": -1.7926206588745117, + "logps/chosen": -698.2374877929688, + "logps/rejected": -2078.39892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.219583034515381, + "rewards/margins": 14.130247116088867, + "rewards/rejected": -20.349828720092773, + "step": 47990 + }, + { + "epoch": 2.86, + "learning_rate": 3.204130428717672e-08, + "logits/chosen": -2.532219648361206, + "logits/rejected": -1.8869495391845703, + "logps/chosen": -694.7149047851562, + "logps/rejected": -2153.762451171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250391960144043, + "rewards/margins": 14.838266372680664, + "rewards/rejected": -21.088659286499023, + "step": 48000 + }, + { + "epoch": 2.86, + "learning_rate": 3.17650785618373e-08, + "logits/chosen": -2.5247292518615723, + "logits/rejected": -1.8444039821624756, + "logps/chosen": -678.7606201171875, + "logps/rejected": -2084.937255859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.121559143066406, + "rewards/margins": 14.2954740524292, + "rewards/rejected": -20.417034149169922, + "step": 48010 + }, + { + "epoch": 2.86, + "learning_rate": 3.149004104475745e-08, + "logits/chosen": -2.5388245582580566, + "logits/rejected": -1.8359864950180054, + "logps/chosen": -674.0735473632812, + "logps/rejected": -2082.080078125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085350036621094, + "rewards/margins": 14.289515495300293, + "rewards/rejected": -20.374866485595703, + "step": 48020 + }, + { + "epoch": 2.86, + "learning_rate": 3.121619186834041e-08, + "logits/chosen": -2.5577056407928467, + "logits/rejected": -1.8787380456924438, + "logps/chosen": -684.3341064453125, + "logps/rejected": -2126.623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.113992214202881, + "rewards/margins": 14.713091850280762, + "rewards/rejected": -20.82708168029785, + "step": 48030 + }, + { + "epoch": 2.86, + "learning_rate": 3.094353116441684e-08, + "logits/chosen": -2.558563709259033, + "logits/rejected": -1.9709421396255493, + "logps/chosen": -685.641845703125, + "logps/rejected": -2157.740234375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.132631301879883, + "rewards/margins": 15.00428581237793, + "rewards/rejected": -21.136913299560547, + "step": 48040 + }, + { + "epoch": 2.87, + "learning_rate": 3.06720590642462e-08, + "logits/chosen": -2.546692371368408, + "logits/rejected": -1.9234333038330078, + "logps/chosen": -666.1824951171875, + "logps/rejected": -2069.71630859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.930205821990967, + "rewards/margins": 14.324373245239258, + "rewards/rejected": -20.25457763671875, + "step": 48050 + }, + { + "epoch": 2.87, + "learning_rate": 3.040177569851477e-08, + "logits/chosen": -2.5363621711730957, + "logits/rejected": -1.9172999858856201, + "logps/chosen": -686.1873168945312, + "logps/rejected": -2127.279541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.201183319091797, + "rewards/margins": 14.632098197937012, + "rewards/rejected": -20.833280563354492, + "step": 48060 + }, + { + "epoch": 2.87, + "learning_rate": 3.013268119733709e-08, + "logits/chosen": -2.5211150646209717, + "logits/rejected": -1.917533278465271, + "logps/chosen": -686.6590576171875, + "logps/rejected": -2068.53955078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.105084419250488, + "rewards/margins": 14.135374069213867, + "rewards/rejected": -20.240459442138672, + "step": 48070 + }, + { + "epoch": 2.87, + "learning_rate": 2.986477569025509e-08, + "logits/chosen": -2.568075656890869, + "logits/rejected": -1.9155346155166626, + "logps/chosen": -679.5704956054688, + "logps/rejected": -2176.61376953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101100921630859, + "rewards/margins": 15.219587326049805, + "rewards/rejected": -21.320688247680664, + "step": 48080 + }, + { + "epoch": 2.87, + "learning_rate": 2.9598059306238658e-08, + "logits/chosen": -2.558987617492676, + "logits/rejected": -2.007091522216797, + "logps/chosen": -684.961669921875, + "logps/rejected": -2024.7945556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.150697231292725, + "rewards/margins": 13.650751113891602, + "rewards/rejected": -19.801448822021484, + "step": 48090 + }, + { + "epoch": 2.87, + "learning_rate": 2.9332532173684812e-08, + "logits/chosen": -2.5566246509552, + "logits/rejected": -1.8722708225250244, + "logps/chosen": -664.0341186523438, + "logps/rejected": -2163.433349609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.965147495269775, + "rewards/margins": 15.221156120300293, + "rewards/rejected": -21.18630599975586, + "step": 48100 + }, + { + "epoch": 2.87, + "learning_rate": 2.9068194420418528e-08, + "logits/chosen": -2.492772340774536, + "logits/rejected": -1.873552680015564, + "logps/chosen": -698.576171875, + "logps/rejected": -2081.634033203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.243509292602539, + "rewards/margins": 14.126611709594727, + "rewards/rejected": -20.370121002197266, + "step": 48110 + }, + { + "epoch": 2.87, + "learning_rate": 2.8805046173692176e-08, + "logits/chosen": -2.5247108936309814, + "logits/rejected": -1.9478610754013062, + "logps/chosen": -666.8732299804688, + "logps/rejected": -2155.047607421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.934633731842041, + "rewards/margins": 15.1721773147583, + "rewards/rejected": -21.1068115234375, + "step": 48120 + }, + { + "epoch": 2.87, + "learning_rate": 2.85430875601847e-08, + "logits/chosen": -2.5428946018218994, + "logits/rejected": -1.9354099035263062, + "logps/chosen": -652.7473754882812, + "logps/rejected": -2136.23779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.799168109893799, + "rewards/margins": 15.136640548706055, + "rewards/rejected": -20.935810089111328, + "step": 48130 + }, + { + "epoch": 2.87, + "learning_rate": 2.8282318706003563e-08, + "logits/chosen": -2.5692780017852783, + "logits/rejected": -1.8637539148330688, + "logps/chosen": -681.7174682617188, + "logps/rejected": -2215.114990234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085501670837402, + "rewards/margins": 15.616198539733887, + "rewards/rejected": -21.70170021057129, + "step": 48140 + }, + { + "epoch": 2.87, + "learning_rate": 2.802273973668279e-08, + "logits/chosen": -2.529186964035034, + "logits/rejected": -1.9341747760772705, + "logps/chosen": -706.9898071289062, + "logps/rejected": -2137.85595703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.405731201171875, + "rewards/margins": 14.522122383117676, + "rewards/rejected": -20.927852630615234, + "step": 48150 + }, + { + "epoch": 2.87, + "learning_rate": 2.7764350777183535e-08, + "logits/chosen": -2.5776259899139404, + "logits/rejected": -1.852050542831421, + "logps/chosen": -672.5924072265625, + "logps/rejected": -2158.9951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.988029956817627, + "rewards/margins": 15.157325744628906, + "rewards/rejected": -21.145355224609375, + "step": 48160 + }, + { + "epoch": 2.87, + "learning_rate": 2.7507151951894072e-08, + "logits/chosen": -2.5290331840515137, + "logits/rejected": -1.8456189632415771, + "logps/chosen": -703.8927001953125, + "logps/rejected": -2132.943359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.362330436706543, + "rewards/margins": 14.525136947631836, + "rewards/rejected": -20.887466430664062, + "step": 48170 + }, + { + "epoch": 2.87, + "learning_rate": 2.725114338463064e-08, + "logits/chosen": -2.559382915496826, + "logits/rejected": -1.967773675918579, + "logps/chosen": -675.7244873046875, + "logps/rejected": -2131.157470703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.051231384277344, + "rewards/margins": 14.812968254089355, + "rewards/rejected": -20.86419677734375, + "step": 48180 + }, + { + "epoch": 2.87, + "learning_rate": 2.6996325198634656e-08, + "logits/chosen": -2.5366411209106445, + "logits/rejected": -1.9130455255508423, + "logps/chosen": -665.3953857421875, + "logps/rejected": -2056.852783203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.024036884307861, + "rewards/margins": 14.11381721496582, + "rewards/rejected": -20.13785743713379, + "step": 48190 + }, + { + "epoch": 2.87, + "learning_rate": 2.6742697516576598e-08, + "logits/chosen": -2.5816354751586914, + "logits/rejected": -1.9181264638900757, + "logps/chosen": -672.965576171875, + "logps/rejected": -2149.8017578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.015598297119141, + "rewards/margins": 15.036584854125977, + "rewards/rejected": -21.05218505859375, + "step": 48200 + }, + { + "epoch": 2.87, + "learning_rate": 2.6490260460552143e-08, + "logits/chosen": -2.583555221557617, + "logits/rejected": -1.907400369644165, + "logps/chosen": -666.8712768554688, + "logps/rejected": -2136.479248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.957029819488525, + "rewards/margins": 14.96014404296875, + "rewards/rejected": -20.917171478271484, + "step": 48210 + }, + { + "epoch": 2.88, + "learning_rate": 2.6239014152084907e-08, + "logits/chosen": -2.494107961654663, + "logits/rejected": -1.8871190547943115, + "logps/chosen": -679.34033203125, + "logps/rejected": -2102.567138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.097395896911621, + "rewards/margins": 14.487383842468262, + "rewards/rejected": -20.584781646728516, + "step": 48220 + }, + { + "epoch": 2.88, + "learning_rate": 2.5988958712124536e-08, + "logits/chosen": -2.528308391571045, + "logits/rejected": -1.8732054233551025, + "logps/chosen": -682.8327026367188, + "logps/rejected": -2071.43701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.146365165710449, + "rewards/margins": 14.13207721710205, + "rewards/rejected": -20.2784423828125, + "step": 48230 + }, + { + "epoch": 2.88, + "learning_rate": 2.5740094261048342e-08, + "logits/chosen": -2.5257115364074707, + "logits/rejected": -1.8616867065429688, + "logps/chosen": -681.8861083984375, + "logps/rejected": -2170.365234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.13875675201416, + "rewards/margins": 15.104164123535156, + "rewards/rejected": -21.242919921875, + "step": 48240 + }, + { + "epoch": 2.88, + "learning_rate": 2.5492420918658833e-08, + "logits/chosen": -2.505824327468872, + "logits/rejected": -1.8045841455459595, + "logps/chosen": -675.2021484375, + "logps/rejected": -2066.759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0664262771606445, + "rewards/margins": 14.170278549194336, + "rewards/rejected": -20.236703872680664, + "step": 48250 + }, + { + "epoch": 2.88, + "learning_rate": 2.5245938804186464e-08, + "logits/chosen": -2.577463150024414, + "logits/rejected": -1.8691482543945312, + "logps/chosen": -688.9198608398438, + "logps/rejected": -2238.098388671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.231585502624512, + "rewards/margins": 15.706693649291992, + "rewards/rejected": -21.93828010559082, + "step": 48260 + }, + { + "epoch": 2.88, + "learning_rate": 2.5000648036287712e-08, + "logits/chosen": -2.5649771690368652, + "logits/rejected": -1.8977749347686768, + "logps/chosen": -709.6845703125, + "logps/rejected": -2089.54052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.348001956939697, + "rewards/margins": 14.113397598266602, + "rewards/rejected": -20.46139907836914, + "step": 48270 + }, + { + "epoch": 2.88, + "learning_rate": 2.475654873304506e-08, + "logits/chosen": -2.494678497314453, + "logits/rejected": -1.9047847986221313, + "logps/chosen": -696.6685180664062, + "logps/rejected": -2120.934326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.258927822113037, + "rewards/margins": 14.51147747039795, + "rewards/rejected": -20.770404815673828, + "step": 48280 + }, + { + "epoch": 2.88, + "learning_rate": 2.451364101196868e-08, + "logits/chosen": -2.5400214195251465, + "logits/rejected": -1.8645656108856201, + "logps/chosen": -701.3526000976562, + "logps/rejected": -2109.02978515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.307577610015869, + "rewards/margins": 14.341954231262207, + "rewards/rejected": -20.649532318115234, + "step": 48290 + }, + { + "epoch": 2.88, + "learning_rate": 2.4271924989993646e-08, + "logits/chosen": -2.574380397796631, + "logits/rejected": -1.7771488428115845, + "logps/chosen": -663.8343505859375, + "logps/rejected": -2171.732177734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.910373687744141, + "rewards/margins": 15.356915473937988, + "rewards/rejected": -21.267290115356445, + "step": 48300 + }, + { + "epoch": 2.88, + "learning_rate": 2.4031400783482706e-08, + "logits/chosen": -2.602247714996338, + "logits/rejected": -2.03269624710083, + "logps/chosen": -661.1981201171875, + "logps/rejected": -2135.71240234375, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.919764041900635, + "rewards/margins": 14.998234748840332, + "rewards/rejected": -20.917997360229492, + "step": 48310 + }, + { + "epoch": 2.88, + "learning_rate": 2.379206850822352e-08, + "logits/chosen": -2.5491063594818115, + "logits/rejected": -1.83774733543396, + "logps/chosen": -699.4498291015625, + "logps/rejected": -2039.5247802734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.309936046600342, + "rewards/margins": 13.641949653625488, + "rewards/rejected": -19.95188331604004, + "step": 48320 + }, + { + "epoch": 2.88, + "learning_rate": 2.3553928279431147e-08, + "logits/chosen": -2.5580928325653076, + "logits/rejected": -1.9755287170410156, + "logps/chosen": -671.3640747070312, + "logps/rejected": -2176.05615234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044943809509277, + "rewards/margins": 15.275979995727539, + "rewards/rejected": -21.3209228515625, + "step": 48330 + }, + { + "epoch": 2.88, + "learning_rate": 2.331698021174611e-08, + "logits/chosen": -2.552849054336548, + "logits/rejected": -1.7346900701522827, + "logps/chosen": -674.7931518554688, + "logps/rejected": -2146.14306640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019736289978027, + "rewards/margins": 14.990621566772461, + "rewards/rejected": -21.010356903076172, + "step": 48340 + }, + { + "epoch": 2.88, + "learning_rate": 2.3081224419235214e-08, + "logits/chosen": -2.522785186767578, + "logits/rejected": -1.7783358097076416, + "logps/chosen": -665.6649169921875, + "logps/rejected": -2164.91748046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.991165637969971, + "rewards/margins": 15.208572387695312, + "rewards/rejected": -21.199737548828125, + "step": 48350 + }, + { + "epoch": 2.88, + "learning_rate": 2.284666101539129e-08, + "logits/chosen": -2.574859857559204, + "logits/rejected": -1.82585871219635, + "logps/chosen": -680.7073974609375, + "logps/rejected": -2123.90576171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.057093143463135, + "rewards/margins": 14.746889114379883, + "rewards/rejected": -20.80398178100586, + "step": 48360 + }, + { + "epoch": 2.88, + "learning_rate": 2.261329011313318e-08, + "logits/chosen": -2.5424869060516357, + "logits/rejected": -1.840075135231018, + "logps/chosen": -683.1573486328125, + "logps/rejected": -2146.518310546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.134173393249512, + "rewards/margins": 14.869140625, + "rewards/rejected": -21.003314971923828, + "step": 48370 + }, + { + "epoch": 2.88, + "learning_rate": 2.238111182480546e-08, + "logits/chosen": -2.552433490753174, + "logits/rejected": -1.7988309860229492, + "logps/chosen": -653.740478515625, + "logps/rejected": -2156.285400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8588151931762695, + "rewards/margins": 15.25567626953125, + "rewards/rejected": -21.114492416381836, + "step": 48380 + }, + { + "epoch": 2.89, + "learning_rate": 2.2150126262179273e-08, + "logits/chosen": -2.558483362197876, + "logits/rejected": -1.9057018756866455, + "logps/chosen": -666.5973510742188, + "logps/rejected": -2209.914794921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903714179992676, + "rewards/margins": 15.746350288391113, + "rewards/rejected": -21.65006446838379, + "step": 48390 + }, + { + "epoch": 2.89, + "learning_rate": 2.19203335364504e-08, + "logits/chosen": -2.5402607917785645, + "logits/rejected": -1.9255377054214478, + "logps/chosen": -672.2955322265625, + "logps/rejected": -2053.94482421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044112205505371, + "rewards/margins": 14.07019329071045, + "rewards/rejected": -20.114307403564453, + "step": 48400 + }, + { + "epoch": 2.89, + "learning_rate": 2.1691733758241462e-08, + "logits/chosen": -2.5230350494384766, + "logits/rejected": -1.846625566482544, + "logps/chosen": -678.90869140625, + "logps/rejected": -2195.80029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.109433174133301, + "rewards/margins": 15.41120719909668, + "rewards/rejected": -21.520641326904297, + "step": 48410 + }, + { + "epoch": 2.89, + "learning_rate": 2.1464327037600264e-08, + "logits/chosen": -2.5155532360076904, + "logits/rejected": -1.8880211114883423, + "logps/chosen": -683.0197143554688, + "logps/rejected": -2206.65966796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.106818675994873, + "rewards/margins": 15.517202377319336, + "rewards/rejected": -21.624021530151367, + "step": 48420 + }, + { + "epoch": 2.89, + "learning_rate": 2.1238113484000344e-08, + "logits/chosen": -2.5177817344665527, + "logits/rejected": -1.7608206272125244, + "logps/chosen": -702.9937133789062, + "logps/rejected": -2137.910888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.338097095489502, + "rewards/margins": 14.591066360473633, + "rewards/rejected": -20.929161071777344, + "step": 48430 + }, + { + "epoch": 2.89, + "learning_rate": 2.1013093206341263e-08, + "logits/chosen": -2.519204616546631, + "logits/rejected": -1.8256458044052124, + "logps/chosen": -676.8306884765625, + "logps/rejected": -2147.65478515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012251377105713, + "rewards/margins": 15.022649765014648, + "rewards/rejected": -21.034900665283203, + "step": 48440 + }, + { + "epoch": 2.89, + "learning_rate": 2.0789266312947477e-08, + "logits/chosen": -2.5189526081085205, + "logits/rejected": -1.8379127979278564, + "logps/chosen": -675.8633422851562, + "logps/rejected": -2124.234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0555267333984375, + "rewards/margins": 14.744211196899414, + "rewards/rejected": -20.79973793029785, + "step": 48450 + }, + { + "epoch": 2.89, + "learning_rate": 2.0566632911568907e-08, + "logits/chosen": -2.5339057445526123, + "logits/rejected": -1.9101272821426392, + "logps/chosen": -675.91357421875, + "logps/rejected": -2136.774169921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099097728729248, + "rewards/margins": 14.830904960632324, + "rewards/rejected": -20.930004119873047, + "step": 48460 + }, + { + "epoch": 2.89, + "learning_rate": 2.0345193109382043e-08, + "logits/chosen": -2.5700201988220215, + "logits/rejected": -1.8629182577133179, + "logps/chosen": -683.1878662109375, + "logps/rejected": -2132.492919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.099869728088379, + "rewards/margins": 14.777615547180176, + "rewards/rejected": -20.877485275268555, + "step": 48470 + }, + { + "epoch": 2.89, + "learning_rate": 2.0124947012987172e-08, + "logits/chosen": -2.6124796867370605, + "logits/rejected": -1.9333750009536743, + "logps/chosen": -687.8350219726562, + "logps/rejected": -2170.37841796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.131969451904297, + "rewards/margins": 15.132286071777344, + "rewards/rejected": -21.26425552368164, + "step": 48480 + }, + { + "epoch": 2.89, + "learning_rate": 1.99058947284117e-08, + "logits/chosen": -2.5566813945770264, + "logits/rejected": -1.9328352212905884, + "logps/chosen": -667.7733764648438, + "logps/rejected": -2070.81298828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.013966083526611, + "rewards/margins": 14.248822212219238, + "rewards/rejected": -20.26279067993164, + "step": 48490 + }, + { + "epoch": 2.89, + "learning_rate": 1.9688036361106278e-08, + "logits/chosen": -2.554651975631714, + "logits/rejected": -1.8732411861419678, + "logps/chosen": -650.5001220703125, + "logps/rejected": -2067.894287109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.822342872619629, + "rewards/margins": 14.427488327026367, + "rewards/rejected": -20.249832153320312, + "step": 48500 + }, + { + "epoch": 2.89, + "learning_rate": 1.947137201594923e-08, + "logits/chosen": -2.502858877182007, + "logits/rejected": -1.8387563228607178, + "logps/chosen": -716.0361328125, + "logps/rejected": -2084.72900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.43338680267334, + "rewards/margins": 13.973152160644531, + "rewards/rejected": -20.406538009643555, + "step": 48510 + }, + { + "epoch": 2.89, + "learning_rate": 1.9255901797241295e-08, + "logits/chosen": -2.539612054824829, + "logits/rejected": -1.7984920740127563, + "logps/chosen": -679.2880859375, + "logps/rejected": -2127.771484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034034729003906, + "rewards/margins": 14.804336547851562, + "rewards/rejected": -20.8383731842041, + "step": 48520 + }, + { + "epoch": 2.89, + "learning_rate": 1.904162580871116e-08, + "logits/chosen": -2.5044991970062256, + "logits/rejected": -1.9137852191925049, + "logps/chosen": -683.4713134765625, + "logps/rejected": -2102.85205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.148890018463135, + "rewards/margins": 14.440159797668457, + "rewards/rejected": -20.58905029296875, + "step": 48530 + }, + { + "epoch": 2.89, + "learning_rate": 1.8828544153510765e-08, + "logits/chosen": -2.5656578540802, + "logits/rejected": -1.8460876941680908, + "logps/chosen": -670.1319580078125, + "logps/rejected": -2045.78515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9569091796875, + "rewards/margins": 14.056169509887695, + "rewards/rejected": -20.013078689575195, + "step": 48540 + }, + { + "epoch": 2.9, + "learning_rate": 1.8616656934217215e-08, + "logits/chosen": -2.516164779663086, + "logits/rejected": -1.8509070873260498, + "logps/chosen": -688.08447265625, + "logps/rejected": -2083.99267578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.139894008636475, + "rewards/margins": 14.258671760559082, + "rewards/rejected": -20.3985652923584, + "step": 48550 + }, + { + "epoch": 2.9, + "learning_rate": 1.8405964252833642e-08, + "logits/chosen": -2.566682815551758, + "logits/rejected": -1.9034065008163452, + "logps/chosen": -675.9892578125, + "logps/rejected": -2188.638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.020956993103027, + "rewards/margins": 15.414299011230469, + "rewards/rejected": -21.435256958007812, + "step": 48560 + }, + { + "epoch": 2.9, + "learning_rate": 1.8196466210787245e-08, + "logits/chosen": -2.5223007202148438, + "logits/rejected": -1.8366798162460327, + "logps/chosen": -691.3553466796875, + "logps/rejected": -2189.38818359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.174257755279541, + "rewards/margins": 15.266016960144043, + "rewards/rejected": -21.44027328491211, + "step": 48570 + }, + { + "epoch": 2.9, + "learning_rate": 1.798816290893013e-08, + "logits/chosen": -2.535620927810669, + "logits/rejected": -1.8651174306869507, + "logps/chosen": -707.1216430664062, + "logps/rejected": -2114.194091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.343506813049316, + "rewards/margins": 14.355438232421875, + "rewards/rejected": -20.698945999145508, + "step": 48580 + }, + { + "epoch": 2.9, + "learning_rate": 1.778105444753986e-08, + "logits/chosen": -2.538989782333374, + "logits/rejected": -1.9091320037841797, + "logps/chosen": -675.8742065429688, + "logps/rejected": -2102.255859375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0666093826293945, + "rewards/margins": 14.515759468078613, + "rewards/rejected": -20.582366943359375, + "step": 48590 + }, + { + "epoch": 2.9, + "learning_rate": 1.7575140926318346e-08, + "logits/chosen": -2.5574989318847656, + "logits/rejected": -1.9730069637298584, + "logps/chosen": -682.9398193359375, + "logps/rejected": -2203.36572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.034187316894531, + "rewards/margins": 15.549585342407227, + "rewards/rejected": -21.583772659301758, + "step": 48600 + }, + { + "epoch": 2.9, + "learning_rate": 1.737042244439241e-08, + "logits/chosen": -2.5169150829315186, + "logits/rejected": -1.78499436378479, + "logps/chosen": -687.6849365234375, + "logps/rejected": -2243.892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.122307777404785, + "rewards/margins": 15.866750717163086, + "rewards/rejected": -21.989057540893555, + "step": 48610 + }, + { + "epoch": 2.9, + "learning_rate": 1.7166899100313217e-08, + "logits/chosen": -2.5508875846862793, + "logits/rejected": -1.8987147808074951, + "logps/chosen": -675.4234619140625, + "logps/rejected": -2166.285888671875, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.028780460357666, + "rewards/margins": 15.1937255859375, + "rewards/rejected": -21.222503662109375, + "step": 48620 + }, + { + "epoch": 2.9, + "learning_rate": 1.6964570992057394e-08, + "logits/chosen": -2.506406307220459, + "logits/rejected": -1.8649343252182007, + "logps/chosen": -661.332275390625, + "logps/rejected": -2112.5390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.97758150100708, + "rewards/margins": 14.705682754516602, + "rewards/rejected": -20.68326187133789, + "step": 48630 + }, + { + "epoch": 2.9, + "learning_rate": 1.6763438217025362e-08, + "logits/chosen": -2.542694568634033, + "logits/rejected": -1.8451522588729858, + "logps/chosen": -681.0606079101562, + "logps/rejected": -2116.603515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1574273109436035, + "rewards/margins": 14.576957702636719, + "rewards/rejected": -20.734384536743164, + "step": 48640 + }, + { + "epoch": 2.9, + "learning_rate": 1.6563500872042447e-08, + "logits/chosen": -2.5754928588867188, + "logits/rejected": -1.8675918579101562, + "logps/chosen": -698.6868286132812, + "logps/rejected": -2100.425048828125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.227984428405762, + "rewards/margins": 14.3243408203125, + "rewards/rejected": -20.552322387695312, + "step": 48650 + }, + { + "epoch": 2.9, + "learning_rate": 1.6364759053358603e-08, + "logits/chosen": -2.579556703567505, + "logits/rejected": -1.9358940124511719, + "logps/chosen": -665.9500732421875, + "logps/rejected": -2054.67431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.984236717224121, + "rewards/margins": 14.130717277526855, + "rewards/rejected": -20.11495590209961, + "step": 48660 + }, + { + "epoch": 2.9, + "learning_rate": 1.6167212856648128e-08, + "logits/chosen": -2.5563290119171143, + "logits/rejected": -1.8523623943328857, + "logps/chosen": -673.8303833007812, + "logps/rejected": -2168.585205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.004269599914551, + "rewards/margins": 15.243806838989258, + "rewards/rejected": -21.24807357788086, + "step": 48670 + }, + { + "epoch": 2.9, + "learning_rate": 1.59708623770094e-08, + "logits/chosen": -2.599889039993286, + "logits/rejected": -1.9306586980819702, + "logps/chosen": -660.0601806640625, + "logps/rejected": -2115.59521484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.915115833282471, + "rewards/margins": 14.806657791137695, + "rewards/rejected": -20.721771240234375, + "step": 48680 + }, + { + "epoch": 2.9, + "learning_rate": 1.5775707708966247e-08, + "logits/chosen": -2.527975559234619, + "logits/rejected": -1.7743488550186157, + "logps/chosen": -669.3555908203125, + "logps/rejected": -2047.3922119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.975733757019043, + "rewards/margins": 14.049616813659668, + "rewards/rejected": -20.02535057067871, + "step": 48690 + }, + { + "epoch": 2.9, + "learning_rate": 1.558174894646547e-08, + "logits/chosen": -2.5388214588165283, + "logits/rejected": -1.9961516857147217, + "logps/chosen": -683.0889892578125, + "logps/rejected": -2133.08056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.107321739196777, + "rewards/margins": 14.774149894714355, + "rewards/rejected": -20.881471633911133, + "step": 48700 + }, + { + "epoch": 2.9, + "learning_rate": 1.538898618287932e-08, + "logits/chosen": -2.4946603775024414, + "logits/rejected": -1.7936128377914429, + "logps/chosen": -659.8072509765625, + "logps/rejected": -2184.678466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.914683818817139, + "rewards/margins": 15.487020492553711, + "rewards/rejected": -21.401704788208008, + "step": 48710 + }, + { + "epoch": 2.91, + "learning_rate": 1.5197419511003564e-08, + "logits/chosen": -2.6003847122192383, + "logits/rejected": -1.9525346755981445, + "logps/chosen": -670.9959716796875, + "logps/rejected": -2148.529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.012170791625977, + "rewards/margins": 15.029474258422852, + "rewards/rejected": -21.041645050048828, + "step": 48720 + }, + { + "epoch": 2.91, + "learning_rate": 1.5007049023058052e-08, + "logits/chosen": -2.5371158123016357, + "logits/rejected": -1.763897180557251, + "logps/chosen": -675.0404663085938, + "logps/rejected": -2161.195068359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056451797485352, + "rewards/margins": 15.104817390441895, + "rewards/rejected": -21.16126823425293, + "step": 48730 + }, + { + "epoch": 2.91, + "learning_rate": 1.4817874810687526e-08, + "logits/chosen": -2.524130344390869, + "logits/rejected": -1.8019332885742188, + "logps/chosen": -693.6753540039062, + "logps/rejected": -2132.919921875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.177970886230469, + "rewards/margins": 14.70988655090332, + "rewards/rejected": -20.887855529785156, + "step": 48740 + }, + { + "epoch": 2.91, + "learning_rate": 1.4629896964960533e-08, + "logits/chosen": -2.543842077255249, + "logits/rejected": -1.8095471858978271, + "logps/chosen": -679.0814819335938, + "logps/rejected": -2126.502197265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.072670936584473, + "rewards/margins": 14.757519721984863, + "rewards/rejected": -20.83019256591797, + "step": 48750 + }, + { + "epoch": 2.91, + "learning_rate": 1.4443115576369405e-08, + "logits/chosen": -2.5638887882232666, + "logits/rejected": -1.9243509769439697, + "logps/chosen": -664.1622924804688, + "logps/rejected": -2212.7333984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.958317756652832, + "rewards/margins": 15.734384536743164, + "rewards/rejected": -21.692699432373047, + "step": 48760 + }, + { + "epoch": 2.91, + "learning_rate": 1.425753073483055e-08, + "logits/chosen": -2.480419158935547, + "logits/rejected": -1.7666288614273071, + "logps/chosen": -686.7485961914062, + "logps/rejected": -2065.2626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.212603569030762, + "rewards/margins": 13.988736152648926, + "rewards/rejected": -20.201339721679688, + "step": 48770 + }, + { + "epoch": 2.91, + "learning_rate": 1.4073142529685003e-08, + "logits/chosen": -2.521117687225342, + "logits/rejected": -1.85613214969635, + "logps/chosen": -668.9860229492188, + "logps/rejected": -2171.75927734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.997374534606934, + "rewards/margins": 15.270957946777344, + "rewards/rejected": -21.26833152770996, + "step": 48780 + }, + { + "epoch": 2.91, + "learning_rate": 1.388995104969676e-08, + "logits/chosen": -2.5140128135681152, + "logits/rejected": -1.8621021509170532, + "logps/chosen": -660.1328125, + "logps/rejected": -2104.06884765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9389166831970215, + "rewards/margins": 14.642435073852539, + "rewards/rejected": -20.581350326538086, + "step": 48790 + }, + { + "epoch": 2.91, + "learning_rate": 1.3707956383054445e-08, + "logits/chosen": -2.5517418384552, + "logits/rejected": -1.7998158931732178, + "logps/chosen": -659.0509033203125, + "logps/rejected": -2137.900390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.837357521057129, + "rewards/margins": 15.093998908996582, + "rewards/rejected": -20.931354522705078, + "step": 48800 + }, + { + "epoch": 2.91, + "learning_rate": 1.3527158617370196e-08, + "logits/chosen": -2.5252902507781982, + "logits/rejected": -1.7599433660507202, + "logps/chosen": -676.3214721679688, + "logps/rejected": -2136.4716796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.019022464752197, + "rewards/margins": 14.885801315307617, + "rewards/rejected": -20.904823303222656, + "step": 48810 + }, + { + "epoch": 2.91, + "learning_rate": 1.3347557839680226e-08, + "logits/chosen": -2.552111864089966, + "logits/rejected": -1.9791275262832642, + "logps/chosen": -690.5372314453125, + "logps/rejected": -2101.62548828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193233013153076, + "rewards/margins": 14.380520820617676, + "rewards/rejected": -20.57375144958496, + "step": 48820 + }, + { + "epoch": 2.91, + "learning_rate": 1.3169154136443984e-08, + "logits/chosen": -2.5502305030822754, + "logits/rejected": -1.9194914102554321, + "logps/chosen": -673.890380859375, + "logps/rejected": -2176.20556640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056182861328125, + "rewards/margins": 15.265724182128906, + "rewards/rejected": -21.3219051361084, + "step": 48830 + }, + { + "epoch": 2.91, + "learning_rate": 1.2991947593545273e-08, + "logits/chosen": -2.5634987354278564, + "logits/rejected": -1.8857848644256592, + "logps/chosen": -684.3558959960938, + "logps/rejected": -2182.829833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.05673885345459, + "rewards/margins": 15.313934326171875, + "rewards/rejected": -21.37067222595215, + "step": 48840 + }, + { + "epoch": 2.91, + "learning_rate": 1.2815938296291407e-08, + "logits/chosen": -2.5501842498779297, + "logits/rejected": -1.8610496520996094, + "logps/chosen": -684.4824829101562, + "logps/rejected": -2093.41552734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.145699977874756, + "rewards/margins": 14.35393238067627, + "rewards/rejected": -20.499629974365234, + "step": 48850 + }, + { + "epoch": 2.91, + "learning_rate": 1.2641126329412946e-08, + "logits/chosen": -2.551257610321045, + "logits/rejected": -1.9131431579589844, + "logps/chosen": -683.664306640625, + "logps/rejected": -2135.542236328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110589027404785, + "rewards/margins": 14.799705505371094, + "rewards/rejected": -20.910295486450195, + "step": 48860 + }, + { + "epoch": 2.91, + "learning_rate": 1.2467511777064789e-08, + "logits/chosen": -2.5224099159240723, + "logits/rejected": -1.8558149337768555, + "logps/chosen": -706.2445068359375, + "logps/rejected": -1962.3560791015625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.387429714202881, + "rewards/margins": 12.826004028320312, + "rewards/rejected": -19.213436126708984, + "step": 48870 + }, + { + "epoch": 2.91, + "learning_rate": 1.229509472282453e-08, + "logits/chosen": -2.5334010124206543, + "logits/rejected": -1.836787462234497, + "logps/chosen": -665.4156494140625, + "logps/rejected": -2108.0205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9287261962890625, + "rewards/margins": 14.72576904296875, + "rewards/rejected": -20.654495239257812, + "step": 48880 + }, + { + "epoch": 2.92, + "learning_rate": 1.2123875249694106e-08, + "logits/chosen": -2.47283673286438, + "logits/rejected": -1.7431246042251587, + "logps/chosen": -700.4696655273438, + "logps/rejected": -2153.192626953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.291677951812744, + "rewards/margins": 14.794816970825195, + "rewards/rejected": -21.086496353149414, + "step": 48890 + }, + { + "epoch": 2.92, + "learning_rate": 1.1953853440098418e-08, + "logits/chosen": -2.5183515548706055, + "logits/rejected": -1.895329236984253, + "logps/chosen": -670.8108520507812, + "logps/rejected": -2099.613037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.981019020080566, + "rewards/margins": 14.566896438598633, + "rewards/rejected": -20.547916412353516, + "step": 48900 + }, + { + "epoch": 2.92, + "learning_rate": 1.178502937588588e-08, + "logits/chosen": -2.55566668510437, + "logits/rejected": -1.8807426691055298, + "logps/chosen": -681.2425537109375, + "logps/rejected": -2180.01904296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.074954509735107, + "rewards/margins": 15.284391403198242, + "rewards/rejected": -21.359346389770508, + "step": 48910 + }, + { + "epoch": 2.92, + "learning_rate": 1.1617403138328431e-08, + "logits/chosen": -2.532188653945923, + "logits/rejected": -1.8961032629013062, + "logps/chosen": -726.2718505859375, + "logps/rejected": -2214.133056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.53622579574585, + "rewards/margins": 15.151870727539062, + "rewards/rejected": -21.68809700012207, + "step": 48920 + }, + { + "epoch": 2.92, + "learning_rate": 1.145097480812124e-08, + "logits/chosen": -2.5612850189208984, + "logits/rejected": -1.8756030797958374, + "logps/chosen": -671.4337158203125, + "logps/rejected": -2154.809814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0155158042907715, + "rewards/margins": 15.09416675567627, + "rewards/rejected": -21.109683990478516, + "step": 48930 + }, + { + "epoch": 2.92, + "learning_rate": 1.1285744465383274e-08, + "logits/chosen": -2.5373470783233643, + "logits/rejected": -1.8834476470947266, + "logps/chosen": -679.8656616210938, + "logps/rejected": -2204.810791015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.137664318084717, + "rewards/margins": 15.476186752319336, + "rewards/rejected": -21.613849639892578, + "step": 48940 + }, + { + "epoch": 2.92, + "learning_rate": 1.1121712189656187e-08, + "logits/chosen": -2.51404070854187, + "logits/rejected": -1.920281171798706, + "logps/chosen": -685.8297729492188, + "logps/rejected": -2124.42431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.193963050842285, + "rewards/margins": 14.613909721374512, + "rewards/rejected": -20.807872772216797, + "step": 48950 + }, + { + "epoch": 2.92, + "learning_rate": 1.0958878059905143e-08, + "logits/chosen": -2.5721654891967773, + "logits/rejected": -1.87485671043396, + "logps/chosen": -694.6907348632812, + "logps/rejected": -2118.43701171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.204733371734619, + "rewards/margins": 14.545827865600586, + "rewards/rejected": -20.750560760498047, + "step": 48960 + }, + { + "epoch": 2.92, + "learning_rate": 1.0797242154518273e-08, + "logits/chosen": -2.56357479095459, + "logits/rejected": -1.8754189014434814, + "logps/chosen": -665.7887573242188, + "logps/rejected": -2173.88623046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963348865509033, + "rewards/margins": 15.331085205078125, + "rewards/rejected": -21.29443359375, + "step": 48970 + }, + { + "epoch": 2.92, + "learning_rate": 1.06368045513075e-08, + "logits/chosen": -2.5471644401550293, + "logits/rejected": -1.944789171218872, + "logps/chosen": -665.5935668945312, + "logps/rejected": -2093.67138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.963828086853027, + "rewards/margins": 14.540364265441895, + "rewards/rejected": -20.504192352294922, + "step": 48980 + }, + { + "epoch": 2.92, + "learning_rate": 1.0477565327507155e-08, + "logits/chosen": -2.556380033493042, + "logits/rejected": -1.8459333181381226, + "logps/chosen": -685.0186157226562, + "logps/rejected": -2123.691650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.138432502746582, + "rewards/margins": 14.651666641235352, + "rewards/rejected": -20.790098190307617, + "step": 48990 + }, + { + "epoch": 2.92, + "learning_rate": 1.0319524559775085e-08, + "logits/chosen": -2.5312838554382324, + "logits/rejected": -1.863898515701294, + "logps/chosen": -697.2221069335938, + "logps/rejected": -2104.92724609375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.274937629699707, + "rewards/margins": 14.334075927734375, + "rewards/rejected": -20.609012603759766, + "step": 49000 + }, + { + "epoch": 2.92, + "eval_logits/chosen": -2.489546298980713, + "eval_logits/rejected": -2.059112787246704, + "eval_logps/chosen": -732.947509765625, + "eval_logps/rejected": -1999.9326171875, + "eval_loss": 2.8473234124248847e-05, + "eval_rewards/accuracies": 1.0, + "eval_rewards/chosen": -6.658730506896973, + "eval_rewards/margins": 12.885114669799805, + "eval_rewards/rejected": -19.543846130371094, + "eval_runtime": 3.904, + "eval_samples_per_second": 1.281, + "eval_steps_per_second": 0.256, + "step": 49000 + }, + { + "epoch": 2.92, + "learning_rate": 1.0162682324192374e-08, + "logits/chosen": -2.5221567153930664, + "logits/rejected": -1.9165809154510498, + "logps/chosen": -678.3230590820312, + "logps/rejected": -2168.92138671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.044962406158447, + "rewards/margins": 15.20093822479248, + "rewards/rejected": -21.245899200439453, + "step": 49010 + }, + { + "epoch": 2.92, + "learning_rate": 1.0007038696262517e-08, + "logits/chosen": -2.5397629737854004, + "logits/rejected": -1.9195655584335327, + "logps/chosen": -680.8883056640625, + "logps/rejected": -2147.04248046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.123128414154053, + "rewards/margins": 14.916658401489258, + "rewards/rejected": -21.039785385131836, + "step": 49020 + }, + { + "epoch": 2.92, + "learning_rate": 9.85259375091252e-09, + "logits/chosen": -2.56266713142395, + "logits/rejected": -1.8985960483551025, + "logps/chosen": -698.9107666015625, + "logps/rejected": -2093.101318359375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.2458295822143555, + "rewards/margins": 14.238186836242676, + "rewards/rejected": -20.4840145111084, + "step": 49030 + }, + { + "epoch": 2.92, + "learning_rate": 9.699347562491801e-09, + "logits/chosen": -2.4930036067962646, + "logits/rejected": -1.8773431777954102, + "logps/chosen": -682.4627075195312, + "logps/rejected": -2102.07275390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144158840179443, + "rewards/margins": 14.434295654296875, + "rewards/rejected": -20.578453063964844, + "step": 49040 + }, + { + "epoch": 2.92, + "learning_rate": 9.547300204773845e-09, + "logits/chosen": -2.5365922451019287, + "logits/rejected": -1.939753770828247, + "logps/chosen": -666.0460815429688, + "logps/rejected": -2100.78173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.967895984649658, + "rewards/margins": 14.607416152954102, + "rewards/rejected": -20.57530975341797, + "step": 49050 + }, + { + "epoch": 2.93, + "learning_rate": 9.39645175095344e-09, + "logits/chosen": -2.5180437564849854, + "logits/rejected": -1.8077160120010376, + "logps/chosen": -688.3214111328125, + "logps/rejected": -2187.06640625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.129983425140381, + "rewards/margins": 15.302978515625, + "rewards/rejected": -21.432958602905273, + "step": 49060 + }, + { + "epoch": 2.93, + "learning_rate": 9.246802273649713e-09, + "logits/chosen": -2.5448787212371826, + "logits/rejected": -1.964117407798767, + "logps/chosen": -661.248046875, + "logps/rejected": -2161.6875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.940907001495361, + "rewards/margins": 15.240422248840332, + "rewards/rejected": -21.18132781982422, + "step": 49070 + }, + { + "epoch": 2.93, + "learning_rate": 9.098351844903653e-09, + "logits/chosen": -2.4956841468811035, + "logits/rejected": -1.7715524435043335, + "logps/chosen": -651.1827392578125, + "logps/rejected": -2194.448486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8213629722595215, + "rewards/margins": 15.654253959655762, + "rewards/rejected": -21.475618362426758, + "step": 49080 + }, + { + "epoch": 2.93, + "learning_rate": 8.951100536178925e-09, + "logits/chosen": -2.5677311420440674, + "logits/rejected": -1.8768508434295654, + "logps/chosen": -700.5944213867188, + "logps/rejected": -2072.28466796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.333847999572754, + "rewards/margins": 13.944681167602539, + "rewards/rejected": -20.27853012084961, + "step": 49090 + }, + { + "epoch": 2.93, + "learning_rate": 8.805048418362993e-09, + "logits/chosen": -2.513887405395508, + "logits/rejected": -1.8816516399383545, + "logps/chosen": -656.3055419921875, + "logps/rejected": -2117.01611328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9356160163879395, + "rewards/margins": 14.79194450378418, + "rewards/rejected": -20.727558135986328, + "step": 49100 + }, + { + "epoch": 2.93, + "learning_rate": 8.660195561764617e-09, + "logits/chosen": -2.5079245567321777, + "logits/rejected": -1.8423922061920166, + "logps/chosen": -672.3221435546875, + "logps/rejected": -2187.3974609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.005290985107422, + "rewards/margins": 15.428964614868164, + "rewards/rejected": -21.434253692626953, + "step": 49110 + }, + { + "epoch": 2.93, + "learning_rate": 8.516542036116627e-09, + "logits/chosen": -2.5632400512695312, + "logits/rejected": -1.8192932605743408, + "logps/chosen": -651.1019287109375, + "logps/rejected": -2095.583984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.851267337799072, + "rewards/margins": 14.662727355957031, + "rewards/rejected": -20.513996124267578, + "step": 49120 + }, + { + "epoch": 2.93, + "learning_rate": 8.374087910573426e-09, + "logits/chosen": -2.554694175720215, + "logits/rejected": -1.8394750356674194, + "logps/chosen": -679.4542236328125, + "logps/rejected": -2172.34326171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.100657939910889, + "rewards/margins": 15.166318893432617, + "rewards/rejected": -21.266979217529297, + "step": 49130 + }, + { + "epoch": 2.93, + "learning_rate": 8.232833253712657e-09, + "logits/chosen": -2.569272756576538, + "logits/rejected": -1.8462133407592773, + "logps/chosen": -649.4180908203125, + "logps/rejected": -2161.095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8409271240234375, + "rewards/margins": 15.329233169555664, + "rewards/rejected": -21.170156478881836, + "step": 49140 + }, + { + "epoch": 2.93, + "learning_rate": 8.092778133533818e-09, + "logits/chosen": -2.505617380142212, + "logits/rejected": -1.7246204614639282, + "logps/chosen": -683.0110473632812, + "logps/rejected": -2127.79736328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.144989490509033, + "rewards/margins": 14.695756912231445, + "rewards/rejected": -20.840744018554688, + "step": 49150 + }, + { + "epoch": 2.93, + "learning_rate": 7.953922617460198e-09, + "logits/chosen": -2.567241668701172, + "logits/rejected": -1.9169107675552368, + "logps/chosen": -677.0145263671875, + "logps/rejected": -2157.26123046875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.09099006652832, + "rewards/margins": 15.038966178894043, + "rewards/rejected": -21.129955291748047, + "step": 49160 + }, + { + "epoch": 2.93, + "learning_rate": 7.816266772336378e-09, + "logits/chosen": -2.553133249282837, + "logits/rejected": -1.8633687496185303, + "logps/chosen": -689.4061279296875, + "logps/rejected": -2112.793212890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.157993793487549, + "rewards/margins": 14.530378341674805, + "rewards/rejected": -20.688373565673828, + "step": 49170 + }, + { + "epoch": 2.93, + "learning_rate": 7.67981066442991e-09, + "logits/chosen": -2.554311752319336, + "logits/rejected": -1.7835776805877686, + "logps/chosen": -705.2890625, + "logps/rejected": -2044.6259765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.331605434417725, + "rewards/margins": 13.660550117492676, + "rewards/rejected": -19.99215316772461, + "step": 49180 + }, + { + "epoch": 2.93, + "learning_rate": 7.544554359430744e-09, + "logits/chosen": -2.574735641479492, + "logits/rejected": -1.8628994226455688, + "logps/chosen": -666.4688720703125, + "logps/rejected": -2115.528564453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985551357269287, + "rewards/margins": 14.722851753234863, + "rewards/rejected": -20.708404541015625, + "step": 49190 + }, + { + "epoch": 2.93, + "learning_rate": 7.410497922451243e-09, + "logits/chosen": -2.517439603805542, + "logits/rejected": -1.8490692377090454, + "logps/chosen": -676.3138427734375, + "logps/rejected": -2173.390869140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085583686828613, + "rewards/margins": 15.198657035827637, + "rewards/rejected": -21.28424072265625, + "step": 49200 + }, + { + "epoch": 2.93, + "learning_rate": 7.277641418026449e-09, + "logits/chosen": -2.5650455951690674, + "logits/rejected": -1.892671823501587, + "logps/chosen": -684.19091796875, + "logps/rejected": -2102.72119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.143913745880127, + "rewards/margins": 14.43809986114502, + "rewards/rejected": -20.582012176513672, + "step": 49210 + }, + { + "epoch": 2.94, + "learning_rate": 7.1459849101132575e-09, + "logits/chosen": -2.5351481437683105, + "logits/rejected": -1.852952003479004, + "logps/chosen": -677.2384643554688, + "logps/rejected": -2161.10888671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.060883045196533, + "rewards/margins": 15.097317695617676, + "rewards/rejected": -21.158201217651367, + "step": 49220 + }, + { + "epoch": 2.94, + "learning_rate": 7.015528462091248e-09, + "logits/chosen": -2.52811861038208, + "logits/rejected": -1.9085180759429932, + "logps/chosen": -717.533203125, + "logps/rejected": -2221.222412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.411994934082031, + "rewards/margins": 15.345428466796875, + "rewards/rejected": -21.75742530822754, + "step": 49230 + }, + { + "epoch": 2.94, + "learning_rate": 6.8862721367618514e-09, + "logits/chosen": -2.568427562713623, + "logits/rejected": -1.9487667083740234, + "logps/chosen": -679.6404418945312, + "logps/rejected": -2210.21875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.056485176086426, + "rewards/margins": 15.60278034210205, + "rewards/rejected": -21.659263610839844, + "step": 49240 + }, + { + "epoch": 2.94, + "learning_rate": 6.7582159963494595e-09, + "logits/chosen": -2.546980619430542, + "logits/rejected": -1.9588088989257812, + "logps/chosen": -669.2982177734375, + "logps/rejected": -2094.781982421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.02681303024292, + "rewards/margins": 14.488410949707031, + "rewards/rejected": -20.51522445678711, + "step": 49250 + }, + { + "epoch": 2.94, + "learning_rate": 6.63136010250004e-09, + "logits/chosen": -2.516225814819336, + "logits/rejected": -1.7841581106185913, + "logps/chosen": -680.0847778320312, + "logps/rejected": -2109.33154296875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085273742675781, + "rewards/margins": 14.563825607299805, + "rewards/rejected": -20.649097442626953, + "step": 49260 + }, + { + "epoch": 2.94, + "learning_rate": 6.505704516281685e-09, + "logits/chosen": -2.550874710083008, + "logits/rejected": -1.8583247661590576, + "logps/chosen": -651.0923461914062, + "logps/rejected": -2074.48486328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.797460079193115, + "rewards/margins": 14.509958267211914, + "rewards/rejected": -20.307415008544922, + "step": 49270 + }, + { + "epoch": 2.94, + "learning_rate": 6.38124929818601e-09, + "logits/chosen": -2.518436908721924, + "logits/rejected": -1.9192253351211548, + "logps/chosen": -714.9462890625, + "logps/rejected": -2154.89892578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.42159366607666, + "rewards/margins": 14.69117259979248, + "rewards/rejected": -21.112764358520508, + "step": 49280 + }, + { + "epoch": 2.94, + "learning_rate": 6.257994508124532e-09, + "logits/chosen": -2.5246682167053223, + "logits/rejected": -1.8921531438827515, + "logps/chosen": -707.6438598632812, + "logps/rejected": -2111.578369140625, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.385739326477051, + "rewards/margins": 14.284835815429688, + "rewards/rejected": -20.670574188232422, + "step": 49290 + }, + { + "epoch": 2.94, + "learning_rate": 6.135940205433122e-09, + "logits/chosen": -2.531855344772339, + "logits/rejected": -1.781602144241333, + "logps/chosen": -674.4317016601562, + "logps/rejected": -2071.22412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.037513732910156, + "rewards/margins": 14.233553886413574, + "rewards/rejected": -20.271068572998047, + "step": 49300 + }, + { + "epoch": 2.94, + "learning_rate": 6.015086448868112e-09, + "logits/chosen": -2.5663373470306396, + "logits/rejected": -1.8886483907699585, + "logps/chosen": -667.2059326171875, + "logps/rejected": -2082.9208984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.93973445892334, + "rewards/margins": 14.438677787780762, + "rewards/rejected": -20.3784122467041, + "step": 49310 + }, + { + "epoch": 2.94, + "learning_rate": 5.895433296608799e-09, + "logits/chosen": -2.5558807849884033, + "logits/rejected": -1.8729406595230103, + "logps/chosen": -695.56298828125, + "logps/rejected": -2155.61962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.294610500335693, + "rewards/margins": 14.818615913391113, + "rewards/rejected": -21.11322593688965, + "step": 49320 + }, + { + "epoch": 2.94, + "learning_rate": 5.7769808062560496e-09, + "logits/chosen": -2.541994333267212, + "logits/rejected": -1.7804911136627197, + "logps/chosen": -674.2493896484375, + "logps/rejected": -2187.38037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.014627456665039, + "rewards/margins": 15.405705451965332, + "rewards/rejected": -21.420331954956055, + "step": 49330 + }, + { + "epoch": 2.94, + "learning_rate": 5.659729034832861e-09, + "logits/chosen": -2.5556557178497314, + "logits/rejected": -1.9154250621795654, + "logps/chosen": -681.1201171875, + "logps/rejected": -2125.99951171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.079209327697754, + "rewards/margins": 14.718145370483398, + "rewards/rejected": -20.797353744506836, + "step": 49340 + }, + { + "epoch": 2.94, + "learning_rate": 5.543678038784361e-09, + "logits/chosen": -2.538409948348999, + "logits/rejected": -1.9269828796386719, + "logps/chosen": -663.5633544921875, + "logps/rejected": -2080.65625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9376912117004395, + "rewards/margins": 14.422765731811523, + "rewards/rejected": -20.360454559326172, + "step": 49350 + }, + { + "epoch": 2.94, + "learning_rate": 5.428827873977249e-09, + "logits/chosen": -2.5536093711853027, + "logits/rejected": -1.827696442604065, + "logps/chosen": -668.5094604492188, + "logps/rejected": -2115.242431640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.974954128265381, + "rewards/margins": 14.725387573242188, + "rewards/rejected": -20.700342178344727, + "step": 49360 + }, + { + "epoch": 2.94, + "learning_rate": 5.31517859570091e-09, + "logits/chosen": -2.5148215293884277, + "logits/rejected": -1.807997465133667, + "logps/chosen": -707.469482421875, + "logps/rejected": -2156.641845703125, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3699846267700195, + "rewards/margins": 14.76057243347168, + "rewards/rejected": -21.130558013916016, + "step": 49370 + }, + { + "epoch": 2.94, + "learning_rate": 5.202730258665745e-09, + "logits/chosen": -2.52323055267334, + "logits/rejected": -1.913159966468811, + "logps/chosen": -681.5038452148438, + "logps/rejected": -2244.760498046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.110105514526367, + "rewards/margins": 15.882349014282227, + "rewards/rejected": -21.992454528808594, + "step": 49380 + }, + { + "epoch": 2.95, + "learning_rate": 5.0914829170042865e-09, + "logits/chosen": -2.5083489418029785, + "logits/rejected": -1.794224500656128, + "logps/chosen": -662.0447387695312, + "logps/rejected": -2191.64013671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9083428382873535, + "rewards/margins": 15.563919067382812, + "rewards/rejected": -21.472261428833008, + "step": 49390 + }, + { + "epoch": 2.95, + "learning_rate": 4.981436624271197e-09, + "logits/chosen": -2.513033390045166, + "logits/rejected": -1.847495675086975, + "logps/chosen": -680.7762451171875, + "logps/rejected": -1989.8824462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.101420879364014, + "rewards/margins": 13.369844436645508, + "rewards/rejected": -19.471263885498047, + "step": 49400 + }, + { + "epoch": 2.95, + "learning_rate": 4.872591433442708e-09, + "logits/chosen": -2.534648895263672, + "logits/rejected": -1.8714148998260498, + "logps/chosen": -671.7918090820312, + "logps/rejected": -2135.2529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.99990177154541, + "rewards/margins": 14.910247802734375, + "rewards/rejected": -20.9101505279541, + "step": 49410 + }, + { + "epoch": 2.95, + "learning_rate": 4.764947396916908e-09, + "logits/chosen": -2.4984121322631836, + "logits/rejected": -1.8002328872680664, + "logps/chosen": -675.6675415039062, + "logps/rejected": -2227.88037109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.042969703674316, + "rewards/margins": 15.79259967803955, + "rewards/rejected": -21.8355712890625, + "step": 49420 + }, + { + "epoch": 2.95, + "learning_rate": 4.6585045665134555e-09, + "logits/chosen": -2.530494213104248, + "logits/rejected": -1.8684513568878174, + "logps/chosen": -690.7496948242188, + "logps/rejected": -2171.0126953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.202425956726074, + "rewards/margins": 15.074358940124512, + "rewards/rejected": -21.276782989501953, + "step": 49430 + }, + { + "epoch": 2.95, + "learning_rate": 4.5532629934744166e-09, + "logits/chosen": -2.558126926422119, + "logits/rejected": -1.9294935464859009, + "logps/chosen": -663.81005859375, + "logps/rejected": -2155.494873046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.953575134277344, + "rewards/margins": 15.146524429321289, + "rewards/rejected": -21.100099563598633, + "step": 49440 + }, + { + "epoch": 2.95, + "learning_rate": 4.449222728462599e-09, + "logits/chosen": -2.4777719974517822, + "logits/rejected": -1.7861063480377197, + "logps/chosen": -696.8436889648438, + "logps/rejected": -2126.11474609375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.220231533050537, + "rewards/margins": 14.582344055175781, + "rewards/rejected": -20.802576065063477, + "step": 49450 + }, + { + "epoch": 2.95, + "learning_rate": 4.346383821562938e-09, + "logits/chosen": -2.5325992107391357, + "logits/rejected": -1.8913787603378296, + "logps/chosen": -695.7694702148438, + "logps/rejected": -2091.69189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.26959228515625, + "rewards/margins": 14.209233283996582, + "rewards/rejected": -20.47882652282715, + "step": 49460 + }, + { + "epoch": 2.95, + "learning_rate": 4.244746322282501e-09, + "logits/chosen": -2.5697498321533203, + "logits/rejected": -1.9177442789077759, + "logps/chosen": -687.0233764648438, + "logps/rejected": -2193.84130859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.117989540100098, + "rewards/margins": 15.374124526977539, + "rewards/rejected": -21.492111206054688, + "step": 49470 + }, + { + "epoch": 2.95, + "learning_rate": 4.144310279549368e-09, + "logits/chosen": -2.500814437866211, + "logits/rejected": -1.7839845418930054, + "logps/chosen": -678.1619262695312, + "logps/rejected": -2114.965087890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.091034412384033, + "rewards/margins": 14.620742797851562, + "rewards/rejected": -20.711774826049805, + "step": 49480 + }, + { + "epoch": 2.95, + "learning_rate": 4.0450757417132005e-09, + "logits/chosen": -2.54978609085083, + "logits/rejected": -1.925737738609314, + "logps/chosen": -679.5492553710938, + "logps/rejected": -2128.204833984375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.154738426208496, + "rewards/margins": 14.674949645996094, + "rewards/rejected": -20.829689025878906, + "step": 49490 + }, + { + "epoch": 2.95, + "learning_rate": 3.94704275654606e-09, + "logits/chosen": -2.516572952270508, + "logits/rejected": -1.8042802810668945, + "logps/chosen": -705.8152465820312, + "logps/rejected": -2217.54296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.24371337890625, + "rewards/margins": 15.469934463500977, + "rewards/rejected": -21.713647842407227, + "step": 49500 + }, + { + "epoch": 2.95, + "learning_rate": 3.850211371240475e-09, + "logits/chosen": -2.516396999359131, + "logits/rejected": -1.7670866250991821, + "logps/chosen": -693.7000122070312, + "logps/rejected": -2170.15625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.249978065490723, + "rewards/margins": 15.004602432250977, + "rewards/rejected": -21.25457763671875, + "step": 49510 + }, + { + "epoch": 2.95, + "learning_rate": 3.75458163241138e-09, + "logits/chosen": -2.5478100776672363, + "logits/rejected": -2.0232670307159424, + "logps/chosen": -724.9491577148438, + "logps/rejected": -2094.201171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.523284912109375, + "rewards/margins": 13.976945877075195, + "rewards/rejected": -20.500228881835938, + "step": 49520 + }, + { + "epoch": 2.95, + "learning_rate": 3.6601535860950053e-09, + "logits/chosen": -2.562737464904785, + "logits/rejected": -1.877733588218689, + "logps/chosen": -676.1272583007812, + "logps/rejected": -2110.81005859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.075245380401611, + "rewards/margins": 14.596429824829102, + "rewards/rejected": -20.671676635742188, + "step": 49530 + }, + { + "epoch": 2.95, + "learning_rate": 3.5669272777485998e-09, + "logits/chosen": -2.552736282348633, + "logits/rejected": -1.728158950805664, + "logps/chosen": -705.5743408203125, + "logps/rejected": -2115.58837890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.349186897277832, + "rewards/margins": 14.372385025024414, + "rewards/rejected": -20.721569061279297, + "step": 49540 + }, + { + "epoch": 2.95, + "learning_rate": 3.4749027522518186e-09, + "logits/chosen": -2.538142442703247, + "logits/rejected": -1.843336820602417, + "logps/chosen": -716.3800048828125, + "logps/rejected": -2107.57373046875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.426610469818115, + "rewards/margins": 14.202349662780762, + "rewards/rejected": -20.62896156311035, + "step": 49550 + }, + { + "epoch": 2.96, + "learning_rate": 3.3840800539047815e-09, + "logits/chosen": -2.5024640560150146, + "logits/rejected": -1.8885743618011475, + "logps/chosen": -675.263916015625, + "logps/rejected": -2182.994384765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06791877746582, + "rewards/margins": 15.307395935058594, + "rewards/rejected": -21.375314712524414, + "step": 49560 + }, + { + "epoch": 2.96, + "learning_rate": 3.2944592264300133e-09, + "logits/chosen": -2.5641889572143555, + "logits/rejected": -1.9444749355316162, + "logps/chosen": -682.0316162109375, + "logps/rejected": -2195.05029296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1519317626953125, + "rewards/margins": 15.349777221679688, + "rewards/rejected": -21.501710891723633, + "step": 49570 + }, + { + "epoch": 2.96, + "learning_rate": 3.206040312970504e-09, + "logits/chosen": -2.5301618576049805, + "logits/rejected": -1.813672423362732, + "logps/chosen": -666.8998413085938, + "logps/rejected": -2134.23779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.990866661071777, + "rewards/margins": 14.90544605255127, + "rewards/rejected": -20.896312713623047, + "step": 49580 + }, + { + "epoch": 2.96, + "learning_rate": 3.1188233560913717e-09, + "logits/chosen": -2.5323376655578613, + "logits/rejected": -1.9399681091308594, + "logps/chosen": -684.0280151367188, + "logps/rejected": -2118.993408203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.153268814086914, + "rewards/margins": 14.594823837280273, + "rewards/rejected": -20.748088836669922, + "step": 49590 + }, + { + "epoch": 2.96, + "learning_rate": 3.0328083977790303e-09, + "logits/chosen": -2.5808420181274414, + "logits/rejected": -1.9589622020721436, + "logps/chosen": -691.9338989257812, + "logps/rejected": -2119.939453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.240786552429199, + "rewards/margins": 14.515874862670898, + "rewards/rejected": -20.756662368774414, + "step": 49600 + }, + { + "epoch": 2.96, + "learning_rate": 2.9479954794409126e-09, + "logits/chosen": -2.5234713554382324, + "logits/rejected": -1.8210713863372803, + "logps/chosen": -690.149169921875, + "logps/rejected": -2116.299560546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.195367336273193, + "rewards/margins": 14.542287826538086, + "rewards/rejected": -20.737653732299805, + "step": 49610 + }, + { + "epoch": 2.96, + "learning_rate": 2.8643846419057484e-09, + "logits/chosen": -2.519155979156494, + "logits/rejected": -1.8517986536026, + "logps/chosen": -679.0350341796875, + "logps/rejected": -2128.08447265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.047640800476074, + "rewards/margins": 14.78327751159668, + "rewards/rejected": -20.83091926574707, + "step": 49620 + }, + { + "epoch": 2.96, + "learning_rate": 2.781975925423841e-09, + "logits/chosen": -2.5755741596221924, + "logits/rejected": -1.8668380975723267, + "logps/chosen": -652.0667724609375, + "logps/rejected": -2102.26513671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.8185715675354, + "rewards/margins": 14.758939743041992, + "rewards/rejected": -20.577512741088867, + "step": 49630 + }, + { + "epoch": 2.96, + "learning_rate": 2.700769369667344e-09, + "logits/chosen": -2.5223653316497803, + "logits/rejected": -1.8752691745758057, + "logps/chosen": -666.7166137695312, + "logps/rejected": -2094.19482421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.985750198364258, + "rewards/margins": 14.508581161499023, + "rewards/rejected": -20.49433135986328, + "step": 49640 + }, + { + "epoch": 2.96, + "learning_rate": 2.6207650137283215e-09, + "logits/chosen": -2.5077269077301025, + "logits/rejected": -1.7905728816986084, + "logps/chosen": -685.771484375, + "logps/rejected": -2165.820556640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151116371154785, + "rewards/margins": 15.075695991516113, + "rewards/rejected": -21.226810455322266, + "step": 49650 + }, + { + "epoch": 2.96, + "learning_rate": 2.5419628961212416e-09, + "logits/chosen": -2.5256519317626953, + "logits/rejected": -1.8557029962539673, + "logps/chosen": -693.1612548828125, + "logps/rejected": -2218.91162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.24569034576416, + "rewards/margins": 15.49833869934082, + "rewards/rejected": -21.744029998779297, + "step": 49660 + }, + { + "epoch": 2.96, + "learning_rate": 2.464363054781316e-09, + "logits/chosen": -2.533587694168091, + "logits/rejected": -1.818748116493225, + "logps/chosen": -690.1409301757812, + "logps/rejected": -2162.29052734375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.137089729309082, + "rewards/margins": 15.048016548156738, + "rewards/rejected": -21.185104370117188, + "step": 49670 + }, + { + "epoch": 2.96, + "learning_rate": 2.3879655270650504e-09, + "logits/chosen": -2.4907283782958984, + "logits/rejected": -1.8902851343154907, + "logps/chosen": -686.443115234375, + "logps/rejected": -2148.316162109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.108162879943848, + "rewards/margins": 14.929410934448242, + "rewards/rejected": -21.037572860717773, + "step": 49680 + }, + { + "epoch": 2.96, + "learning_rate": 2.3127703497505262e-09, + "logits/chosen": -2.5786945819854736, + "logits/rejected": -1.9803249835968018, + "logps/chosen": -683.5202026367188, + "logps/rejected": -2149.1650390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069382190704346, + "rewards/margins": 14.968286514282227, + "rewards/rejected": -21.037670135498047, + "step": 49690 + }, + { + "epoch": 2.96, + "learning_rate": 2.2387775590365644e-09, + "logits/chosen": -2.561718702316284, + "logits/rejected": -1.9002258777618408, + "logps/chosen": -715.8023681640625, + "logps/rejected": -2141.392578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.4254608154296875, + "rewards/margins": 14.530370712280273, + "rewards/rejected": -20.95583152770996, + "step": 49700 + }, + { + "epoch": 2.96, + "learning_rate": 2.1659871905430064e-09, + "logits/chosen": -2.5503509044647217, + "logits/rejected": -1.8585374355316162, + "logps/chosen": -670.7238159179688, + "logps/rejected": -2242.79541015625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.982290744781494, + "rewards/margins": 15.980626106262207, + "rewards/rejected": -21.96291732788086, + "step": 49710 + }, + { + "epoch": 2.96, + "learning_rate": 2.094399279311543e-09, + "logits/chosen": -2.5173089504241943, + "logits/rejected": -1.8253977298736572, + "logps/chosen": -668.9486694335938, + "logps/rejected": -2150.856689453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.025146484375, + "rewards/margins": 15.046812057495117, + "rewards/rejected": -21.071956634521484, + "step": 49720 + }, + { + "epoch": 2.97, + "learning_rate": 2.02401385980433e-09, + "logits/chosen": -2.596003770828247, + "logits/rejected": -1.9283034801483154, + "logps/chosen": -666.5158081054688, + "logps/rejected": -2156.18505859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.960243225097656, + "rewards/margins": 15.164944648742676, + "rewards/rejected": -21.12518882751465, + "step": 49730 + }, + { + "epoch": 2.97, + "learning_rate": 1.954830965905097e-09, + "logits/chosen": -2.5083346366882324, + "logits/rejected": -1.8655881881713867, + "logps/chosen": -672.6925659179688, + "logps/rejected": -2090.392822265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.030396461486816, + "rewards/margins": 14.44017505645752, + "rewards/rejected": -20.470571517944336, + "step": 49740 + }, + { + "epoch": 2.97, + "learning_rate": 1.8868506309183132e-09, + "logits/chosen": -2.575096368789673, + "logits/rejected": -1.9653161764144897, + "logps/chosen": -655.029052734375, + "logps/rejected": -2278.940673828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.862828731536865, + "rewards/margins": 16.47231101989746, + "rewards/rejected": -22.335140228271484, + "step": 49750 + }, + { + "epoch": 2.97, + "learning_rate": 1.820072887569746e-09, + "logits/chosen": -2.5755858421325684, + "logits/rejected": -1.956120491027832, + "logps/chosen": -681.0980224609375, + "logps/rejected": -2201.649169921875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125912666320801, + "rewards/margins": 15.446125984191895, + "rewards/rejected": -21.572040557861328, + "step": 49760 + }, + { + "epoch": 2.97, + "learning_rate": 1.7544977680064578e-09, + "logits/chosen": -2.517467975616455, + "logits/rejected": -1.839003562927246, + "logps/chosen": -690.4193115234375, + "logps/rejected": -2072.1982421875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.1948161125183105, + "rewards/margins": 14.088478088378906, + "rewards/rejected": -20.28329849243164, + "step": 49770 + }, + { + "epoch": 2.97, + "learning_rate": 1.6901253037959752e-09, + "logits/chosen": -2.56687593460083, + "logits/rejected": -1.9186580181121826, + "logps/chosen": -709.2658081054688, + "logps/rejected": -2123.8212890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.3644185066223145, + "rewards/margins": 14.421613693237305, + "rewards/rejected": -20.78603172302246, + "step": 49780 + }, + { + "epoch": 2.97, + "learning_rate": 1.6269555259271207e-09, + "logits/chosen": -2.5694708824157715, + "logits/rejected": -1.8420253992080688, + "logps/chosen": -650.3720703125, + "logps/rejected": -2140.111572265625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.758027076721191, + "rewards/margins": 15.202230453491211, + "rewards/rejected": -20.960256576538086, + "step": 49790 + }, + { + "epoch": 2.97, + "learning_rate": 1.564988464810291e-09, + "logits/chosen": -2.5522613525390625, + "logits/rejected": -1.9379383325576782, + "logps/chosen": -661.7889404296875, + "logps/rejected": -2206.80908203125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.9215874671936035, + "rewards/margins": 15.696504592895508, + "rewards/rejected": -21.618091583251953, + "step": 49800 + }, + { + "epoch": 2.97, + "learning_rate": 1.5042241502757904e-09, + "logits/chosen": -2.537839651107788, + "logits/rejected": -1.878558874130249, + "logps/chosen": -675.55078125, + "logps/rejected": -2132.173095703125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.053613185882568, + "rewards/margins": 14.832422256469727, + "rewards/rejected": -20.88603401184082, + "step": 49810 + }, + { + "epoch": 2.97, + "learning_rate": 1.4446626115763307e-09, + "logits/chosen": -2.577200412750244, + "logits/rejected": -1.9026952981948853, + "logps/chosen": -696.6884765625, + "logps/rejected": -2178.208740234375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.250899791717529, + "rewards/margins": 15.093254089355469, + "rewards/rejected": -21.344152450561523, + "step": 49820 + }, + { + "epoch": 2.97, + "learning_rate": 1.386303877384254e-09, + "logits/chosen": -2.540126323699951, + "logits/rejected": -1.8450285196304321, + "logps/chosen": -669.8217163085938, + "logps/rejected": -2203.628173828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.992453575134277, + "rewards/margins": 15.585983276367188, + "rewards/rejected": -21.57843589782715, + "step": 49830 + }, + { + "epoch": 2.97, + "learning_rate": 1.3291479757934766e-09, + "logits/chosen": -2.5800750255584717, + "logits/rejected": -1.8229376077651978, + "logps/chosen": -690.0701904296875, + "logps/rejected": -2152.00341796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.203404903411865, + "rewards/margins": 14.876243591308594, + "rewards/rejected": -21.079647064208984, + "step": 49840 + }, + { + "epoch": 2.97, + "learning_rate": 1.2731949343192107e-09, + "logits/chosen": -2.526212692260742, + "logits/rejected": -1.8458163738250732, + "logps/chosen": -657.47265625, + "logps/rejected": -2161.98193359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.815003395080566, + "rewards/margins": 15.35925006866455, + "rewards/rejected": -21.174251556396484, + "step": 49850 + }, + { + "epoch": 2.97, + "learning_rate": 1.2184447798971322e-09, + "logits/chosen": -2.522651433944702, + "logits/rejected": -1.8430054187774658, + "logps/chosen": -672.443359375, + "logps/rejected": -2147.48486328125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0261101722717285, + "rewards/margins": 14.999653816223145, + "rewards/rejected": -21.025760650634766, + "step": 49860 + }, + { + "epoch": 2.97, + "learning_rate": 1.1648975388836581e-09, + "logits/chosen": -2.5237815380096436, + "logits/rejected": -1.9365698099136353, + "logps/chosen": -686.151123046875, + "logps/rejected": -2190.659423828125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.157508850097656, + "rewards/margins": 15.315503120422363, + "rewards/rejected": -21.473012924194336, + "step": 49870 + }, + { + "epoch": 2.97, + "learning_rate": 1.1125532370567793e-09, + "logits/chosen": -2.5711348056793213, + "logits/rejected": -1.913601279258728, + "logps/chosen": -670.5914916992188, + "logps/rejected": -2041.4388427734375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.00182580947876, + "rewards/margins": 13.9690523147583, + "rewards/rejected": -19.97087860107422, + "step": 49880 + }, + { + "epoch": 2.97, + "learning_rate": 1.0614118996146727e-09, + "logits/chosen": -2.4952950477600098, + "logits/rejected": -1.8099769353866577, + "logps/chosen": -684.0933837890625, + "logps/rejected": -2054.927001953125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.151881217956543, + "rewards/margins": 13.971158027648926, + "rewards/rejected": -20.1230411529541, + "step": 49890 + }, + { + "epoch": 2.98, + "learning_rate": 1.0114735511773666e-09, + "logits/chosen": -2.5705182552337646, + "logits/rejected": -1.8209835290908813, + "logps/chosen": -677.50390625, + "logps/rejected": -2105.103271484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.0513715744018555, + "rewards/margins": 14.550100326538086, + "rewards/rejected": -20.60147476196289, + "step": 49900 + }, + { + "epoch": 2.98, + "learning_rate": 9.6273821578452e-10, + "logits/chosen": -2.5202555656433105, + "logits/rejected": -1.8913053274154663, + "logps/chosen": -657.323486328125, + "logps/rejected": -2163.587890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.903634071350098, + "rewards/margins": 15.301651000976562, + "rewards/rejected": -21.205286026000977, + "step": 49910 + }, + { + "epoch": 2.98, + "learning_rate": 9.152059168976435e-10, + "logits/chosen": -2.5227673053741455, + "logits/rejected": -1.925824761390686, + "logps/chosen": -676.1573486328125, + "logps/rejected": -2191.748779296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.064774513244629, + "rewards/margins": 15.403039932250977, + "rewards/rejected": -21.46781349182129, + "step": 49920 + }, + { + "epoch": 2.98, + "learning_rate": 8.688766773989888e-10, + "logits/chosen": -2.5212929248809814, + "logits/rejected": -1.8973617553710938, + "logps/chosen": -677.9844360351562, + "logps/rejected": -2129.238525390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.069830894470215, + "rewards/margins": 14.790493965148926, + "rewards/rejected": -20.86032485961914, + "step": 49930 + }, + { + "epoch": 2.98, + "learning_rate": 8.237505195912709e-10, + "logits/chosen": -2.497556209564209, + "logits/rejected": -1.7923336029052734, + "logps/chosen": -658.2739868164062, + "logps/rejected": -2179.097412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.895893573760986, + "rewards/margins": 15.450843811035156, + "rewards/rejected": -21.346736907958984, + "step": 49940 + }, + { + "epoch": 2.98, + "learning_rate": 7.798274651979465e-10, + "logits/chosen": -2.533353805541992, + "logits/rejected": -1.7671968936920166, + "logps/chosen": -693.9860229492188, + "logps/rejected": -2089.450439453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.229470252990723, + "rewards/margins": 14.235941886901855, + "rewards/rejected": -20.465412139892578, + "step": 49950 + }, + { + "epoch": 2.98, + "learning_rate": 7.371075353640455e-10, + "logits/chosen": -2.502901792526245, + "logits/rejected": -1.8352088928222656, + "logps/chosen": -672.581298828125, + "logps/rejected": -2170.059814453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.02311372756958, + "rewards/margins": 15.237655639648438, + "rewards/rejected": -21.26076889038086, + "step": 49960 + }, + { + "epoch": 2.98, + "learning_rate": 6.955907506545068e-10, + "logits/chosen": -2.527617931365967, + "logits/rejected": -1.890013337135315, + "logps/chosen": -663.4302978515625, + "logps/rejected": -2121.8125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.906503200531006, + "rewards/margins": 14.873156547546387, + "rewards/rejected": -20.779659271240234, + "step": 49970 + }, + { + "epoch": 2.98, + "learning_rate": 6.552771310558426e-10, + "logits/chosen": -2.5521366596221924, + "logits/rejected": -1.9126533269882202, + "logps/chosen": -672.8548583984375, + "logps/rejected": -2226.603759765625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077043056488037, + "rewards/margins": 15.751864433288574, + "rewards/rejected": -21.828907012939453, + "step": 49980 + }, + { + "epoch": 2.98, + "learning_rate": 6.161666959747514e-10, + "logits/chosen": -2.5637426376342773, + "logits/rejected": -1.8561832904815674, + "logps/chosen": -683.3431396484375, + "logps/rejected": -2135.83935546875, + "loss": 0.0005, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.087283611297607, + "rewards/margins": 14.815427780151367, + "rewards/rejected": -20.9027099609375, + "step": 49990 + }, + { + "epoch": 2.98, + "learning_rate": 5.782594642392281e-10, + "logits/chosen": -2.5122523307800293, + "logits/rejected": -1.8399207592010498, + "logps/chosen": -698.0675048828125, + "logps/rejected": -2116.02880859375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.268139839172363, + "rewards/margins": 14.43890380859375, + "rewards/rejected": -20.70704460144043, + "step": 50000 + }, + { + "epoch": 2.98, + "learning_rate": 5.415554540977308e-10, + "logits/chosen": -2.520503520965576, + "logits/rejected": -1.8655586242675781, + "logps/chosen": -671.724853515625, + "logps/rejected": -2102.10986328125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.000971794128418, + "rewards/margins": 14.579197883605957, + "rewards/rejected": -20.580169677734375, + "step": 50010 + }, + { + "epoch": 2.98, + "learning_rate": 5.060546832194591e-10, + "logits/chosen": -2.533262252807617, + "logits/rejected": -1.8203647136688232, + "logps/chosen": -682.4366455078125, + "logps/rejected": -2115.25146484375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.068896293640137, + "rewards/margins": 14.640386581420898, + "rewards/rejected": -20.709280014038086, + "step": 50020 + }, + { + "epoch": 2.98, + "learning_rate": 4.717571686949085e-10, + "logits/chosen": -2.5001306533813477, + "logits/rejected": -1.8636640310287476, + "logps/chosen": -656.9744873046875, + "logps/rejected": -2206.67529296875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.917806625366211, + "rewards/margins": 15.707781791687012, + "rewards/rejected": -21.625585556030273, + "step": 50030 + }, + { + "epoch": 2.98, + "learning_rate": 4.386629270342058e-10, + "logits/chosen": -2.5234766006469727, + "logits/rejected": -1.9177812337875366, + "logps/chosen": -682.3009033203125, + "logps/rejected": -2192.737060546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.125760555267334, + "rewards/margins": 15.349499702453613, + "rewards/rejected": -21.47525978088379, + "step": 50040 + }, + { + "epoch": 2.98, + "learning_rate": 4.0677197416960635e-10, + "logits/chosen": -2.5814576148986816, + "logits/rejected": -1.7792949676513672, + "logps/chosen": -678.3975830078125, + "logps/rejected": -2171.72412109375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.06838321685791, + "rewards/margins": 15.213052749633789, + "rewards/rejected": -21.281435012817383, + "step": 50050 + }, + { + "epoch": 2.99, + "learning_rate": 3.7608432545299666e-10, + "logits/chosen": -2.512453079223633, + "logits/rejected": -1.8584010601043701, + "logps/chosen": -694.1273803710938, + "logps/rejected": -2163.83642578125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.262761116027832, + "rewards/margins": 14.927938461303711, + "rewards/rejected": -21.19070053100586, + "step": 50060 + }, + { + "epoch": 2.99, + "learning_rate": 3.465999956575594e-10, + "logits/chosen": -2.519751787185669, + "logits/rejected": -1.7410414218902588, + "logps/chosen": -665.53515625, + "logps/rejected": -2085.19091796875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.966450214385986, + "rewards/margins": 14.444196701049805, + "rewards/rejected": -20.410648345947266, + "step": 50070 + }, + { + "epoch": 2.99, + "learning_rate": 3.1831899897694083e-10, + "logits/chosen": -2.6033010482788086, + "logits/rejected": -1.9829580783843994, + "logps/chosen": -667.8824462890625, + "logps/rejected": -2163.507568359375, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.90822696685791, + "rewards/margins": 15.277177810668945, + "rewards/rejected": -21.185401916503906, + "step": 50080 + }, + { + "epoch": 2.99, + "learning_rate": 2.912413490255284e-10, + "logits/chosen": -2.550960063934326, + "logits/rejected": -1.8287570476531982, + "logps/chosen": -676.916748046875, + "logps/rejected": -2044.824462890625, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.077078819274902, + "rewards/margins": 13.925676345825195, + "rewards/rejected": -20.00275421142578, + "step": 50090 + }, + { + "epoch": 2.99, + "learning_rate": 2.653670588390056e-10, + "logits/chosen": -2.542663097381592, + "logits/rejected": -1.8644644021987915, + "logps/chosen": -699.184814453125, + "logps/rejected": -2129.00830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.328659534454346, + "rewards/margins": 14.524408340454102, + "rewards/rejected": -20.85306739807129, + "step": 50100 + }, + { + "epoch": 2.99, + "learning_rate": 2.406961408726871e-10, + "logits/chosen": -2.5173885822296143, + "logits/rejected": -1.9347556829452515, + "logps/chosen": -660.2291259765625, + "logps/rejected": -2149.12744140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.908097267150879, + "rewards/margins": 15.1456937789917, + "rewards/rejected": -21.053791046142578, + "step": 50110 + }, + { + "epoch": 2.99, + "learning_rate": 2.172286070031837e-10, + "logits/chosen": -2.587052345275879, + "logits/rejected": -1.9283698797225952, + "logps/chosen": -687.688232421875, + "logps/rejected": -2115.408935546875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.066790580749512, + "rewards/margins": 14.650062561035156, + "rewards/rejected": -20.716854095458984, + "step": 50120 + }, + { + "epoch": 2.99, + "learning_rate": 1.9496446852840244e-10, + "logits/chosen": -2.542694091796875, + "logits/rejected": -1.8852260112762451, + "logps/chosen": -676.3635864257812, + "logps/rejected": -2152.5400390625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.085347652435303, + "rewards/margins": 14.984086990356445, + "rewards/rejected": -21.069433212280273, + "step": 50130 + }, + { + "epoch": 2.99, + "learning_rate": 1.7390373616560373e-10, + "logits/chosen": -2.538145065307617, + "logits/rejected": -1.9341118335723877, + "logps/chosen": -683.75927734375, + "logps/rejected": -2104.004638671875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.160312652587891, + "rewards/margins": 14.43463134765625, + "rewards/rejected": -20.59494400024414, + "step": 50140 + }, + { + "epoch": 2.99, + "learning_rate": 1.540464200536218e-10, + "logits/chosen": -2.527733564376831, + "logits/rejected": -1.778207778930664, + "logps/chosen": -676.5880126953125, + "logps/rejected": -2043.5830078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.048380374908447, + "rewards/margins": 13.946096420288086, + "rewards/rejected": -19.994476318359375, + "step": 50150 + }, + { + "epoch": 2.99, + "learning_rate": 1.3539252975175442e-10, + "logits/chosen": -2.5877346992492676, + "logits/rejected": -1.9657316207885742, + "logps/chosen": -678.2493896484375, + "logps/rejected": -2098.16357421875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.046801567077637, + "rewards/margins": 14.506769180297852, + "rewards/rejected": -20.553569793701172, + "step": 50160 + }, + { + "epoch": 2.99, + "learning_rate": 1.1794207424031812e-10, + "logits/chosen": -2.5748183727264404, + "logits/rejected": -1.8665997982025146, + "logps/chosen": -652.9252319335938, + "logps/rejected": -2205.422119140625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.819643497467041, + "rewards/margins": 15.78723430633545, + "rewards/rejected": -21.60687828063965, + "step": 50170 + }, + { + "epoch": 2.99, + "learning_rate": 1.0169506191953782e-10, + "logits/chosen": -2.5255067348480225, + "logits/rejected": -1.907688856124878, + "logps/chosen": -670.2386474609375, + "logps/rejected": -2140.00634765625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032339572906494, + "rewards/margins": 14.932443618774414, + "rewards/rejected": -20.96478271484375, + "step": 50180 + }, + { + "epoch": 2.99, + "learning_rate": 8.665150061093475e-11, + "logits/chosen": -2.5627894401550293, + "logits/rejected": -1.9426950216293335, + "logps/chosen": -687.9757080078125, + "logps/rejected": -2140.408447265625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172346591949463, + "rewards/margins": 14.797430038452148, + "rewards/rejected": -20.969776153564453, + "step": 50190 + }, + { + "epoch": 2.99, + "learning_rate": 7.281139755621614e-11, + "logits/chosen": -2.5275824069976807, + "logits/rejected": -1.862557053565979, + "logps/chosen": -671.0575561523438, + "logps/rejected": -2103.86962890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.971700191497803, + "rewards/margins": 14.623090744018555, + "rewards/rejected": -20.594791412353516, + "step": 50200 + }, + { + "epoch": 2.99, + "learning_rate": 6.017475941838547e-11, + "logits/chosen": -2.5633490085601807, + "logits/rejected": -1.9198487997055054, + "logps/chosen": -702.2757568359375, + "logps/rejected": -2089.74462890625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.329959869384766, + "rewards/margins": 14.123682022094727, + "rewards/rejected": -20.453641891479492, + "step": 50210 + }, + { + "epoch": 2.99, + "learning_rate": 4.874159228063224e-11, + "logits/chosen": -2.538217067718506, + "logits/rejected": -1.774009108543396, + "logps/chosen": -688.5136108398438, + "logps/rejected": -2190.60205078125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.216983795166016, + "rewards/margins": 15.246686935424805, + "rewards/rejected": -21.463672637939453, + "step": 50220 + }, + { + "epoch": 3.0, + "learning_rate": 3.851190164660956e-11, + "logits/chosen": -2.5337436199188232, + "logits/rejected": -1.9034135341644287, + "logps/chosen": -694.5013427734375, + "logps/rejected": -2065.637451171875, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.172028541564941, + "rewards/margins": 14.04706859588623, + "rewards/rejected": -20.219097137451172, + "step": 50230 + }, + { + "epoch": 3.0, + "learning_rate": 2.9485692441266756e-11, + "logits/chosen": -2.548444986343384, + "logits/rejected": -1.8404958248138428, + "logps/chosen": -658.4937133789062, + "logps/rejected": -2103.43359375, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.911465644836426, + "rewards/margins": 14.682355880737305, + "rewards/rejected": -20.593822479248047, + "step": 50240 + }, + { + "epoch": 3.0, + "learning_rate": 2.1662969009461632e-11, + "logits/chosen": -2.4834542274475098, + "logits/rejected": -1.8093007802963257, + "logps/chosen": -705.302001953125, + "logps/rejected": -2141.977783203125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.384571075439453, + "rewards/margins": 14.587183952331543, + "rewards/rejected": -20.97175407409668, + "step": 50250 + }, + { + "epoch": 3.0, + "learning_rate": 1.5043735117348247e-11, + "logits/chosen": -2.528701066970825, + "logits/rejected": -1.7395035028457642, + "logps/chosen": -683.5975341796875, + "logps/rejected": -2176.99853515625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.119858741760254, + "rewards/margins": 15.200639724731445, + "rewards/rejected": -21.320499420166016, + "step": 50260 + }, + { + "epoch": 3.0, + "learning_rate": 9.627993951266679e-12, + "logits/chosen": -2.5502355098724365, + "logits/rejected": -1.8845373392105103, + "logps/chosen": -676.9586181640625, + "logps/rejected": -2176.49462890625, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.054237365722656, + "rewards/margins": 15.249929428100586, + "rewards/rejected": -21.304166793823242, + "step": 50270 + }, + { + "epoch": 3.0, + "learning_rate": 5.415748118575703e-12, + "logits/chosen": -2.5088491439819336, + "logits/rejected": -1.8936443328857422, + "logps/chosen": -720.2625732421875, + "logps/rejected": -2075.21142578125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.50155782699585, + "rewards/margins": 13.819682121276855, + "rewards/rejected": -20.32124137878418, + "step": 50280 + }, + { + "epoch": 3.0, + "learning_rate": 2.4069996465425627e-12, + "logits/chosen": -2.525641679763794, + "logits/rejected": -1.8620027303695679, + "logps/chosen": -635.7743530273438, + "logps/rejected": -2122.26171875, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.642601013183594, + "rewards/margins": 15.141858100891113, + "rewards/rejected": -20.784460067749023, + "step": 50290 + }, + { + "epoch": 3.0, + "learning_rate": 6.017499840083041e-13, + "logits/chosen": -2.5679080486297607, + "logits/rejected": -1.8603260517120361, + "logps/chosen": -677.1753540039062, + "logps/rejected": -2139.66845703125, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.032637119293213, + "rewards/margins": 14.928869247436523, + "rewards/rejected": -20.961505889892578, + "step": 50300 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "logits/chosen": -2.546466112136841, + "logits/rejected": -1.8361034393310547, + "logps/chosen": -681.6663818359375, + "logps/rejected": -2149.8056640625, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": -6.098496437072754, + "rewards/margins": 14.964967727661133, + "rewards/rejected": -21.063465118408203, + "step": 50310 + }, + { + "epoch": 3.0, + "step": 50310, + "total_flos": 0.0, + "train_loss": 0.0, + "train_runtime": 0.0617, + "train_samples_per_second": 6527181.256, + "train_steps_per_second": 815915.902 + } + ], + "logging_steps": 10, + "max_steps": 50310, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 1677, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}