|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997382884061764, |
|
"eval_steps": 100, |
|
"global_step": 955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010468463752944255, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -0.3494967222213745, |
|
"logits/rejected": -0.3728627860546112, |
|
"logps/chosen": -285.8127136230469, |
|
"logps/ref_response": -0.3494967222213745, |
|
"logps/rejected": -212.7957000732422, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 4.5, |
|
"learning_rate": 5.208333333333334e-07, |
|
"logits/chosen": -0.540075421333313, |
|
"logits/rejected": -0.54986971616745, |
|
"logps/chosen": -315.31512451171875, |
|
"logps/ref_response": -0.5399107336997986, |
|
"logps/rejected": -278.0267639160156, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4583333432674408, |
|
"rewards/chosen": 0.001649973331950605, |
|
"rewards/margins": 0.0034635968040674925, |
|
"rewards/rejected": -0.0018136235885322094, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 1.0416666666666667e-06, |
|
"logits/chosen": -0.5037816762924194, |
|
"logits/rejected": -0.5245965719223022, |
|
"logps/chosen": -306.7390441894531, |
|
"logps/ref_response": -0.5032420754432678, |
|
"logps/rejected": -271.2138671875, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.003458543913438916, |
|
"rewards/margins": 0.0031067535746842623, |
|
"rewards/rejected": 0.0003517906297929585, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 4.75, |
|
"learning_rate": 1.5625e-06, |
|
"logits/chosen": -0.5102043151855469, |
|
"logits/rejected": -0.5178056955337524, |
|
"logps/chosen": -291.02197265625, |
|
"logps/ref_response": -0.5080639123916626, |
|
"logps/rejected": -252.41531372070312, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.014292215928435326, |
|
"rewards/margins": 0.014373516663908958, |
|
"rewards/rejected": -8.130413334583864e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 2.0833333333333334e-06, |
|
"logits/chosen": -0.48268669843673706, |
|
"logits/rejected": -0.5177565813064575, |
|
"logps/chosen": -305.90875244140625, |
|
"logps/ref_response": -0.47757530212402344, |
|
"logps/rejected": -244.60757446289062, |
|
"loss": 0.6781, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.039179086685180664, |
|
"rewards/margins": 0.04343840479850769, |
|
"rewards/rejected": -0.004259312059730291, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 2.604166666666667e-06, |
|
"logits/chosen": -0.5464528799057007, |
|
"logits/rejected": -0.5745548605918884, |
|
"logps/chosen": -304.85235595703125, |
|
"logps/ref_response": -0.5367640256881714, |
|
"logps/rejected": -282.80804443359375, |
|
"loss": 0.6733, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.07245220243930817, |
|
"rewards/margins": 0.046217553317546844, |
|
"rewards/rejected": 0.026234647259116173, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 3.125e-06, |
|
"logits/chosen": -0.5682042837142944, |
|
"logits/rejected": -0.5693326592445374, |
|
"logps/chosen": -290.4607849121094, |
|
"logps/ref_response": -0.5527787804603577, |
|
"logps/rejected": -254.50967407226562, |
|
"loss": 0.6554, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.12507006525993347, |
|
"rewards/margins": 0.05596587061882019, |
|
"rewards/rejected": 0.06910420954227448, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 3.625, |
|
"learning_rate": 3.6458333333333333e-06, |
|
"logits/chosen": -0.5585962533950806, |
|
"logits/rejected": -0.5734174847602844, |
|
"logps/chosen": -286.166748046875, |
|
"logps/ref_response": -0.5369429588317871, |
|
"logps/rejected": -263.13885498046875, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.21329982578754425, |
|
"rewards/margins": 0.146693617105484, |
|
"rewards/rejected": 0.06660620868206024, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 3.75, |
|
"learning_rate": 4.166666666666667e-06, |
|
"logits/chosen": -0.4981383681297302, |
|
"logits/rejected": -0.5249155759811401, |
|
"logps/chosen": -287.4258728027344, |
|
"logps/ref_response": -0.46965378522872925, |
|
"logps/rejected": -273.86474609375, |
|
"loss": 0.617, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.31552475690841675, |
|
"rewards/margins": 0.25278010964393616, |
|
"rewards/rejected": 0.06274466216564178, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 3.734375, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"logits/chosen": -0.5283939838409424, |
|
"logits/rejected": -0.5496431589126587, |
|
"logps/chosen": -330.2322692871094, |
|
"logps/ref_response": -0.4922845959663391, |
|
"logps/rejected": -295.63018798828125, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.2877245545387268, |
|
"rewards/margins": 0.3177236020565033, |
|
"rewards/rejected": -0.02999904192984104, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 4.9997324926814375e-06, |
|
"logits/chosen": -0.5698152184486389, |
|
"logits/rejected": -0.5635516047477722, |
|
"logps/chosen": -275.736328125, |
|
"logps/ref_response": -0.533843994140625, |
|
"logps/rejected": -290.2398376464844, |
|
"loss": 0.6142, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.30928176641464233, |
|
"rewards/margins": 0.3119629919528961, |
|
"rewards/rejected": -0.0026811982970684767, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -0.5543237924575806, |
|
"eval_logits/rejected": -0.548694908618927, |
|
"eval_logps/chosen": -290.523193359375, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -277.9860534667969, |
|
"eval_loss": 0.5973454713821411, |
|
"eval_rewards/accuracies": 0.7020000219345093, |
|
"eval_rewards/chosen": 0.2023972123861313, |
|
"eval_rewards/margins": 0.33334478735923767, |
|
"eval_rewards/rejected": -0.13094758987426758, |
|
"eval_runtime": 351.8267, |
|
"eval_samples_per_second": 5.685, |
|
"eval_steps_per_second": 0.355, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 4.996723692767927e-06, |
|
"logits/chosen": -0.6081199645996094, |
|
"logits/rejected": -0.6322951912879944, |
|
"logps/chosen": -289.56561279296875, |
|
"logps/ref_response": -0.5667906999588013, |
|
"logps/rejected": -277.76922607421875, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.20684650540351868, |
|
"rewards/margins": 0.4277339577674866, |
|
"rewards/rejected": -0.22088749706745148, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 4.9903757462135984e-06, |
|
"logits/chosen": -0.5567010641098022, |
|
"logits/rejected": -0.5700705051422119, |
|
"logps/chosen": -262.211181640625, |
|
"logps/ref_response": -0.5169209837913513, |
|
"logps/rejected": -253.9445343017578, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.034964192658662796, |
|
"rewards/margins": 0.36375877261161804, |
|
"rewards/rejected": -0.32879456877708435, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 3.5, |
|
"learning_rate": 4.980697142834315e-06, |
|
"logits/chosen": -0.5261090397834778, |
|
"logits/rejected": -0.5425637364387512, |
|
"logps/chosen": -302.4659118652344, |
|
"logps/ref_response": -0.4790240228176117, |
|
"logps/rejected": -338.93597412109375, |
|
"loss": 0.5798, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04841077700257301, |
|
"rewards/margins": 0.3843201696872711, |
|
"rewards/rejected": -0.335909366607666, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 4.967700826904229e-06, |
|
"logits/chosen": -0.5993494391441345, |
|
"logits/rejected": -0.607743501663208, |
|
"logps/chosen": -283.1224670410156, |
|
"logps/ref_response": -0.5482783913612366, |
|
"logps/rejected": -276.7977294921875, |
|
"loss": 0.5616, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.0036001927219331264, |
|
"rewards/margins": 0.5189536213874817, |
|
"rewards/rejected": -0.5225538015365601, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 4.951404179843963e-06, |
|
"logits/chosen": -0.6007939577102661, |
|
"logits/rejected": -0.5676769018173218, |
|
"logps/chosen": -308.45916748046875, |
|
"logps/ref_response": -0.5423828363418579, |
|
"logps/rejected": -280.7994689941406, |
|
"loss": 0.5581, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.18967030942440033, |
|
"rewards/margins": 0.5712782740592957, |
|
"rewards/rejected": -0.38160794973373413, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 4.931828996974498e-06, |
|
"logits/chosen": -0.5421828031539917, |
|
"logits/rejected": -0.5333597660064697, |
|
"logps/chosen": -297.0065612792969, |
|
"logps/ref_response": -0.4895528256893158, |
|
"logps/rejected": -272.0807189941406, |
|
"loss": 0.5427, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.20104511082172394, |
|
"rewards/margins": 0.61830735206604, |
|
"rewards/rejected": -0.4172622263431549, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.909001458367867e-06, |
|
"logits/chosen": -0.6238254308700562, |
|
"logits/rejected": -0.6083575487136841, |
|
"logps/chosen": -288.6294860839844, |
|
"logps/ref_response": -0.5753272771835327, |
|
"logps/rejected": -277.1576232910156, |
|
"loss": 0.5646, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.02497274801135063, |
|
"rewards/margins": 0.5278475880622864, |
|
"rewards/rejected": -0.5528203248977661, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.882952093833628e-06, |
|
"logits/chosen": -0.6268518567085266, |
|
"logits/rejected": -0.5990904569625854, |
|
"logps/chosen": -303.6324157714844, |
|
"logps/ref_response": -0.5761692523956299, |
|
"logps/rejected": -267.112060546875, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.13170073926448822, |
|
"rewards/margins": 0.5248185396194458, |
|
"rewards/rejected": -0.6565192341804504, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.853715742087947e-06, |
|
"logits/chosen": -0.5630252957344055, |
|
"logits/rejected": -0.5427506566047668, |
|
"logps/chosen": -276.95648193359375, |
|
"logps/ref_response": -0.5028859972953796, |
|
"logps/rejected": -284.7541809082031, |
|
"loss": 0.5535, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.004456658847630024, |
|
"rewards/margins": 0.5479073524475098, |
|
"rewards/rejected": -0.5434507131576538, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 4.821331504159906e-06, |
|
"logits/chosen": -0.5674183368682861, |
|
"logits/rejected": -0.5801523327827454, |
|
"logps/chosen": -298.1530456542969, |
|
"logps/ref_response": -0.5163358449935913, |
|
"logps/rejected": -257.4419250488281, |
|
"loss": 0.5579, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.06740345805883408, |
|
"rewards/margins": 0.5899164080619812, |
|
"rewards/rejected": -0.6573198437690735, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -0.5041880011558533, |
|
"eval_logits/rejected": -0.48473650217056274, |
|
"eval_logps/chosen": -293.2984924316406, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -283.7411193847656, |
|
"eval_loss": 0.5482621788978577, |
|
"eval_rewards/accuracies": 0.7120000123977661, |
|
"eval_rewards/chosen": -0.07513303309679031, |
|
"eval_rewards/margins": 0.6313197016716003, |
|
"eval_rewards/rejected": -0.7064527869224548, |
|
"eval_runtime": 349.57, |
|
"eval_samples_per_second": 5.721, |
|
"eval_steps_per_second": 0.358, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 4.84375, |
|
"learning_rate": 4.7858426910973435e-06, |
|
"logits/chosen": -0.6099163889884949, |
|
"logits/rejected": -0.6096245050430298, |
|
"logps/chosen": -280.4328918457031, |
|
"logps/ref_response": -0.5563252568244934, |
|
"logps/rejected": -274.54486083984375, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.019828548654913902, |
|
"rewards/margins": 0.5767567157745361, |
|
"rewards/rejected": -0.596585214138031, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 4.747296766042161e-06, |
|
"logits/chosen": -0.5816672444343567, |
|
"logits/rejected": -0.5607967376708984, |
|
"logps/chosen": -319.9589538574219, |
|
"logps/ref_response": -0.525614857673645, |
|
"logps/rejected": -272.82952880859375, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.03950881212949753, |
|
"rewards/margins": 0.63139808177948, |
|
"rewards/rejected": -0.5918892621994019, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 4.705745280752586e-06, |
|
"logits/chosen": -0.6150011420249939, |
|
"logits/rejected": -0.572602391242981, |
|
"logps/chosen": -293.0460205078125, |
|
"logps/ref_response": -0.5675605535507202, |
|
"logps/rejected": -290.7093200683594, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.07407680153846741, |
|
"rewards/margins": 0.6289999485015869, |
|
"rewards/rejected": -0.7030767202377319, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 4.661243806657256e-06, |
|
"logits/chosen": -0.5870501399040222, |
|
"logits/rejected": -0.546720564365387, |
|
"logps/chosen": -300.5980529785156, |
|
"logps/ref_response": -0.5330287218093872, |
|
"logps/rejected": -264.90716552734375, |
|
"loss": 0.5493, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.01282467134296894, |
|
"rewards/margins": 0.5539323091506958, |
|
"rewards/rejected": -0.5667570233345032, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 4.613851860533367e-06, |
|
"logits/chosen": -0.5954620242118835, |
|
"logits/rejected": -0.5543604493141174, |
|
"logps/chosen": -293.9695739746094, |
|
"logps/ref_response": -0.5492520928382874, |
|
"logps/rejected": -260.6333312988281, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.10280059278011322, |
|
"rewards/margins": 0.5389178395271301, |
|
"rewards/rejected": -0.43611717224121094, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 4.563632824908252e-06, |
|
"logits/chosen": -0.564812183380127, |
|
"logits/rejected": -0.5271375179290771, |
|
"logps/chosen": -293.28594970703125, |
|
"logps/ref_response": -0.5089389085769653, |
|
"logps/rejected": -280.13018798828125, |
|
"loss": 0.5289, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3052422106266022, |
|
"rewards/margins": 0.8550776243209839, |
|
"rewards/rejected": -0.5498353838920593, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 4.25, |
|
"learning_rate": 4.510653863290871e-06, |
|
"logits/chosen": -0.558210015296936, |
|
"logits/rejected": -0.5351649522781372, |
|
"logps/chosen": -296.7794494628906, |
|
"logps/ref_response": -0.5091123580932617, |
|
"logps/rejected": -305.01654052734375, |
|
"loss": 0.5405, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.008460876531898975, |
|
"rewards/margins": 0.6207507848739624, |
|
"rewards/rejected": -0.6122900247573853, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 4.454985830346574e-06, |
|
"logits/chosen": -0.6231056451797485, |
|
"logits/rejected": -0.5877543687820435, |
|
"logps/chosen": -302.84906005859375, |
|
"logps/ref_response": -0.5748014450073242, |
|
"logps/rejected": -286.6356201171875, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0532672181725502, |
|
"rewards/margins": 0.5458566546440125, |
|
"rewards/rejected": -0.5991239547729492, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 4.396703177135262e-06, |
|
"logits/chosen": -0.582170844078064, |
|
"logits/rejected": -0.5509222149848938, |
|
"logps/chosen": -287.865478515625, |
|
"logps/ref_response": -0.5320878624916077, |
|
"logps/rejected": -259.5517883300781, |
|
"loss": 0.5264, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.1213446855545044, |
|
"rewards/margins": 0.7001182436943054, |
|
"rewards/rejected": -0.5787736177444458, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 4.125, |
|
"learning_rate": 4.335883851539693e-06, |
|
"logits/chosen": -0.6001819372177124, |
|
"logits/rejected": -0.5625559091567993, |
|
"logps/chosen": -297.6165771484375, |
|
"logps/ref_response": -0.5529105067253113, |
|
"logps/rejected": -294.80816650390625, |
|
"loss": 0.5402, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.15017859637737274, |
|
"rewards/margins": 0.683876097202301, |
|
"rewards/rejected": -0.8340547680854797, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -0.4637250602245331, |
|
"eval_logits/rejected": -0.4386967718601227, |
|
"eval_logps/chosen": -293.8652648925781, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -285.25445556640625, |
|
"eval_loss": 0.5354303121566772, |
|
"eval_rewards/accuracies": 0.7260000109672546, |
|
"eval_rewards/chosen": -0.1318078190088272, |
|
"eval_rewards/margins": 0.725982666015625, |
|
"eval_rewards/rejected": -0.857790470123291, |
|
"eval_runtime": 349.4289, |
|
"eval_samples_per_second": 5.724, |
|
"eval_steps_per_second": 0.358, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 4.2726091940171055e-06, |
|
"logits/chosen": -0.549019992351532, |
|
"logits/rejected": -0.5806938409805298, |
|
"logps/chosen": -296.0178527832031, |
|
"logps/ref_response": -0.5006662607192993, |
|
"logps/rejected": -342.6523742675781, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.08306514471769333, |
|
"rewards/margins": 0.8439720869064331, |
|
"rewards/rejected": -0.760906994342804, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 4.206963828813555e-06, |
|
"logits/chosen": -0.6003859043121338, |
|
"logits/rejected": -0.5595699548721313, |
|
"logps/chosen": -297.0275573730469, |
|
"logps/ref_response": -0.5563712120056152, |
|
"logps/rejected": -280.803466796875, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.02343413233757019, |
|
"rewards/margins": 0.791476845741272, |
|
"rewards/rejected": -0.8149110078811646, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 4.139035550786495e-06, |
|
"logits/chosen": -0.6349459886550903, |
|
"logits/rejected": -0.5698983073234558, |
|
"logps/chosen": -290.1078186035156, |
|
"logps/ref_response": -0.5800708532333374, |
|
"logps/rejected": -261.7127685546875, |
|
"loss": 0.5278, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.08804565668106079, |
|
"rewards/margins": 0.6575459837913513, |
|
"rewards/rejected": -0.7455916404724121, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 4.068915207986931e-06, |
|
"logits/chosen": -0.5905895233154297, |
|
"logits/rejected": -0.5241268277168274, |
|
"logps/chosen": -298.50506591796875, |
|
"logps/ref_response": -0.5407181978225708, |
|
"logps/rejected": -259.384765625, |
|
"loss": 0.5256, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.2834884226322174, |
|
"rewards/margins": 0.8159183263778687, |
|
"rewards/rejected": -1.0994068384170532, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 4.09375, |
|
"learning_rate": 3.996696580158211e-06, |
|
"logits/chosen": -0.5350117683410645, |
|
"logits/rejected": -0.509468674659729, |
|
"logps/chosen": -337.4930114746094, |
|
"logps/ref_response": -0.486247718334198, |
|
"logps/rejected": -292.4802551269531, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.055437762290239334, |
|
"rewards/margins": 0.7416700124740601, |
|
"rewards/rejected": -0.7971076965332031, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 3.140625, |
|
"learning_rate": 3.922476253313921e-06, |
|
"logits/chosen": -0.5137478113174438, |
|
"logits/rejected": -0.5199310183525085, |
|
"logps/chosen": -275.74993896484375, |
|
"logps/ref_response": -0.48780474066734314, |
|
"logps/rejected": -299.1233215332031, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.2543216347694397, |
|
"rewards/margins": 0.7176491618156433, |
|
"rewards/rejected": -0.9719708561897278, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 4.25, |
|
"learning_rate": 3.846353490562664e-06, |
|
"logits/chosen": -0.5372802019119263, |
|
"logits/rejected": -0.539850115776062, |
|
"logps/chosen": -290.45709228515625, |
|
"logps/ref_response": -0.491685152053833, |
|
"logps/rejected": -265.22857666015625, |
|
"loss": 0.5065, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.026863550767302513, |
|
"rewards/margins": 0.8847667574882507, |
|
"rewards/rejected": -0.9116303324699402, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 3.609375, |
|
"learning_rate": 3.768430099352445e-06, |
|
"logits/chosen": -0.5611374378204346, |
|
"logits/rejected": -0.5598313808441162, |
|
"logps/chosen": -306.60662841796875, |
|
"logps/ref_response": -0.5215914845466614, |
|
"logps/rejected": -280.02392578125, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.001783865736797452, |
|
"rewards/margins": 1.001586675643921, |
|
"rewards/rejected": -0.9998028874397278, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 4.625, |
|
"learning_rate": 3.6888102953122307e-06, |
|
"logits/chosen": -0.6096396446228027, |
|
"logits/rejected": -0.5705077052116394, |
|
"logps/chosen": -263.71826171875, |
|
"logps/ref_response": -0.5661150813102722, |
|
"logps/rejected": -265.1150817871094, |
|
"loss": 0.5434, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.06994754076004028, |
|
"rewards/margins": 0.7673075795173645, |
|
"rewards/rejected": -0.8372551202774048, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 3.875, |
|
"learning_rate": 3.607600562872785e-06, |
|
"logits/chosen": -0.5598580241203308, |
|
"logits/rejected": -0.5253019332885742, |
|
"logps/chosen": -286.2227478027344, |
|
"logps/ref_response": -0.5258094072341919, |
|
"logps/rejected": -277.05267333984375, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.047982849180698395, |
|
"rewards/margins": 0.7805670499801636, |
|
"rewards/rejected": -0.8285499811172485, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -0.4029563367366791, |
|
"eval_logits/rejected": -0.37146955728530884, |
|
"eval_logps/chosen": -294.2449645996094, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -286.346923828125, |
|
"eval_loss": 0.5277438759803772, |
|
"eval_rewards/accuracies": 0.722000002861023, |
|
"eval_rewards/chosen": -0.16978110373020172, |
|
"eval_rewards/margins": 0.7972525954246521, |
|
"eval_rewards/rejected": -0.9670337438583374, |
|
"eval_runtime": 349.6206, |
|
"eval_samples_per_second": 5.72, |
|
"eval_steps_per_second": 0.358, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 3.5249095128531863e-06, |
|
"logits/chosen": -0.5799764394760132, |
|
"logits/rejected": -0.5290526151657104, |
|
"logps/chosen": -279.68115234375, |
|
"logps/ref_response": -0.5564926862716675, |
|
"logps/rejected": -277.60308837890625, |
|
"loss": 0.5124, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.1508873999118805, |
|
"rewards/margins": 0.8393732905387878, |
|
"rewards/rejected": -0.9902607798576355, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 3.4408477372034743e-06, |
|
"logits/chosen": -0.5675779581069946, |
|
"logits/rejected": -0.5418698191642761, |
|
"logps/chosen": -310.0582275390625, |
|
"logps/ref_response": -0.5361344218254089, |
|
"logps/rejected": -298.15447998046875, |
|
"loss": 0.5504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.11406157165765762, |
|
"rewards/margins": 0.6680425405502319, |
|
"rewards/rejected": -0.7821041345596313, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 3.355527661097728e-06, |
|
"logits/chosen": -0.569171130657196, |
|
"logits/rejected": -0.5680087804794312, |
|
"logps/chosen": -281.50506591796875, |
|
"logps/ref_response": -0.5477866530418396, |
|
"logps/rejected": -282.78619384765625, |
|
"loss": 0.5291, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.24793243408203125, |
|
"rewards/margins": 0.6574320197105408, |
|
"rewards/rejected": -0.9053643941879272, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 3.859375, |
|
"learning_rate": 3.269063392575352e-06, |
|
"logits/chosen": -0.5341087579727173, |
|
"logits/rejected": -0.5249664187431335, |
|
"logps/chosen": -328.975341796875, |
|
"logps/ref_response": -0.5050511360168457, |
|
"logps/rejected": -307.42388916015625, |
|
"loss": 0.5158, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.021743040531873703, |
|
"rewards/margins": 0.7577444314956665, |
|
"rewards/rejected": -0.7794874310493469, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 3.181570569931697e-06, |
|
"logits/chosen": -0.5577148199081421, |
|
"logits/rejected": -0.5402424931526184, |
|
"logps/chosen": -287.3887939453125, |
|
"logps/ref_response": -0.5224987864494324, |
|
"logps/rejected": -283.7501525878906, |
|
"loss": 0.5068, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.2252788096666336, |
|
"rewards/margins": 0.7505895495414734, |
|
"rewards/rejected": -0.9758683443069458, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 2.765625, |
|
"learning_rate": 3.09316620706208e-06, |
|
"logits/chosen": -0.5113469362258911, |
|
"logits/rejected": -0.5197252631187439, |
|
"logps/chosen": -308.82244873046875, |
|
"logps/ref_response": -0.4874509274959564, |
|
"logps/rejected": -290.3113708496094, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.09814532101154327, |
|
"rewards/margins": 0.974043071269989, |
|
"rewards/rejected": -1.072188377380371, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 3.515625, |
|
"learning_rate": 3.0039685369660785e-06, |
|
"logits/chosen": -0.513633131980896, |
|
"logits/rejected": -0.46772414445877075, |
|
"logps/chosen": -282.93438720703125, |
|
"logps/ref_response": -0.4861488938331604, |
|
"logps/rejected": -267.72796630859375, |
|
"loss": 0.5329, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.05372269079089165, |
|
"rewards/margins": 0.8479040861129761, |
|
"rewards/rejected": -0.7941814661026001, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 3.75, |
|
"learning_rate": 2.91409685362137e-06, |
|
"logits/chosen": -0.5242967009544373, |
|
"logits/rejected": -0.5119736790657043, |
|
"logps/chosen": -280.0267028808594, |
|
"logps/ref_response": -0.5061747431755066, |
|
"logps/rejected": -277.7090759277344, |
|
"loss": 0.5085, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04931460693478584, |
|
"rewards/margins": 0.8077665567398071, |
|
"rewards/rejected": -0.8570810556411743, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 2.8236713524386085e-06, |
|
"logits/chosen": -0.587617039680481, |
|
"logits/rejected": -0.5464242100715637, |
|
"logps/chosen": -280.3719482421875, |
|
"logps/ref_response": -0.5583964586257935, |
|
"logps/rejected": -258.3996276855469, |
|
"loss": 0.5056, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": 0.056199681013822556, |
|
"rewards/margins": 0.8544878959655762, |
|
"rewards/rejected": -0.7982882261276245, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 2.7328129695107205e-06, |
|
"logits/chosen": -0.4950012266635895, |
|
"logits/rejected": -0.49973049759864807, |
|
"logps/chosen": -266.21197509765625, |
|
"logps/ref_response": -0.46682921051979065, |
|
"logps/rejected": -276.02667236328125, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.011867323890328407, |
|
"rewards/margins": 1.0351099967956543, |
|
"rewards/rejected": -1.023242712020874, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -0.3727329671382904, |
|
"eval_logits/rejected": -0.3377152681350708, |
|
"eval_logps/chosen": -294.09320068359375, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -286.45953369140625, |
|
"eval_loss": 0.521223783493042, |
|
"eval_rewards/accuracies": 0.7260000109672546, |
|
"eval_rewards/chosen": -0.1546054631471634, |
|
"eval_rewards/margins": 0.8236899375915527, |
|
"eval_rewards/rejected": -0.9782953858375549, |
|
"eval_runtime": 349.5098, |
|
"eval_samples_per_second": 5.722, |
|
"eval_steps_per_second": 0.358, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 2.641643219871597e-06, |
|
"logits/chosen": -0.5270382165908813, |
|
"logits/rejected": -0.484760582447052, |
|
"logps/chosen": -314.9234619140625, |
|
"logps/ref_response": -0.5090646743774414, |
|
"logps/rejected": -299.1803894042969, |
|
"loss": 0.5149, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.1974947154521942, |
|
"rewards/margins": 0.7501281499862671, |
|
"rewards/rejected": -0.9476228952407837, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 2.5502840349805074e-06, |
|
"logits/chosen": -0.5182399749755859, |
|
"logits/rejected": -0.5034407377243042, |
|
"logps/chosen": -310.6617126464844, |
|
"logps/ref_response": -0.5057616829872131, |
|
"logps/rejected": -298.3763427734375, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.06424389034509659, |
|
"rewards/margins": 0.8863626718521118, |
|
"rewards/rejected": -0.9506064653396606, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 3.6875, |
|
"learning_rate": 2.4588575996495797e-06, |
|
"logits/chosen": -0.47386473417282104, |
|
"logits/rejected": -0.47526755928993225, |
|
"logps/chosen": -272.9559631347656, |
|
"logps/ref_response": -0.45075368881225586, |
|
"logps/rejected": -263.4909973144531, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.2501363754272461, |
|
"rewards/margins": 0.891460120677948, |
|
"rewards/rejected": -1.1415965557098389, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.367486188632446e-06, |
|
"logits/chosen": -0.5168323516845703, |
|
"logits/rejected": -0.5130370855331421, |
|
"logps/chosen": -286.60076904296875, |
|
"logps/ref_response": -0.5035119652748108, |
|
"logps/rejected": -326.2984619140625, |
|
"loss": 0.5101, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.09159872680902481, |
|
"rewards/margins": 0.8820840120315552, |
|
"rewards/rejected": -0.9736827611923218, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 2.276292003092593e-06, |
|
"logits/chosen": -0.538984477519989, |
|
"logits/rejected": -0.5137041807174683, |
|
"logps/chosen": -258.5223083496094, |
|
"logps/ref_response": -0.5067554712295532, |
|
"logps/rejected": -266.6609802246094, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.0642676055431366, |
|
"rewards/margins": 0.9660905003547668, |
|
"rewards/rejected": -1.030358076095581, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 2.1853970071701415e-06, |
|
"logits/chosen": -0.5292374491691589, |
|
"logits/rejected": -0.49377211928367615, |
|
"logps/chosen": -279.6850891113281, |
|
"logps/ref_response": -0.5059608817100525, |
|
"logps/rejected": -281.2428283691406, |
|
"loss": 0.5137, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.03345666453242302, |
|
"rewards/margins": 0.8658466339111328, |
|
"rewards/rejected": -0.8993034362792969, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 4.0, |
|
"learning_rate": 2.0949227648656194e-06, |
|
"logits/chosen": -0.5543760657310486, |
|
"logits/rejected": -0.5229381918907166, |
|
"logps/chosen": -296.0806579589844, |
|
"logps/ref_response": -0.5283219218254089, |
|
"logps/rejected": -263.48150634765625, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.09281423687934875, |
|
"rewards/margins": 0.9194073677062988, |
|
"rewards/rejected": -1.0122215747833252, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 3.5625, |
|
"learning_rate": 2.00499027745888e-06, |
|
"logits/chosen": -0.5253512263298035, |
|
"logits/rejected": -0.5014483332633972, |
|
"logps/chosen": -300.3424377441406, |
|
"logps/ref_response": -0.5130476355552673, |
|
"logps/rejected": -299.5146789550781, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.05144411325454712, |
|
"rewards/margins": 0.8135349154472351, |
|
"rewards/rejected": -0.8649789690971375, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 1.915719821680624e-06, |
|
"logits/chosen": -0.5522564053535461, |
|
"logits/rejected": -0.4926326870918274, |
|
"logps/chosen": -288.0102844238281, |
|
"logps/ref_response": -0.5210384130477905, |
|
"logps/rejected": -284.1330261230469, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.19294333457946777, |
|
"rewards/margins": 0.9110462069511414, |
|
"rewards/rejected": -0.7181028127670288, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 3.5, |
|
"learning_rate": 1.8272307888529276e-06, |
|
"logits/chosen": -0.4761735796928406, |
|
"logits/rejected": -0.4301334321498871, |
|
"logps/chosen": -264.5964660644531, |
|
"logps/ref_response": -0.4653666913509369, |
|
"logps/rejected": -281.8071594238281, |
|
"loss": 0.5155, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.022416314110159874, |
|
"rewards/margins": 0.9364219903945923, |
|
"rewards/rejected": -0.9588383436203003, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -0.36081573367118835, |
|
"eval_logits/rejected": -0.32467401027679443, |
|
"eval_logps/chosen": -293.3980407714844, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -285.9612121582031, |
|
"eval_loss": 0.5195037722587585, |
|
"eval_rewards/accuracies": 0.7360000014305115, |
|
"eval_rewards/chosen": -0.0850897878408432, |
|
"eval_rewards/margins": 0.8433744311332703, |
|
"eval_rewards/rejected": -0.9284642338752747, |
|
"eval_runtime": 349.4276, |
|
"eval_samples_per_second": 5.724, |
|
"eval_steps_per_second": 0.358, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 3.0, |
|
"learning_rate": 1.739641525213929e-06, |
|
"logits/chosen": -0.5045549869537354, |
|
"logits/rejected": -0.4906612038612366, |
|
"logps/chosen": -267.28338623046875, |
|
"logps/ref_response": -0.500705897808075, |
|
"logps/rejected": -273.13714599609375, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.08845983445644379, |
|
"rewards/margins": 0.9310785531997681, |
|
"rewards/rejected": -1.0195385217666626, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 1.6530691736402317e-06, |
|
"logits/chosen": -0.5177065134048462, |
|
"logits/rejected": -0.4805786609649658, |
|
"logps/chosen": -295.038818359375, |
|
"logps/ref_response": -0.502620279788971, |
|
"logps/rejected": -284.8276672363281, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.21657638251781464, |
|
"rewards/margins": 0.9273554086685181, |
|
"rewards/rejected": -1.1439317464828491, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 3.625, |
|
"learning_rate": 1.5676295169786864e-06, |
|
"logits/chosen": -0.5329315662384033, |
|
"logits/rejected": -0.4890086054801941, |
|
"logps/chosen": -288.74847412109375, |
|
"logps/ref_response": -0.522256076335907, |
|
"logps/rejected": -274.2325439453125, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.2333780825138092, |
|
"rewards/margins": 0.8650426864624023, |
|
"rewards/rejected": -1.0984207391738892, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 3.21875, |
|
"learning_rate": 1.4834368231970922e-06, |
|
"logits/chosen": -0.5609028935432434, |
|
"logits/rejected": -0.5060838460922241, |
|
"logps/chosen": -288.317138671875, |
|
"logps/ref_response": -0.5478745698928833, |
|
"logps/rejected": -274.0264892578125, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.1753380298614502, |
|
"rewards/margins": 0.7557013034820557, |
|
"rewards/rejected": -0.9310394525527954, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 1.4006036925609245e-06, |
|
"logits/chosen": -0.5304352045059204, |
|
"logits/rejected": -0.48552340269088745, |
|
"logps/chosen": -300.5797424316406, |
|
"logps/ref_response": -0.5103051662445068, |
|
"logps/rejected": -250.87216186523438, |
|
"loss": 0.5263, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.19611187279224396, |
|
"rewards/margins": 0.8849571347236633, |
|
"rewards/rejected": -1.0810692310333252, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 1.3192409070404582e-06, |
|
"logits/chosen": -0.5464252233505249, |
|
"logits/rejected": -0.5194587111473083, |
|
"logps/chosen": -304.4057312011719, |
|
"logps/ref_response": -0.5286127328872681, |
|
"logps/rejected": -306.74737548828125, |
|
"loss": 0.5106, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.0011399202048778534, |
|
"rewards/margins": 0.8308441042900085, |
|
"rewards/rejected": -0.8297042846679688, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 4.5, |
|
"learning_rate": 1.2394572821496953e-06, |
|
"logits/chosen": -0.5439696311950684, |
|
"logits/rejected": -0.5098680257797241, |
|
"logps/chosen": -277.79193115234375, |
|
"logps/ref_response": -0.5491371154785156, |
|
"logps/rejected": -259.3212890625, |
|
"loss": 0.5184, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.07701723277568817, |
|
"rewards/margins": 0.8310605883598328, |
|
"rewards/rejected": -0.9080777168273926, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 1.1613595214152713e-06, |
|
"logits/chosen": -0.5734778642654419, |
|
"logits/rejected": -0.526314377784729, |
|
"logps/chosen": -287.5583801269531, |
|
"logps/ref_response": -0.5694643259048462, |
|
"logps/rejected": -276.66607666015625, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16065016388893127, |
|
"rewards/margins": 0.8544554710388184, |
|
"rewards/rejected": -1.0151057243347168, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 2.578125, |
|
"learning_rate": 1.0850520736699362e-06, |
|
"logits/chosen": -0.5160936117172241, |
|
"logits/rejected": -0.48213791847229004, |
|
"logps/chosen": -341.7447814941406, |
|
"logps/ref_response": -0.4945286810398102, |
|
"logps/rejected": -317.0802001953125, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.1648830622434616, |
|
"rewards/margins": 0.9320821762084961, |
|
"rewards/rejected": -1.0969650745391846, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 1.0106369933615043e-06, |
|
"logits/chosen": -0.5604196786880493, |
|
"logits/rejected": -0.5098714828491211, |
|
"logps/chosen": -316.64105224609375, |
|
"logps/ref_response": -0.5506774187088013, |
|
"logps/rejected": -264.15411376953125, |
|
"loss": 0.5113, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.22138896584510803, |
|
"rewards/margins": 0.7301830053329468, |
|
"rewards/rejected": -0.9515719413757324, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -0.34110894799232483, |
|
"eval_logits/rejected": -0.30364856123924255, |
|
"eval_logps/chosen": -294.4884948730469, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -287.16522216796875, |
|
"eval_loss": 0.5173361301422119, |
|
"eval_rewards/accuracies": 0.734000027179718, |
|
"eval_rewards/chosen": -0.19413326680660248, |
|
"eval_rewards/margins": 0.8547297120094299, |
|
"eval_rewards/rejected": -1.0488630533218384, |
|
"eval_runtime": 349.4515, |
|
"eval_samples_per_second": 5.723, |
|
"eval_steps_per_second": 0.358, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 9.382138040640714e-07, |
|
"logits/chosen": -0.5672627687454224, |
|
"logits/rejected": -0.5198173522949219, |
|
"logps/chosen": -265.9830627441406, |
|
"logps/ref_response": -0.5634459257125854, |
|
"logps/rejected": -280.0369873046875, |
|
"loss": 0.532, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.16655750572681427, |
|
"rewards/margins": 0.8667360544204712, |
|
"rewards/rejected": -1.0332934856414795, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 3.25, |
|
"learning_rate": 8.678793653740633e-07, |
|
"logits/chosen": -0.492758572101593, |
|
"logits/rejected": -0.4843314290046692, |
|
"logps/chosen": -264.08270263671875, |
|
"logps/ref_response": -0.49243393540382385, |
|
"logps/rejected": -264.4930114746094, |
|
"loss": 0.5104, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.08720338344573975, |
|
"rewards/margins": 0.9140432476997375, |
|
"rewards/rejected": -1.001246690750122, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 2.609375, |
|
"learning_rate": 7.997277433690984e-07, |
|
"logits/chosen": -0.5135891437530518, |
|
"logits/rejected": -0.455097496509552, |
|
"logps/chosen": -302.38580322265625, |
|
"logps/ref_response": -0.4944031834602356, |
|
"logps/rejected": -288.3606872558594, |
|
"loss": 0.5071, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.15266835689544678, |
|
"rewards/margins": 0.821180522441864, |
|
"rewards/rejected": -0.973848819732666, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 2.890625, |
|
"learning_rate": 7.338500848029603e-07, |
|
"logits/chosen": -0.4669191241264343, |
|
"logits/rejected": -0.47497326135635376, |
|
"logps/chosen": -292.3653564453125, |
|
"logps/ref_response": -0.4282347559928894, |
|
"logps/rejected": -276.3725280761719, |
|
"loss": 0.4932, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.010840972885489464, |
|
"rewards/margins": 0.8605710864067078, |
|
"rewards/rejected": -0.8714120984077454, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 6.70334495204884e-07, |
|
"logits/chosen": -0.5066109299659729, |
|
"logits/rejected": -0.4780009388923645, |
|
"logps/chosen": -324.728515625, |
|
"logps/ref_response": -0.49645256996154785, |
|
"logps/rejected": -287.18304443359375, |
|
"loss": 0.5074, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.02283366583287716, |
|
"rewards/margins": 0.854231059551239, |
|
"rewards/rejected": -0.8313972353935242, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 6.092659210462232e-07, |
|
"logits/chosen": -0.5308446884155273, |
|
"logits/rejected": -0.5082138776779175, |
|
"logps/chosen": -270.0063781738281, |
|
"logps/ref_response": -0.5222411751747131, |
|
"logps/rejected": -269.8592224121094, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.09487895667552948, |
|
"rewards/margins": 0.7679542303085327, |
|
"rewards/rejected": -0.8628333210945129, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 5.507260361320738e-07, |
|
"logits/chosen": -0.5149004459381104, |
|
"logits/rejected": -0.5143811702728271, |
|
"logps/chosen": -285.86566162109375, |
|
"logps/ref_response": -0.50932776927948, |
|
"logps/rejected": -280.4559020996094, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.04052960127592087, |
|
"rewards/margins": 0.7234804034233093, |
|
"rewards/rejected": -0.6829507946968079, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 4.947931323697983e-07, |
|
"logits/chosen": -0.5068045854568481, |
|
"logits/rejected": -0.47291022539138794, |
|
"logps/chosen": -287.3854675292969, |
|
"logps/ref_response": -0.49121037125587463, |
|
"logps/rejected": -281.04669189453125, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.09810696542263031, |
|
"rewards/margins": 0.7164817452430725, |
|
"rewards/rejected": -0.8145886659622192, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 4.4154201506053985e-07, |
|
"logits/chosen": -0.5332745313644409, |
|
"logits/rejected": -0.5107340812683105, |
|
"logps/chosen": -301.6036071777344, |
|
"logps/ref_response": -0.5042006373405457, |
|
"logps/rejected": -265.80133056640625, |
|
"loss": 0.5114, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.08654189109802246, |
|
"rewards/margins": 0.8664189577102661, |
|
"rewards/rejected": -0.9529608488082886, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 2.828125, |
|
"learning_rate": 3.910439028537638e-07, |
|
"logits/chosen": -0.5402854681015015, |
|
"logits/rejected": -0.48599618673324585, |
|
"logps/chosen": -349.26226806640625, |
|
"logps/ref_response": -0.5149141550064087, |
|
"logps/rejected": -303.9481201171875, |
|
"loss": 0.5268, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.020410016179084778, |
|
"rewards/margins": 0.7255326509475708, |
|
"rewards/rejected": -0.7051225900650024, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -0.3452620208263397, |
|
"eval_logits/rejected": -0.30824077129364014, |
|
"eval_logps/chosen": -293.0043640136719, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -285.70001220703125, |
|
"eval_loss": 0.5176524519920349, |
|
"eval_rewards/accuracies": 0.722000002861023, |
|
"eval_rewards/chosen": -0.045722391456365585, |
|
"eval_rewards/margins": 0.8566184043884277, |
|
"eval_rewards/rejected": -0.902340829372406, |
|
"eval_runtime": 349.422, |
|
"eval_samples_per_second": 5.724, |
|
"eval_steps_per_second": 0.358, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 3.4336633249862084e-07, |
|
"logits/chosen": -0.564243733882904, |
|
"logits/rejected": -0.48312124609947205, |
|
"logps/chosen": -321.1978759765625, |
|
"logps/ref_response": -0.5519742369651794, |
|
"logps/rejected": -290.4500732421875, |
|
"loss": 0.5015, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.08782587200403214, |
|
"rewards/margins": 0.8487712740898132, |
|
"rewards/rejected": -0.9365970492362976, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 2.98573068519539e-07, |
|
"logits/chosen": -0.5403026342391968, |
|
"logits/rejected": -0.5190576910972595, |
|
"logps/chosen": -308.6977844238281, |
|
"logps/ref_response": -0.5307375192642212, |
|
"logps/rejected": -295.208984375, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.004915142897516489, |
|
"rewards/margins": 0.8321554064750671, |
|
"rewards/rejected": -0.8370705842971802, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 2.5672401793681854e-07, |
|
"logits/chosen": -0.5584547519683838, |
|
"logits/rejected": -0.5359379053115845, |
|
"logps/chosen": -276.21697998046875, |
|
"logps/ref_response": -0.5466696619987488, |
|
"logps/rejected": -271.29046630859375, |
|
"loss": 0.5047, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.08783929795026779, |
|
"rewards/margins": 0.8796290159225464, |
|
"rewards/rejected": -0.9674683809280396, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 3.109375, |
|
"learning_rate": 2.178751501463036e-07, |
|
"logits/chosen": -0.5241914987564087, |
|
"logits/rejected": -0.5036768317222595, |
|
"logps/chosen": -316.0185852050781, |
|
"logps/ref_response": -0.5086795091629028, |
|
"logps/rejected": -309.330810546875, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.10169823467731476, |
|
"rewards/margins": 0.8505386114120483, |
|
"rewards/rejected": -0.9522367715835571, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 3.296875, |
|
"learning_rate": 1.820784220652766e-07, |
|
"logits/chosen": -0.5777777433395386, |
|
"logits/rejected": -0.5282770991325378, |
|
"logps/chosen": -347.5041809082031, |
|
"logps/ref_response": -0.5546728372573853, |
|
"logps/rejected": -281.29705810546875, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.10623917728662491, |
|
"rewards/margins": 0.9054906964302063, |
|
"rewards/rejected": -0.7992514371871948, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 1.4938170864468636e-07, |
|
"logits/chosen": -0.4989868998527527, |
|
"logits/rejected": -0.4683281481266022, |
|
"logps/chosen": -291.2259216308594, |
|
"logps/ref_response": -0.4814940392971039, |
|
"logps/rejected": -270.903564453125, |
|
"loss": 0.4862, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.06208313629031181, |
|
"rewards/margins": 0.9479352235794067, |
|
"rewards/rejected": -1.010018229484558, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 1.1982873884064466e-07, |
|
"logits/chosen": -0.4674592614173889, |
|
"logits/rejected": -0.46020442247390747, |
|
"logps/chosen": -288.8802795410156, |
|
"logps/ref_response": -0.463235467672348, |
|
"logps/rejected": -279.07818603515625, |
|
"loss": 0.5202, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.09532221406698227, |
|
"rewards/margins": 0.7154585719108582, |
|
"rewards/rejected": -0.8107808232307434, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 9.345903713082305e-08, |
|
"logits/chosen": -0.550748884677887, |
|
"logits/rejected": -0.5384425520896912, |
|
"logps/chosen": -316.01953125, |
|
"logps/ref_response": -0.5406745672225952, |
|
"logps/rejected": -282.6295471191406, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.04705243557691574, |
|
"rewards/margins": 0.6842992901802063, |
|
"rewards/rejected": -0.7313517332077026, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 7.030787065396866e-08, |
|
"logits/chosen": -0.5192651152610779, |
|
"logits/rejected": -0.47466206550598145, |
|
"logps/chosen": -320.68121337890625, |
|
"logps/ref_response": -0.5117658376693726, |
|
"logps/rejected": -295.110107421875, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.031226161867380142, |
|
"rewards/margins": 0.7437794208526611, |
|
"rewards/rejected": -0.7750056385993958, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 5.0406202043228604e-08, |
|
"logits/chosen": -0.5355256795883179, |
|
"logits/rejected": -0.5220322012901306, |
|
"logps/chosen": -334.91900634765625, |
|
"logps/ref_response": -0.5195636749267578, |
|
"logps/rejected": -276.53973388671875, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.11498089134693146, |
|
"rewards/margins": 1.026011347770691, |
|
"rewards/rejected": -0.9110305905342102, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -0.34425824880599976, |
|
"eval_logits/rejected": -0.30724215507507324, |
|
"eval_logps/chosen": -293.0645446777344, |
|
"eval_logps/ref_response": -0.5363935232162476, |
|
"eval_logps/rejected": -285.7690734863281, |
|
"eval_loss": 0.5175051093101501, |
|
"eval_rewards/accuracies": 0.7279999852180481, |
|
"eval_rewards/chosen": -0.05173807963728905, |
|
"eval_rewards/margins": 0.8575091361999512, |
|
"eval_rewards/rejected": -0.9092472791671753, |
|
"eval_runtime": 349.4689, |
|
"eval_samples_per_second": 5.723, |
|
"eval_steps_per_second": 0.358, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 3.378064801637687e-08, |
|
"logits/chosen": -0.5745955109596252, |
|
"logits/rejected": -0.5213441848754883, |
|
"logps/chosen": -316.2192077636719, |
|
"logps/ref_response": -0.561827540397644, |
|
"logps/rejected": -315.3379821777344, |
|
"loss": 0.5218, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.04833642765879631, |
|
"rewards/margins": 0.7893211245536804, |
|
"rewards/rejected": -0.7409847378730774, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 2.78125, |
|
"learning_rate": 2.0453443778310766e-08, |
|
"logits/chosen": -0.5022112131118774, |
|
"logits/rejected": -0.45878568291664124, |
|
"logps/chosen": -330.7922668457031, |
|
"logps/ref_response": -0.4732615351676941, |
|
"logps/rejected": -306.924560546875, |
|
"loss": 0.5181, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.02221493050456047, |
|
"rewards/margins": 0.9629158973693848, |
|
"rewards/rejected": -0.9407010078430176, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 1.0442413283435759e-08, |
|
"logits/chosen": -0.5029438734054565, |
|
"logits/rejected": -0.45212322473526, |
|
"logps/chosen": -319.1156921386719, |
|
"logps/ref_response": -0.4792579114437103, |
|
"logps/rejected": -280.00390625, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.054530493915081024, |
|
"rewards/margins": 1.144339680671692, |
|
"rewards/rejected": -1.0898091793060303, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 2.375, |
|
"learning_rate": 3.760945397705828e-09, |
|
"logits/chosen": -0.5288008451461792, |
|
"logits/rejected": -0.4695435166358948, |
|
"logps/chosen": -292.11932373046875, |
|
"logps/ref_response": -0.5234506726264954, |
|
"logps/rejected": -262.4600830078125, |
|
"loss": 0.4922, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.12431895732879639, |
|
"rewards/margins": 1.0587154626846313, |
|
"rewards/rejected": -0.9343963861465454, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 2.8125, |
|
"learning_rate": 4.1797599220405605e-10, |
|
"logits/chosen": -0.5448582172393799, |
|
"logits/rejected": -0.521163821220398, |
|
"logps/chosen": -296.86529541015625, |
|
"logps/ref_response": -0.5367287397384644, |
|
"logps/rejected": -280.74432373046875, |
|
"loss": 0.5133, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.027158012613654137, |
|
"rewards/margins": 0.8814530372619629, |
|
"rewards/rejected": -0.8542949557304382, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"step": 955, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5383632463934533, |
|
"train_runtime": 19109.9128, |
|
"train_samples_per_second": 3.199, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 955, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|