|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9957173447537473, |
|
"eval_steps": 500, |
|
"global_step": 466, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.994320979557256e-06, |
|
"log_odds_chosen": 0.0681995302438736, |
|
"log_odds_ratio": -0.7082546353340149, |
|
"logits/chosen": -3.0977654457092285, |
|
"logits/rejected": -3.0908126831054688, |
|
"logps/chosen": -0.7222177386283875, |
|
"logps/rejected": -0.7692006230354309, |
|
"loss": 0.6137, |
|
"nll_loss": 0.4647584855556488, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.144443541765213, |
|
"rewards/margins": 0.009396565146744251, |
|
"rewards/rejected": -0.153840109705925, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.977309719247571e-06, |
|
"log_odds_chosen": 0.04795869067311287, |
|
"log_odds_ratio": -0.7321950793266296, |
|
"logits/chosen": -3.070446729660034, |
|
"logits/rejected": -3.076430559158325, |
|
"logps/chosen": -0.7525266408920288, |
|
"logps/rejected": -0.7740827202796936, |
|
"loss": 0.5618, |
|
"nll_loss": 0.42791947722435, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.15050533413887024, |
|
"rewards/margins": 0.004311202093958855, |
|
"rewards/rejected": -0.15481653809547424, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 4.9490435049069925e-06, |
|
"log_odds_chosen": 0.0981246829032898, |
|
"log_odds_ratio": -0.6918349266052246, |
|
"logits/chosen": -3.0190162658691406, |
|
"logits/rejected": -3.0307419300079346, |
|
"logps/chosen": -0.70826256275177, |
|
"logps/rejected": -0.7406646013259888, |
|
"loss": 0.5694, |
|
"nll_loss": 0.43190431594848633, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.14165252447128296, |
|
"rewards/margins": 0.006480403244495392, |
|
"rewards/rejected": -0.14813292026519775, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 4.909650756062782e-06, |
|
"log_odds_chosen": -0.031760524958372116, |
|
"log_odds_ratio": -0.7639201879501343, |
|
"logits/chosen": -3.0301899909973145, |
|
"logits/rejected": -3.013631582260132, |
|
"logps/chosen": -0.7143345475196838, |
|
"logps/rejected": -0.7065819501876831, |
|
"loss": 0.5928, |
|
"nll_loss": 0.4166012704372406, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.14286690950393677, |
|
"rewards/margins": -0.001550512621179223, |
|
"rewards/rejected": -0.14131638407707214, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.8593104424957275e-06, |
|
"log_odds_chosen": 0.1677912026643753, |
|
"log_odds_ratio": -0.6516054272651672, |
|
"logits/chosen": -2.9732978343963623, |
|
"logits/rejected": -2.9736242294311523, |
|
"logps/chosen": -0.6973217725753784, |
|
"logps/rejected": -0.7871943712234497, |
|
"loss": 0.5525, |
|
"nll_loss": 0.3650514483451843, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1394643485546112, |
|
"rewards/margins": 0.017974523827433586, |
|
"rewards/rejected": -0.15743887424468994, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 4.7982512711416995e-06, |
|
"log_odds_chosen": 0.13981187343597412, |
|
"log_odds_ratio": -0.6755875945091248, |
|
"logits/chosen": -2.983802556991577, |
|
"logits/rejected": -2.9678592681884766, |
|
"logps/chosen": -0.6857394576072693, |
|
"logps/rejected": -0.7719460725784302, |
|
"loss": 0.5749, |
|
"nll_loss": 0.43894845247268677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.13714787364006042, |
|
"rewards/margins": 0.017241323366761208, |
|
"rewards/rejected": -0.15438921749591827, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.6875, |
|
"learning_rate": 4.726750647026569e-06, |
|
"log_odds_chosen": 0.060419272631406784, |
|
"log_odds_ratio": -0.7113076448440552, |
|
"logits/chosen": -3.0847103595733643, |
|
"logits/rejected": -3.0847182273864746, |
|
"logps/chosen": -0.6998961567878723, |
|
"logps/rejected": -0.7425668835639954, |
|
"loss": 0.6077, |
|
"nll_loss": 0.45065993070602417, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.13997924327850342, |
|
"rewards/margins": 0.008534139953553677, |
|
"rewards/rejected": -0.14851337671279907, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.5, |
|
"learning_rate": 4.64513341295515e-06, |
|
"log_odds_chosen": 0.14743533730506897, |
|
"log_odds_ratio": -0.664400577545166, |
|
"logits/chosen": -3.072849750518799, |
|
"logits/rejected": -3.064882755279541, |
|
"logps/chosen": -0.6606765985488892, |
|
"logps/rejected": -0.7464475035667419, |
|
"loss": 0.5869, |
|
"nll_loss": 0.40598875284194946, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.1321353018283844, |
|
"rewards/margins": 0.017154179513454437, |
|
"rewards/rejected": -0.14928947389125824, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.703125, |
|
"learning_rate": 4.553770373680062e-06, |
|
"log_odds_chosen": 0.1585932970046997, |
|
"log_odds_ratio": -0.6709593534469604, |
|
"logits/chosen": -3.08207631111145, |
|
"logits/rejected": -3.066760540008545, |
|
"logps/chosen": -0.699985146522522, |
|
"logps/rejected": -0.7977792620658875, |
|
"loss": 0.5663, |
|
"nll_loss": 0.4425368905067444, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13999703526496887, |
|
"rewards/margins": 0.019558843225240707, |
|
"rewards/rejected": -0.15955588221549988, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 3.546875, |
|
"learning_rate": 4.453076611255507e-06, |
|
"log_odds_chosen": 0.14516516029834747, |
|
"log_odds_ratio": -0.6720137596130371, |
|
"logits/chosen": -3.114285469055176, |
|
"logits/rejected": -3.1085124015808105, |
|
"logps/chosen": -0.6726102828979492, |
|
"logps/rejected": -0.7475894689559937, |
|
"loss": 0.5877, |
|
"nll_loss": 0.4581482410430908, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.13452205061912537, |
|
"rewards/margins": 0.014995847828686237, |
|
"rewards/rejected": -0.14951792359352112, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.9375, |
|
"learning_rate": 4.343509599229697e-06, |
|
"log_odds_chosen": 0.07305195182561874, |
|
"log_odds_ratio": -0.7057459950447083, |
|
"logits/chosen": -3.0203895568847656, |
|
"logits/rejected": -3.048060417175293, |
|
"logps/chosen": -0.7229627966880798, |
|
"logps/rejected": -0.759824812412262, |
|
"loss": 0.5618, |
|
"nll_loss": 0.4208614230155945, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1445925384759903, |
|
"rewards/margins": 0.007372408173978329, |
|
"rewards/rejected": -0.1519649475812912, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 4.22556712424355e-06, |
|
"log_odds_chosen": 0.20443418622016907, |
|
"log_odds_ratio": -0.6452816724777222, |
|
"logits/chosen": -3.0728299617767334, |
|
"logits/rejected": -3.073429822921753, |
|
"logps/chosen": -0.6431705355644226, |
|
"logps/rejected": -0.7486152648925781, |
|
"loss": 0.5715, |
|
"nll_loss": 0.44801267981529236, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12863411009311676, |
|
"rewards/margins": 0.02108895219862461, |
|
"rewards/rejected": -0.14972305297851562, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.099785024478276e-06, |
|
"log_odds_chosen": 0.10702254623174667, |
|
"log_odds_ratio": -0.6948094367980957, |
|
"logits/chosen": -3.104290246963501, |
|
"logits/rejected": -3.114654779434204, |
|
"logps/chosen": -0.708793580532074, |
|
"logps/rejected": -0.7508866786956787, |
|
"loss": 0.5651, |
|
"nll_loss": 0.408848375082016, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1417587250471115, |
|
"rewards/margins": 0.008418610319495201, |
|
"rewards/rejected": -0.15017732977867126, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.25, |
|
"learning_rate": 3.9667347552265945e-06, |
|
"log_odds_chosen": 0.0996425449848175, |
|
"log_odds_ratio": -0.7059202194213867, |
|
"logits/chosen": -3.094669818878174, |
|
"logits/rejected": -3.0972161293029785, |
|
"logps/chosen": -0.6547704935073853, |
|
"logps/rejected": -0.7121762037277222, |
|
"loss": 0.5688, |
|
"nll_loss": 0.3880278468132019, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.1309541016817093, |
|
"rewards/margins": 0.011481141671538353, |
|
"rewards/rejected": -0.1424352377653122, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 4.03125, |
|
"learning_rate": 3.8270207926477e-06, |
|
"log_odds_chosen": 0.1352790892124176, |
|
"log_odds_ratio": -0.6838266253471375, |
|
"logits/chosen": -3.096400737762451, |
|
"logits/rejected": -3.08994460105896, |
|
"logps/chosen": -0.7112253904342651, |
|
"logps/rejected": -0.7838465571403503, |
|
"loss": 0.5347, |
|
"nll_loss": 0.3965539336204529, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.1422450691461563, |
|
"rewards/margins": 0.014524241909384727, |
|
"rewards/rejected": -0.1567693054676056, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 3.68127788750129e-06, |
|
"log_odds_chosen": 0.05867941305041313, |
|
"log_odds_ratio": -0.7102705836296082, |
|
"logits/chosen": -3.093903064727783, |
|
"logits/rejected": -3.092332363128662, |
|
"logps/chosen": -0.7055046558380127, |
|
"logps/rejected": -0.7451164722442627, |
|
"loss": 0.5676, |
|
"nll_loss": 0.40267667174339294, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.1411009281873703, |
|
"rewards/margins": 0.007922361604869366, |
|
"rewards/rejected": -0.14902329444885254, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 3.5301681813375343e-06, |
|
"log_odds_chosen": 0.12995854020118713, |
|
"log_odds_ratio": -0.6886736154556274, |
|
"logits/chosen": -3.115527629852295, |
|
"logits/rejected": -3.131301164627075, |
|
"logps/chosen": -0.6788499355316162, |
|
"logps/rejected": -0.7528234720230103, |
|
"loss": 0.5571, |
|
"nll_loss": 0.4142919182777405, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.13576999306678772, |
|
"rewards/margins": 0.014794701710343361, |
|
"rewards/rejected": -0.15056470036506653, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 4.5, |
|
"learning_rate": 3.3743781982447533e-06, |
|
"log_odds_chosen": 0.19499924778938293, |
|
"log_odds_ratio": -0.6581609845161438, |
|
"logits/chosen": -3.1499171257019043, |
|
"logits/rejected": -3.1490375995635986, |
|
"logps/chosen": -0.6707606911659241, |
|
"logps/rejected": -0.7726969718933105, |
|
"loss": 0.5664, |
|
"nll_loss": 0.40519580245018005, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.1341521441936493, |
|
"rewards/margins": 0.020387252792716026, |
|
"rewards/rejected": -0.15453937649726868, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 3.2146157258219534e-06, |
|
"log_odds_chosen": 0.14189398288726807, |
|
"log_odds_ratio": -0.674296498298645, |
|
"logits/chosen": -3.1837196350097656, |
|
"logits/rejected": -3.186469793319702, |
|
"logps/chosen": -0.6766322255134583, |
|
"logps/rejected": -0.7536298036575317, |
|
"loss": 0.5414, |
|
"nll_loss": 0.44522786140441895, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.13532646000385284, |
|
"rewards/margins": 0.015399503521621227, |
|
"rewards/rejected": -0.15072596073150635, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.84375, |
|
"learning_rate": 3.0516065995466336e-06, |
|
"log_odds_chosen": 0.1995691955089569, |
|
"log_odds_ratio": -0.6483355760574341, |
|
"logits/chosen": -3.134974956512451, |
|
"logits/rejected": -3.1205341815948486, |
|
"logps/chosen": -0.6498687863349915, |
|
"logps/rejected": -0.7565893530845642, |
|
"loss": 0.5465, |
|
"nll_loss": 0.38540342450141907, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.12997373938560486, |
|
"rewards/margins": 0.021344134584069252, |
|
"rewards/rejected": -0.15131787955760956, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.890625, |
|
"learning_rate": 2.8860914051471722e-06, |
|
"log_odds_chosen": 0.06047767400741577, |
|
"log_odds_ratio": -0.7144695520401001, |
|
"logits/chosen": -3.132312774658203, |
|
"logits/rejected": -3.1476542949676514, |
|
"logps/chosen": -0.6728143692016602, |
|
"logps/rejected": -0.6911222338676453, |
|
"loss": 0.5295, |
|
"nll_loss": 0.3443630337715149, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.1345628798007965, |
|
"rewards/margins": 0.003661577822640538, |
|
"rewards/rejected": -0.13822445273399353, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 3.734375, |
|
"learning_rate": 2.7188221139616303e-06, |
|
"log_odds_chosen": 0.11169638484716415, |
|
"log_odds_ratio": -0.6887539625167847, |
|
"logits/chosen": -3.1210544109344482, |
|
"logits/rejected": -3.1303868293762207, |
|
"logps/chosen": -0.6995309591293335, |
|
"logps/rejected": -0.7801494598388672, |
|
"loss": 0.5703, |
|
"nll_loss": 0.4030347764492035, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13990618288516998, |
|
"rewards/margins": 0.016123712062835693, |
|
"rewards/rejected": -0.15602989494800568, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.550558666569279e-06, |
|
"log_odds_chosen": 0.09025579690933228, |
|
"log_odds_ratio": -0.7007181644439697, |
|
"logits/chosen": -3.1683359146118164, |
|
"logits/rejected": -3.1638879776000977, |
|
"logps/chosen": -0.6835473775863647, |
|
"logps/rejected": -0.742916464805603, |
|
"loss": 0.5612, |
|
"nll_loss": 0.4019767642021179, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.13670948147773743, |
|
"rewards/margins": 0.011873816139996052, |
|
"rewards/rejected": -0.1485833078622818, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.3820655202161237e-06, |
|
"log_odds_chosen": 0.4793614447116852, |
|
"log_odds_ratio": -0.548845648765564, |
|
"logits/chosen": -3.1354401111602783, |
|
"logits/rejected": -3.124065637588501, |
|
"logps/chosen": -0.5931288003921509, |
|
"logps/rejected": -0.8161810636520386, |
|
"loss": 0.5134, |
|
"nll_loss": 0.40906891226768494, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.11862574517726898, |
|
"rewards/margins": 0.04461048170924187, |
|
"rewards/rejected": -0.16323623061180115, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 2.214108175720246e-06, |
|
"log_odds_chosen": 0.7576763033866882, |
|
"log_odds_ratio": -0.4282462000846863, |
|
"logits/chosen": -3.118053674697876, |
|
"logits/rejected": -3.1379125118255615, |
|
"logps/chosen": -0.49083226919174194, |
|
"logps/rejected": -0.853580117225647, |
|
"loss": 0.459, |
|
"nll_loss": 0.3588988184928894, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09816645830869675, |
|
"rewards/margins": 0.07254956662654877, |
|
"rewards/rejected": -0.17071601748466492, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 2.0474496996359676e-06, |
|
"log_odds_chosen": 0.733576774597168, |
|
"log_odds_ratio": -0.4507887363433838, |
|
"logits/chosen": -3.137216567993164, |
|
"logits/rejected": -3.1352245807647705, |
|
"logps/chosen": -0.5455455183982849, |
|
"logps/rejected": -0.9026147723197937, |
|
"loss": 0.4736, |
|
"nll_loss": 0.38031843304634094, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.10910910367965698, |
|
"rewards/margins": 0.07141385227441788, |
|
"rewards/rejected": -0.18052296340465546, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 1.882847257477398e-06, |
|
"log_odds_chosen": 0.8710149526596069, |
|
"log_odds_ratio": -0.4077950417995453, |
|
"logits/chosen": -3.1231517791748047, |
|
"logits/rejected": -3.1361801624298096, |
|
"logps/chosen": -0.5210133194923401, |
|
"logps/rejected": -0.9504634737968445, |
|
"loss": 0.4573, |
|
"nll_loss": 0.3601577877998352, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.10420265048742294, |
|
"rewards/margins": 0.0858900398015976, |
|
"rewards/rejected": -0.19009268283843994, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.7210486737516947e-06, |
|
"log_odds_chosen": 0.8500790596008301, |
|
"log_odds_ratio": -0.409542977809906, |
|
"logits/chosen": -3.1291608810424805, |
|
"logits/rejected": -3.1443474292755127, |
|
"logps/chosen": -0.5176515579223633, |
|
"logps/rejected": -0.9122639894485474, |
|
"loss": 0.441, |
|
"nll_loss": 0.38534316420555115, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.10353031009435654, |
|
"rewards/margins": 0.07892249524593353, |
|
"rewards/rejected": -0.18245279788970947, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 1.5627890344305256e-06, |
|
"log_odds_chosen": 0.7590165734291077, |
|
"log_odds_ratio": -0.4255223274230957, |
|
"logits/chosen": -3.1419529914855957, |
|
"logits/rejected": -3.1484503746032715, |
|
"logps/chosen": -0.5282884836196899, |
|
"logps/rejected": -0.8793913125991821, |
|
"loss": 0.4385, |
|
"nll_loss": 0.3436318039894104, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.10565771162509918, |
|
"rewards/margins": 0.07022054493427277, |
|
"rewards/rejected": -0.17587824165821075, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 1.4087873472954638e-06, |
|
"log_odds_chosen": 0.8060294985771179, |
|
"log_odds_ratio": -0.429814875125885, |
|
"logits/chosen": -3.1374385356903076, |
|
"logits/rejected": -3.137779712677002, |
|
"logps/chosen": -0.5177971124649048, |
|
"logps/rejected": -0.9097731709480286, |
|
"loss": 0.4492, |
|
"nll_loss": 0.35047072172164917, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.1035594493150711, |
|
"rewards/margins": 0.07839521020650864, |
|
"rewards/rejected": -0.18195465207099915, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 1.2597432753300753e-06, |
|
"log_odds_chosen": 0.9330030679702759, |
|
"log_odds_ratio": -0.3743920624256134, |
|
"logits/chosen": -3.1239657402038574, |
|
"logits/rejected": -3.1310207843780518, |
|
"logps/chosen": -0.4897725582122803, |
|
"logps/rejected": -0.9372097253799438, |
|
"loss": 0.4426, |
|
"nll_loss": 0.35372328758239746, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.09795451909303665, |
|
"rewards/margins": 0.08948741853237152, |
|
"rewards/rejected": -0.18744193017482758, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 1.116333957999608e-06, |
|
"log_odds_chosen": 0.7943806648254395, |
|
"log_odds_ratio": -0.43320780992507935, |
|
"logits/chosen": -3.1169140338897705, |
|
"logits/rejected": -3.1135616302490234, |
|
"logps/chosen": -0.5348241925239563, |
|
"logps/rejected": -0.9205295443534851, |
|
"loss": 0.4582, |
|
"nll_loss": 0.38884326815605164, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.1069648265838623, |
|
"rewards/margins": 0.07714107632637024, |
|
"rewards/rejected": -0.18410590291023254, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 9.792109348599036e-07, |
|
"log_odds_chosen": 0.8558658361434937, |
|
"log_odds_ratio": -0.40837377309799194, |
|
"logits/chosen": -3.1396241188049316, |
|
"logits/rejected": -3.1491103172302246, |
|
"logps/chosen": -0.5247679948806763, |
|
"logps/rejected": -0.9536547660827637, |
|
"loss": 0.4342, |
|
"nll_loss": 0.34443485736846924, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.1049535870552063, |
|
"rewards/margins": 0.08577735722064972, |
|
"rewards/rejected": -0.1907309591770172, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 8.48997185472226e-07, |
|
"log_odds_chosen": 0.8510887026786804, |
|
"log_odds_ratio": -0.4203471541404724, |
|
"logits/chosen": -3.1297011375427246, |
|
"logits/rejected": -3.139014720916748, |
|
"logps/chosen": -0.5276013612747192, |
|
"logps/rejected": -0.9365232586860657, |
|
"loss": 0.4576, |
|
"nll_loss": 0.35662391781806946, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.10552027076482773, |
|
"rewards/margins": 0.08178436756134033, |
|
"rewards/rejected": -0.18730461597442627, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 4.75, |
|
"learning_rate": 7.26284299072334e-07, |
|
"log_odds_chosen": 0.8458667993545532, |
|
"log_odds_ratio": -0.4165124297142029, |
|
"logits/chosen": -3.1148293018341064, |
|
"logits/rejected": -3.1330175399780273, |
|
"logps/chosen": -0.523215651512146, |
|
"logps/rejected": -0.9373798370361328, |
|
"loss": 0.4358, |
|
"nll_loss": 0.36000293493270874, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.10464314371347427, |
|
"rewards/margins": 0.08283283561468124, |
|
"rewards/rejected": -0.1874759942293167, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 6.11629786852592e-07, |
|
"log_odds_chosen": 0.7515332102775574, |
|
"log_odds_ratio": -0.4506092965602875, |
|
"logits/chosen": -3.124948501586914, |
|
"logits/rejected": -3.1396474838256836, |
|
"logps/chosen": -0.5801728963851929, |
|
"logps/rejected": -0.9574558138847351, |
|
"loss": 0.4377, |
|
"nll_loss": 0.3803955018520355, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.11603458225727081, |
|
"rewards/margins": 0.07545658200979233, |
|
"rewards/rejected": -0.19149115681648254, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 5.055545490679981e-07, |
|
"log_odds_chosen": 0.8198745846748352, |
|
"log_odds_ratio": -0.4333067834377289, |
|
"logits/chosen": -3.0795700550079346, |
|
"logits/rejected": -3.0752415657043457, |
|
"logps/chosen": -0.5191539525985718, |
|
"logps/rejected": -0.9393302798271179, |
|
"loss": 0.4312, |
|
"nll_loss": 0.3498368263244629, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.10383079200983047, |
|
"rewards/margins": 0.08403525501489639, |
|
"rewards/rejected": -0.18786606192588806, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 4.0854050847362966e-07, |
|
"log_odds_chosen": 0.752915620803833, |
|
"log_odds_ratio": -0.4345216751098633, |
|
"logits/chosen": -3.094586133956909, |
|
"logits/rejected": -3.128570318222046, |
|
"logps/chosen": -0.4940599501132965, |
|
"logps/rejected": -0.8385321497917175, |
|
"loss": 0.4401, |
|
"nll_loss": 0.3326614499092102, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.09881198406219482, |
|
"rewards/margins": 0.06889443844556808, |
|
"rewards/rejected": -0.1677064150571823, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 4.59375, |
|
"learning_rate": 3.2102842084530293e-07, |
|
"log_odds_chosen": 0.8905293345451355, |
|
"log_odds_ratio": -0.40456423163414, |
|
"logits/chosen": -3.128862142562866, |
|
"logits/rejected": -3.1354432106018066, |
|
"logps/chosen": -0.48209333419799805, |
|
"logps/rejected": -0.8838127851486206, |
|
"loss": 0.4485, |
|
"nll_loss": 0.34660106897354126, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.09641867876052856, |
|
"rewards/margins": 0.08034388720989227, |
|
"rewards/rejected": -0.17676255106925964, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 4.125, |
|
"learning_rate": 2.4341587253072035e-07, |
|
"log_odds_chosen": 0.7810487747192383, |
|
"log_odds_ratio": -0.4349869191646576, |
|
"logits/chosen": -3.108931064605713, |
|
"logits/rejected": -3.120337963104248, |
|
"logps/chosen": -0.5262182354927063, |
|
"logps/rejected": -0.8968960046768188, |
|
"loss": 0.439, |
|
"nll_loss": 0.3392297923564911, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10524364560842514, |
|
"rewards/margins": 0.07413554936647415, |
|
"rewards/rejected": -0.1793791949748993, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 3.796875, |
|
"learning_rate": 1.7605547412867574e-07, |
|
"log_odds_chosen": 0.8104718923568726, |
|
"log_odds_ratio": -0.43349266052246094, |
|
"logits/chosen": -3.1305861473083496, |
|
"logits/rejected": -3.1289594173431396, |
|
"logps/chosen": -0.532878577709198, |
|
"logps/rejected": -0.9315230250358582, |
|
"loss": 0.4151, |
|
"nll_loss": 0.31806549429893494, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.10657572746276855, |
|
"rewards/margins": 0.07972888648509979, |
|
"rewards/rejected": -0.18630459904670715, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 1.1925325850281416e-07, |
|
"log_odds_chosen": 0.8517113924026489, |
|
"log_odds_ratio": -0.41829895973205566, |
|
"logits/chosen": -3.1101014614105225, |
|
"logits/rejected": -3.1129114627838135, |
|
"logps/chosen": -0.5812093019485474, |
|
"logps/rejected": -1.0427311658859253, |
|
"loss": 0.4593, |
|
"nll_loss": 0.40889596939086914, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.11624185740947723, |
|
"rewards/margins": 0.09230439364910126, |
|
"rewards/rejected": -0.2085462361574173, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 7.326729040812136e-08, |
|
"log_odds_chosen": 0.8634054064750671, |
|
"log_odds_ratio": -0.41465049982070923, |
|
"logits/chosen": -3.142824172973633, |
|
"logits/rejected": -3.1401774883270264, |
|
"logps/chosen": -0.5450412631034851, |
|
"logps/rejected": -0.9720172882080078, |
|
"loss": 0.4621, |
|
"nll_loss": 0.39653775095939636, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.10900826752185822, |
|
"rewards/margins": 0.0853951945900917, |
|
"rewards/rejected": -0.19440343976020813, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 3.830649404690939e-08, |
|
"log_odds_chosen": 0.900564968585968, |
|
"log_odds_ratio": -0.4068581461906433, |
|
"logits/chosen": -3.128530979156494, |
|
"logits/rejected": -3.1158556938171387, |
|
"logps/chosen": -0.503007709980011, |
|
"logps/rejected": -0.9345412254333496, |
|
"loss": 0.4293, |
|
"nll_loss": 0.33139926195144653, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.10060155391693115, |
|
"rewards/margins": 0.08630671352148056, |
|
"rewards/rejected": -0.1869082748889923, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 1.452970388096192e-08, |
|
"log_odds_chosen": 0.8053537607192993, |
|
"log_odds_ratio": -0.4206915497779846, |
|
"logits/chosen": -3.1261043548583984, |
|
"logits/rejected": -3.133706569671631, |
|
"logps/chosen": -0.5194380283355713, |
|
"logps/rejected": -0.9027393460273743, |
|
"loss": 0.4336, |
|
"nll_loss": 0.37930217385292053, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.10388760268688202, |
|
"rewards/margins": 0.07666026055812836, |
|
"rewards/rejected": -0.18054786324501038, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 4.40625, |
|
"learning_rate": 2.044943012210754e-09, |
|
"log_odds_chosen": 0.8168908357620239, |
|
"log_odds_ratio": -0.4145738482475281, |
|
"logits/chosen": -3.13958740234375, |
|
"logits/rejected": -3.1476328372955322, |
|
"logps/chosen": -0.5368055105209351, |
|
"logps/rejected": -0.9489006996154785, |
|
"loss": 0.4614, |
|
"nll_loss": 0.3538859188556671, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.10736110061407089, |
|
"rewards/margins": 0.0824190229177475, |
|
"rewards/rejected": -0.18978014588356018, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 466, |
|
"total_flos": 0.0, |
|
"train_loss": 0.508239566000746, |
|
"train_runtime": 6462.3532, |
|
"train_samples_per_second": 4.62, |
|
"train_steps_per_second": 0.072 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 466, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|