|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9977426636568849, |
|
"eval_steps": 500, |
|
"global_step": 221, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 4.9747829807701e-06, |
|
"log_odds_chosen": 0.29668617248535156, |
|
"log_odds_ratio": -0.6379951238632202, |
|
"logits/chosen": -3.2007384300231934, |
|
"logits/rejected": -3.1898930072784424, |
|
"logps/chosen": -0.8916305303573608, |
|
"logps/rejected": -1.0567858219146729, |
|
"loss": 0.6627, |
|
"nll_loss": 0.5468634366989136, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.1783261001110077, |
|
"rewards/margins": 0.033031076192855835, |
|
"rewards/rejected": -0.2113572061061859, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.234375, |
|
"learning_rate": 4.89964064152747e-06, |
|
"log_odds_chosen": 0.3181908428668976, |
|
"log_odds_ratio": -0.6044929623603821, |
|
"logits/chosen": -3.215482711791992, |
|
"logits/rejected": -3.215388059616089, |
|
"logps/chosen": -0.8658155202865601, |
|
"logps/rejected": -1.0561679601669312, |
|
"loss": 0.6162, |
|
"nll_loss": 0.4879694879055023, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.17316310107707977, |
|
"rewards/margins": 0.03807050734758377, |
|
"rewards/rejected": -0.21123358607292175, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 4.7760888749230414e-06, |
|
"log_odds_chosen": 0.3909495174884796, |
|
"log_odds_ratio": -0.6076517701148987, |
|
"logits/chosen": -3.2152676582336426, |
|
"logits/rejected": -3.202721357345581, |
|
"logps/chosen": -0.8426260948181152, |
|
"logps/rejected": -1.061783790588379, |
|
"loss": 0.6037, |
|
"nll_loss": 0.4774637222290039, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16852520406246185, |
|
"rewards/margins": 0.04383154585957527, |
|
"rewards/rejected": -0.21235676109790802, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 2.96875, |
|
"learning_rate": 4.6066201667762944e-06, |
|
"log_odds_chosen": 0.3192257285118103, |
|
"log_odds_ratio": -0.6141721606254578, |
|
"logits/chosen": -3.200482130050659, |
|
"logits/rejected": -3.1900203227996826, |
|
"logps/chosen": -0.8548553586006165, |
|
"logps/rejected": -1.034092903137207, |
|
"loss": 0.6009, |
|
"nll_loss": 0.4767337441444397, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.17097108066082, |
|
"rewards/margins": 0.0358474999666214, |
|
"rewards/rejected": -0.2068185806274414, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 4.3946533136249926e-06, |
|
"log_odds_chosen": 0.3027415871620178, |
|
"log_odds_ratio": -0.6166602969169617, |
|
"logits/chosen": -3.206148147583008, |
|
"logits/rejected": -3.1972875595092773, |
|
"logps/chosen": -0.8349093198776245, |
|
"logps/rejected": -1.0070207118988037, |
|
"loss": 0.5918, |
|
"nll_loss": 0.4716118276119232, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.16698187589645386, |
|
"rewards/margins": 0.03442227095365524, |
|
"rewards/rejected": -0.2014041393995285, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 3.09375, |
|
"learning_rate": 4.1444644532387485e-06, |
|
"log_odds_chosen": 0.48492059111595154, |
|
"log_odds_ratio": -0.5473419427871704, |
|
"logits/chosen": -3.209733247756958, |
|
"logits/rejected": -3.202362537384033, |
|
"logps/chosen": -0.8019172549247742, |
|
"logps/rejected": -1.0743623971939087, |
|
"loss": 0.5974, |
|
"nll_loss": 0.476468950510025, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.16038347780704498, |
|
"rewards/margins": 0.05448903515934944, |
|
"rewards/rejected": -0.21487247943878174, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.28125, |
|
"learning_rate": 3.861100799460336e-06, |
|
"log_odds_chosen": 0.331600546836853, |
|
"log_odds_ratio": -0.6023644208908081, |
|
"logits/chosen": -3.1663670539855957, |
|
"logits/rejected": -3.150857925415039, |
|
"logps/chosen": -0.8181543350219727, |
|
"logps/rejected": -0.9955471754074097, |
|
"loss": 0.603, |
|
"nll_loss": 0.4812262952327728, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.163630872964859, |
|
"rewards/margins": 0.03547856956720352, |
|
"rewards/rejected": -0.19910944998264313, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.328125, |
|
"learning_rate": 3.550278821654866e-06, |
|
"log_odds_chosen": 0.477538526058197, |
|
"log_odds_ratio": -0.5380920171737671, |
|
"logits/chosen": -3.194234848022461, |
|
"logits/rejected": -3.1841280460357666, |
|
"logps/chosen": -0.8073774576187134, |
|
"logps/rejected": -1.0695488452911377, |
|
"loss": 0.5619, |
|
"nll_loss": 0.47237372398376465, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.16147547960281372, |
|
"rewards/margins": 0.05243431776762009, |
|
"rewards/rejected": -0.2139098197221756, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 3.390625, |
|
"learning_rate": 3.218268922855452e-06, |
|
"log_odds_chosen": 0.3934742212295532, |
|
"log_odds_ratio": -0.598767101764679, |
|
"logits/chosen": -3.1895227432250977, |
|
"logits/rejected": -3.173553943634033, |
|
"logps/chosen": -0.8475399017333984, |
|
"logps/rejected": -1.0798330307006836, |
|
"loss": 0.6021, |
|
"nll_loss": 0.4789814054965973, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.16950799524784088, |
|
"rewards/margins": 0.046458613127470016, |
|
"rewards/rejected": -0.2159666121006012, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 2.871768943064129e-06, |
|
"log_odds_chosen": 0.44187504053115845, |
|
"log_odds_ratio": -0.5802451372146606, |
|
"logits/chosen": -3.1786134243011475, |
|
"logits/rejected": -3.1717491149902344, |
|
"logps/chosen": -0.8232837915420532, |
|
"logps/rejected": -1.0571515560150146, |
|
"loss": 0.5994, |
|
"nll_loss": 0.49746760725975037, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.16465675830841064, |
|
"rewards/margins": 0.046773575246334076, |
|
"rewards/rejected": -0.21143031120300293, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.75, |
|
"learning_rate": 2.517769039603744e-06, |
|
"log_odds_chosen": 0.45674100518226624, |
|
"log_odds_ratio": -0.5514575242996216, |
|
"logits/chosen": -3.192737102508545, |
|
"logits/rejected": -3.182710647583008, |
|
"logps/chosen": -0.7476301789283752, |
|
"logps/rejected": -0.9913870692253113, |
|
"loss": 0.5818, |
|
"nll_loss": 0.4433298707008362, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.14952602982521057, |
|
"rewards/margins": 0.04875140264630318, |
|
"rewards/rejected": -0.19827742874622345, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 2.163410670372652e-06, |
|
"log_odds_chosen": 0.4035864770412445, |
|
"log_odds_ratio": -0.6150745153427124, |
|
"logits/chosen": -3.193054676055908, |
|
"logits/rejected": -3.178786277770996, |
|
"logps/chosen": -0.816441535949707, |
|
"logps/rejected": -1.0266228914260864, |
|
"loss": 0.5753, |
|
"nll_loss": 0.44871068000793457, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.16328832507133484, |
|
"rewards/margins": 0.042036257684230804, |
|
"rewards/rejected": -0.20532457530498505, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 1.8158425248197931e-06, |
|
"log_odds_chosen": 0.3830532431602478, |
|
"log_odds_ratio": -0.5974889993667603, |
|
"logits/chosen": -3.196091890335083, |
|
"logits/rejected": -3.1858651638031006, |
|
"logps/chosen": -0.8129725456237793, |
|
"logps/rejected": -1.0201164484024048, |
|
"loss": 0.5808, |
|
"nll_loss": 0.4634559154510498, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.1625945270061493, |
|
"rewards/margins": 0.041428789496421814, |
|
"rewards/rejected": -0.20402328670024872, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.375, |
|
"learning_rate": 1.482076309033254e-06, |
|
"log_odds_chosen": 0.6047395467758179, |
|
"log_odds_ratio": -0.5417571663856506, |
|
"logits/chosen": -3.196877956390381, |
|
"logits/rejected": -3.181436061859131, |
|
"logps/chosen": -0.7740004062652588, |
|
"logps/rejected": -1.068457007408142, |
|
"loss": 0.5764, |
|
"nll_loss": 0.5040202140808105, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.15480007231235504, |
|
"rewards/margins": 0.058891307562589645, |
|
"rewards/rejected": -0.21369138360023499, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 1.1688452942784592e-06, |
|
"log_odds_chosen": 0.44741934537887573, |
|
"log_odds_ratio": -0.5705805420875549, |
|
"logits/chosen": -3.1841094493865967, |
|
"logits/rejected": -3.182647705078125, |
|
"logps/chosen": -0.8209434747695923, |
|
"logps/rejected": -1.067769169807434, |
|
"loss": 0.5718, |
|
"nll_loss": 0.4761679768562317, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1641887128353119, |
|
"rewards/margins": 0.04936511814594269, |
|
"rewards/rejected": -0.21355381608009338, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 8.824684825733865e-07, |
|
"log_odds_chosen": 0.3429742455482483, |
|
"log_odds_ratio": -0.6090758442878723, |
|
"logits/chosen": -3.207231044769287, |
|
"logits/rejected": -3.193406581878662, |
|
"logps/chosen": -0.8023210763931274, |
|
"logps/rejected": -0.9921668767929077, |
|
"loss": 0.5897, |
|
"nll_loss": 0.4722462594509125, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.16046421229839325, |
|
"rewards/margins": 0.03796914964914322, |
|
"rewards/rejected": -0.19843336939811707, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 4.25, |
|
"learning_rate": 6.28723129572247e-07, |
|
"log_odds_chosen": 0.3038043677806854, |
|
"log_odds_ratio": -0.6364859342575073, |
|
"logits/chosen": -3.2201812267303467, |
|
"logits/rejected": -3.2117972373962402, |
|
"logps/chosen": -0.7899866104125977, |
|
"logps/rejected": -0.9470660090446472, |
|
"loss": 0.614, |
|
"nll_loss": 0.47518712282180786, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.15799733996391296, |
|
"rewards/margins": 0.03141586109995842, |
|
"rewards/rejected": -0.18941320478916168, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 4.0, |
|
"learning_rate": 4.127281964319446e-07, |
|
"log_odds_chosen": 0.42332348227500916, |
|
"log_odds_ratio": -0.5784670114517212, |
|
"logits/chosen": -3.194354295730591, |
|
"logits/rejected": -3.1853244304656982, |
|
"logps/chosen": -0.7676048278808594, |
|
"logps/rejected": -0.984086811542511, |
|
"loss": 0.5862, |
|
"nll_loss": 0.4467201232910156, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.15352095663547516, |
|
"rewards/margins": 0.04329640045762062, |
|
"rewards/rejected": -0.19681736826896667, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 2.388410818585263e-07, |
|
"log_odds_chosen": 0.47039803862571716, |
|
"log_odds_ratio": -0.5836545825004578, |
|
"logits/chosen": -3.205718994140625, |
|
"logits/rejected": -3.188554286956787, |
|
"logps/chosen": -0.7774848937988281, |
|
"logps/rejected": -1.0028108358383179, |
|
"loss": 0.5743, |
|
"nll_loss": 0.4325433671474457, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1554969847202301, |
|
"rewards/margins": 0.045065198093652725, |
|
"rewards/rejected": -0.20056216418743134, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 1.1056971762161584e-07, |
|
"log_odds_chosen": 0.2785649299621582, |
|
"log_odds_ratio": -0.643887460231781, |
|
"logits/chosen": -3.1912004947662354, |
|
"logits/rejected": -3.1815104484558105, |
|
"logps/chosen": -0.87229984998703, |
|
"logps/rejected": -1.022983193397522, |
|
"loss": 0.5796, |
|
"nll_loss": 0.4835774004459381, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.17445996403694153, |
|
"rewards/margins": 0.030136678367853165, |
|
"rewards/rejected": -0.20459666848182678, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.03125, |
|
"learning_rate": 3.050180088809973e-08, |
|
"log_odds_chosen": 0.30176714062690735, |
|
"log_odds_ratio": -0.6279340386390686, |
|
"logits/chosen": -3.187382221221924, |
|
"logits/rejected": -3.172898054122925, |
|
"logps/chosen": -0.842138946056366, |
|
"logps/rejected": -1.0073583126068115, |
|
"loss": 0.5913, |
|
"nll_loss": 0.4963778555393219, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.16842779517173767, |
|
"rewards/margins": 0.03304388374090195, |
|
"rewards/rejected": -0.20147165656089783, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 2.525910147516131e-10, |
|
"log_odds_chosen": 0.4221881031990051, |
|
"log_odds_ratio": -0.5777419805526733, |
|
"logits/chosen": -3.206446886062622, |
|
"logits/rejected": -3.1952741146087646, |
|
"logps/chosen": -0.7956362962722778, |
|
"logps/rejected": -1.0130151510238647, |
|
"loss": 0.6099, |
|
"nll_loss": 0.4894731640815735, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.15912725031375885, |
|
"rewards/margins": 0.04347577691078186, |
|
"rewards/rejected": -0.2026030272245407, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 221, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5939142045931579, |
|
"train_runtime": 4430.1575, |
|
"train_samples_per_second": 3.195, |
|
"train_steps_per_second": 0.05 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 221, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|