JW17's picture
Model save
881dc3e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9977426636568849,
"eval_steps": 500,
"global_step": 221,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 3.3125,
"learning_rate": 4.9747829807701e-06,
"log_odds_chosen": 0.29668617248535156,
"log_odds_ratio": -0.6379951238632202,
"logits/chosen": -3.2007384300231934,
"logits/rejected": -3.1898930072784424,
"logps/chosen": -0.8916305303573608,
"logps/rejected": -1.0567858219146729,
"loss": 0.6627,
"nll_loss": 0.5468634366989136,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.1783261001110077,
"rewards/margins": 0.033031076192855835,
"rewards/rejected": -0.2113572061061859,
"step": 10
},
{
"epoch": 0.09,
"grad_norm": 3.234375,
"learning_rate": 4.89964064152747e-06,
"log_odds_chosen": 0.3181908428668976,
"log_odds_ratio": -0.6044929623603821,
"logits/chosen": -3.215482711791992,
"logits/rejected": -3.215388059616089,
"logps/chosen": -0.8658155202865601,
"logps/rejected": -1.0561679601669312,
"loss": 0.6162,
"nll_loss": 0.4879694879055023,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.17316310107707977,
"rewards/margins": 0.03807050734758377,
"rewards/rejected": -0.21123358607292175,
"step": 20
},
{
"epoch": 0.14,
"grad_norm": 3.3125,
"learning_rate": 4.7760888749230414e-06,
"log_odds_chosen": 0.3909495174884796,
"log_odds_ratio": -0.6076517701148987,
"logits/chosen": -3.2152676582336426,
"logits/rejected": -3.202721357345581,
"logps/chosen": -0.8426260948181152,
"logps/rejected": -1.061783790588379,
"loss": 0.6037,
"nll_loss": 0.4774637222290039,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.16852520406246185,
"rewards/margins": 0.04383154585957527,
"rewards/rejected": -0.21235676109790802,
"step": 30
},
{
"epoch": 0.18,
"grad_norm": 2.96875,
"learning_rate": 4.6066201667762944e-06,
"log_odds_chosen": 0.3192257285118103,
"log_odds_ratio": -0.6141721606254578,
"logits/chosen": -3.200482130050659,
"logits/rejected": -3.1900203227996826,
"logps/chosen": -0.8548553586006165,
"logps/rejected": -1.034092903137207,
"loss": 0.6009,
"nll_loss": 0.4767337441444397,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.17097108066082,
"rewards/margins": 0.0358474999666214,
"rewards/rejected": -0.2068185806274414,
"step": 40
},
{
"epoch": 0.23,
"grad_norm": 2.84375,
"learning_rate": 4.3946533136249926e-06,
"log_odds_chosen": 0.3027415871620178,
"log_odds_ratio": -0.6166602969169617,
"logits/chosen": -3.206148147583008,
"logits/rejected": -3.1972875595092773,
"logps/chosen": -0.8349093198776245,
"logps/rejected": -1.0070207118988037,
"loss": 0.5918,
"nll_loss": 0.4716118276119232,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.16698187589645386,
"rewards/margins": 0.03442227095365524,
"rewards/rejected": -0.2014041393995285,
"step": 50
},
{
"epoch": 0.27,
"grad_norm": 3.09375,
"learning_rate": 4.1444644532387485e-06,
"log_odds_chosen": 0.48492059111595154,
"log_odds_ratio": -0.5473419427871704,
"logits/chosen": -3.209733247756958,
"logits/rejected": -3.202362537384033,
"logps/chosen": -0.8019172549247742,
"logps/rejected": -1.0743623971939087,
"loss": 0.5974,
"nll_loss": 0.476468950510025,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.16038347780704498,
"rewards/margins": 0.05448903515934944,
"rewards/rejected": -0.21487247943878174,
"step": 60
},
{
"epoch": 0.32,
"grad_norm": 3.28125,
"learning_rate": 3.861100799460336e-06,
"log_odds_chosen": 0.331600546836853,
"log_odds_ratio": -0.6023644208908081,
"logits/chosen": -3.1663670539855957,
"logits/rejected": -3.150857925415039,
"logps/chosen": -0.8181543350219727,
"logps/rejected": -0.9955471754074097,
"loss": 0.603,
"nll_loss": 0.4812262952327728,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.163630872964859,
"rewards/margins": 0.03547856956720352,
"rewards/rejected": -0.19910944998264313,
"step": 70
},
{
"epoch": 0.36,
"grad_norm": 3.328125,
"learning_rate": 3.550278821654866e-06,
"log_odds_chosen": 0.477538526058197,
"log_odds_ratio": -0.5380920171737671,
"logits/chosen": -3.194234848022461,
"logits/rejected": -3.1841280460357666,
"logps/chosen": -0.8073774576187134,
"logps/rejected": -1.0695488452911377,
"loss": 0.5619,
"nll_loss": 0.47237372398376465,
"rewards/accuracies": 0.7437499761581421,
"rewards/chosen": -0.16147547960281372,
"rewards/margins": 0.05243431776762009,
"rewards/rejected": -0.2139098197221756,
"step": 80
},
{
"epoch": 0.41,
"grad_norm": 3.390625,
"learning_rate": 3.218268922855452e-06,
"log_odds_chosen": 0.3934742212295532,
"log_odds_ratio": -0.598767101764679,
"logits/chosen": -3.1895227432250977,
"logits/rejected": -3.173553943634033,
"logps/chosen": -0.8475399017333984,
"logps/rejected": -1.0798330307006836,
"loss": 0.6021,
"nll_loss": 0.4789814054965973,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.16950799524784088,
"rewards/margins": 0.046458613127470016,
"rewards/rejected": -0.2159666121006012,
"step": 90
},
{
"epoch": 0.45,
"grad_norm": 2.921875,
"learning_rate": 2.871768943064129e-06,
"log_odds_chosen": 0.44187504053115845,
"log_odds_ratio": -0.5802451372146606,
"logits/chosen": -3.1786134243011475,
"logits/rejected": -3.1717491149902344,
"logps/chosen": -0.8232837915420532,
"logps/rejected": -1.0571515560150146,
"loss": 0.5994,
"nll_loss": 0.49746760725975037,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.16465675830841064,
"rewards/margins": 0.046773575246334076,
"rewards/rejected": -0.21143031120300293,
"step": 100
},
{
"epoch": 0.5,
"grad_norm": 3.75,
"learning_rate": 2.517769039603744e-06,
"log_odds_chosen": 0.45674100518226624,
"log_odds_ratio": -0.5514575242996216,
"logits/chosen": -3.192737102508545,
"logits/rejected": -3.182710647583008,
"logps/chosen": -0.7476301789283752,
"logps/rejected": -0.9913870692253113,
"loss": 0.5818,
"nll_loss": 0.4433298707008362,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.14952602982521057,
"rewards/margins": 0.04875140264630318,
"rewards/rejected": -0.19827742874622345,
"step": 110
},
{
"epoch": 0.54,
"grad_norm": 3.34375,
"learning_rate": 2.163410670372652e-06,
"log_odds_chosen": 0.4035864770412445,
"log_odds_ratio": -0.6150745153427124,
"logits/chosen": -3.193054676055908,
"logits/rejected": -3.178786277770996,
"logps/chosen": -0.816441535949707,
"logps/rejected": -1.0266228914260864,
"loss": 0.5753,
"nll_loss": 0.44871068000793457,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.16328832507133484,
"rewards/margins": 0.042036257684230804,
"rewards/rejected": -0.20532457530498505,
"step": 120
},
{
"epoch": 0.59,
"grad_norm": 3.078125,
"learning_rate": 1.8158425248197931e-06,
"log_odds_chosen": 0.3830532431602478,
"log_odds_ratio": -0.5974889993667603,
"logits/chosen": -3.196091890335083,
"logits/rejected": -3.1858651638031006,
"logps/chosen": -0.8129725456237793,
"logps/rejected": -1.0201164484024048,
"loss": 0.5808,
"nll_loss": 0.4634559154510498,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.1625945270061493,
"rewards/margins": 0.041428789496421814,
"rewards/rejected": -0.20402328670024872,
"step": 130
},
{
"epoch": 0.63,
"grad_norm": 3.375,
"learning_rate": 1.482076309033254e-06,
"log_odds_chosen": 0.6047395467758179,
"log_odds_ratio": -0.5417571663856506,
"logits/chosen": -3.196877956390381,
"logits/rejected": -3.181436061859131,
"logps/chosen": -0.7740004062652588,
"logps/rejected": -1.068457007408142,
"loss": 0.5764,
"nll_loss": 0.5040202140808105,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.15480007231235504,
"rewards/margins": 0.058891307562589645,
"rewards/rejected": -0.21369138360023499,
"step": 140
},
{
"epoch": 0.68,
"grad_norm": 2.9375,
"learning_rate": 1.1688452942784592e-06,
"log_odds_chosen": 0.44741934537887573,
"log_odds_ratio": -0.5705805420875549,
"logits/chosen": -3.1841094493865967,
"logits/rejected": -3.182647705078125,
"logps/chosen": -0.8209434747695923,
"logps/rejected": -1.067769169807434,
"loss": 0.5718,
"nll_loss": 0.4761679768562317,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.1641887128353119,
"rewards/margins": 0.04936511814594269,
"rewards/rejected": -0.21355381608009338,
"step": 150
},
{
"epoch": 0.72,
"grad_norm": 3.171875,
"learning_rate": 8.824684825733865e-07,
"log_odds_chosen": 0.3429742455482483,
"log_odds_ratio": -0.6090758442878723,
"logits/chosen": -3.207231044769287,
"logits/rejected": -3.193406581878662,
"logps/chosen": -0.8023210763931274,
"logps/rejected": -0.9921668767929077,
"loss": 0.5897,
"nll_loss": 0.4722462594509125,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.16046421229839325,
"rewards/margins": 0.03796914964914322,
"rewards/rejected": -0.19843336939811707,
"step": 160
},
{
"epoch": 0.77,
"grad_norm": 4.25,
"learning_rate": 6.28723129572247e-07,
"log_odds_chosen": 0.3038043677806854,
"log_odds_ratio": -0.6364859342575073,
"logits/chosen": -3.2201812267303467,
"logits/rejected": -3.2117972373962402,
"logps/chosen": -0.7899866104125977,
"logps/rejected": -0.9470660090446472,
"loss": 0.614,
"nll_loss": 0.47518712282180786,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.15799733996391296,
"rewards/margins": 0.03141586109995842,
"rewards/rejected": -0.18941320478916168,
"step": 170
},
{
"epoch": 0.81,
"grad_norm": 4.0,
"learning_rate": 4.127281964319446e-07,
"log_odds_chosen": 0.42332348227500916,
"log_odds_ratio": -0.5784670114517212,
"logits/chosen": -3.194354295730591,
"logits/rejected": -3.1853244304656982,
"logps/chosen": -0.7676048278808594,
"logps/rejected": -0.984086811542511,
"loss": 0.5862,
"nll_loss": 0.4467201232910156,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.15352095663547516,
"rewards/margins": 0.04329640045762062,
"rewards/rejected": -0.19681736826896667,
"step": 180
},
{
"epoch": 0.86,
"grad_norm": 3.15625,
"learning_rate": 2.388410818585263e-07,
"log_odds_chosen": 0.47039803862571716,
"log_odds_ratio": -0.5836545825004578,
"logits/chosen": -3.205718994140625,
"logits/rejected": -3.188554286956787,
"logps/chosen": -0.7774848937988281,
"logps/rejected": -1.0028108358383179,
"loss": 0.5743,
"nll_loss": 0.4325433671474457,
"rewards/accuracies": 0.706250011920929,
"rewards/chosen": -0.1554969847202301,
"rewards/margins": 0.045065198093652725,
"rewards/rejected": -0.20056216418743134,
"step": 190
},
{
"epoch": 0.9,
"grad_norm": 2.9375,
"learning_rate": 1.1056971762161584e-07,
"log_odds_chosen": 0.2785649299621582,
"log_odds_ratio": -0.643887460231781,
"logits/chosen": -3.1912004947662354,
"logits/rejected": -3.1815104484558105,
"logps/chosen": -0.87229984998703,
"logps/rejected": -1.022983193397522,
"loss": 0.5796,
"nll_loss": 0.4835774004459381,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.17445996403694153,
"rewards/margins": 0.030136678367853165,
"rewards/rejected": -0.20459666848182678,
"step": 200
},
{
"epoch": 0.95,
"grad_norm": 3.03125,
"learning_rate": 3.050180088809973e-08,
"log_odds_chosen": 0.30176714062690735,
"log_odds_ratio": -0.6279340386390686,
"logits/chosen": -3.187382221221924,
"logits/rejected": -3.172898054122925,
"logps/chosen": -0.842138946056366,
"logps/rejected": -1.0073583126068115,
"loss": 0.5913,
"nll_loss": 0.4963778555393219,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.16842779517173767,
"rewards/margins": 0.03304388374090195,
"rewards/rejected": -0.20147165656089783,
"step": 210
},
{
"epoch": 0.99,
"grad_norm": 3.34375,
"learning_rate": 2.525910147516131e-10,
"log_odds_chosen": 0.4221881031990051,
"log_odds_ratio": -0.5777419805526733,
"logits/chosen": -3.206446886062622,
"logits/rejected": -3.1952741146087646,
"logps/chosen": -0.7956362962722778,
"logps/rejected": -1.0130151510238647,
"loss": 0.6099,
"nll_loss": 0.4894731640815735,
"rewards/accuracies": 0.731249988079071,
"rewards/chosen": -0.15912725031375885,
"rewards/margins": 0.04347577691078186,
"rewards/rejected": -0.2026030272245407,
"step": 220
},
{
"epoch": 1.0,
"step": 221,
"total_flos": 0.0,
"train_loss": 0.5939142045931579,
"train_runtime": 4430.1575,
"train_samples_per_second": 3.195,
"train_steps_per_second": 0.05
}
],
"logging_steps": 10,
"max_steps": 221,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}