HealthTeam's picture
Training in progress, step 77336
582b51f
raw
history blame
19.5 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.150456695724614,
"global_step": 77336,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 1.9950413059216727e-05,
"loss": 14.0627,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 1.9900826118433453e-05,
"loss": 6.3799,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 1.9851239177650176e-05,
"loss": 5.31,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 1.9801652236866898e-05,
"loss": 4.9781,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 1.9752065296083624e-05,
"loss": 4.7392,
"step": 2500
},
{
"epoch": 0.04,
"learning_rate": 1.970247835530035e-05,
"loss": 4.5779,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 1.9652891414517075e-05,
"loss": 4.4691,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 1.96033044737338e-05,
"loss": 4.3745,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 1.9553717532950524e-05,
"loss": 4.2883,
"step": 4500
},
{
"epoch": 0.07,
"learning_rate": 1.9504130592167246e-05,
"loss": 4.2342,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 1.9454543651383972e-05,
"loss": 4.1614,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 1.9404956710600698e-05,
"loss": 4.1279,
"step": 6000
},
{
"epoch": 0.1,
"learning_rate": 1.9355369769817423e-05,
"loss": 4.0802,
"step": 6500
},
{
"epoch": 0.1,
"learning_rate": 1.930578282903415e-05,
"loss": 4.0298,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 1.925619588825087e-05,
"loss": 3.9697,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 1.9206608947467594e-05,
"loss": 3.9584,
"step": 8000
},
{
"epoch": 0.13,
"learning_rate": 1.915702200668432e-05,
"loss": 3.9196,
"step": 8500
},
{
"epoch": 0.13,
"learning_rate": 1.9107435065901046e-05,
"loss": 3.9081,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 1.905784812511777e-05,
"loss": 3.8419,
"step": 9500
},
{
"epoch": 0.15,
"learning_rate": 1.9008261184334497e-05,
"loss": 3.8363,
"step": 10000
},
{
"epoch": 0.16,
"learning_rate": 1.895867424355122e-05,
"loss": 3.8047,
"step": 10500
},
{
"epoch": 0.16,
"learning_rate": 1.8909087302767945e-05,
"loss": 3.7728,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 1.8859500361984668e-05,
"loss": 3.7731,
"step": 11500
},
{
"epoch": 0.18,
"learning_rate": 1.8809913421201393e-05,
"loss": 3.7408,
"step": 12000
},
{
"epoch": 0.19,
"learning_rate": 1.876032648041812e-05,
"loss": 3.7027,
"step": 12500
},
{
"epoch": 0.19,
"learning_rate": 1.8710739539634845e-05,
"loss": 3.6865,
"step": 13000
},
{
"epoch": 0.2,
"learning_rate": 1.8661152598851567e-05,
"loss": 3.6456,
"step": 13500
},
{
"epoch": 0.21,
"learning_rate": 1.8611565658068293e-05,
"loss": 3.6539,
"step": 14000
},
{
"epoch": 0.22,
"learning_rate": 1.8561978717285016e-05,
"loss": 3.6222,
"step": 14500
},
{
"epoch": 0.22,
"learning_rate": 1.851239177650174e-05,
"loss": 3.6127,
"step": 15000
},
{
"epoch": 0.23,
"learning_rate": 1.8462804835718467e-05,
"loss": 3.6133,
"step": 15500
},
{
"epoch": 0.24,
"learning_rate": 1.8413217894935193e-05,
"loss": 3.5863,
"step": 16000
},
{
"epoch": 0.25,
"learning_rate": 1.8363630954151915e-05,
"loss": 3.5669,
"step": 16500
},
{
"epoch": 0.25,
"learning_rate": 1.831404401336864e-05,
"loss": 3.5518,
"step": 17000
},
{
"epoch": 0.26,
"learning_rate": 1.8264457072585367e-05,
"loss": 3.5368,
"step": 17500
},
{
"epoch": 0.27,
"learning_rate": 1.821487013180209e-05,
"loss": 3.5294,
"step": 18000
},
{
"epoch": 0.28,
"learning_rate": 1.8165283191018815e-05,
"loss": 3.5097,
"step": 18500
},
{
"epoch": 0.28,
"learning_rate": 1.811569625023554e-05,
"loss": 3.5198,
"step": 19000
},
{
"epoch": 0.29,
"learning_rate": 1.8066109309452263e-05,
"loss": 3.4702,
"step": 19500
},
{
"epoch": 0.3,
"learning_rate": 1.801652236866899e-05,
"loss": 3.485,
"step": 20000
},
{
"epoch": 0.3,
"learning_rate": 1.7966935427885715e-05,
"loss": 3.4853,
"step": 20500
},
{
"epoch": 0.31,
"learning_rate": 1.7917348487102437e-05,
"loss": 3.4395,
"step": 21000
},
{
"epoch": 0.32,
"learning_rate": 1.7867761546319163e-05,
"loss": 3.4515,
"step": 21500
},
{
"epoch": 0.33,
"learning_rate": 1.781817460553589e-05,
"loss": 3.4307,
"step": 22000
},
{
"epoch": 0.33,
"learning_rate": 1.776858766475261e-05,
"loss": 3.4343,
"step": 22500
},
{
"epoch": 0.34,
"learning_rate": 1.7719000723969337e-05,
"loss": 3.4053,
"step": 23000
},
{
"epoch": 0.35,
"learning_rate": 1.7669413783186063e-05,
"loss": 3.4008,
"step": 23500
},
{
"epoch": 0.36,
"learning_rate": 1.7619826842402785e-05,
"loss": 3.3951,
"step": 24000
},
{
"epoch": 0.36,
"learning_rate": 1.757023990161951e-05,
"loss": 3.3871,
"step": 24500
},
{
"epoch": 0.37,
"learning_rate": 1.7520652960836234e-05,
"loss": 3.3822,
"step": 25000
},
{
"epoch": 0.38,
"learning_rate": 1.747106602005296e-05,
"loss": 3.3816,
"step": 25500
},
{
"epoch": 0.39,
"learning_rate": 1.7421479079269685e-05,
"loss": 3.3759,
"step": 26000
},
{
"epoch": 0.39,
"learning_rate": 1.737189213848641e-05,
"loss": 3.3624,
"step": 26500
},
{
"epoch": 0.4,
"learning_rate": 1.7322305197703137e-05,
"loss": 3.3535,
"step": 27000
},
{
"epoch": 0.41,
"learning_rate": 1.727271825691986e-05,
"loss": 3.3366,
"step": 27500
},
{
"epoch": 0.42,
"learning_rate": 1.722313131613658e-05,
"loss": 3.3245,
"step": 28000
},
{
"epoch": 0.42,
"learning_rate": 1.7173544375353307e-05,
"loss": 3.3575,
"step": 28500
},
{
"epoch": 0.43,
"learning_rate": 1.7123957434570033e-05,
"loss": 3.3133,
"step": 29000
},
{
"epoch": 0.44,
"learning_rate": 1.707437049378676e-05,
"loss": 3.3124,
"step": 29500
},
{
"epoch": 0.45,
"learning_rate": 1.7024783553003485e-05,
"loss": 3.3295,
"step": 30000
},
{
"epoch": 0.45,
"learning_rate": 1.6975196612220207e-05,
"loss": 3.3192,
"step": 30500
},
{
"epoch": 0.46,
"learning_rate": 1.692560967143693e-05,
"loss": 3.3241,
"step": 31000
},
{
"epoch": 0.47,
"learning_rate": 1.6876022730653655e-05,
"loss": 3.2989,
"step": 31500
},
{
"epoch": 0.48,
"learning_rate": 1.682643578987038e-05,
"loss": 3.2956,
"step": 32000
},
{
"epoch": 0.48,
"learning_rate": 1.6776848849087107e-05,
"loss": 3.2889,
"step": 32500
},
{
"epoch": 0.49,
"learning_rate": 1.6727261908303833e-05,
"loss": 3.2934,
"step": 33000
},
{
"epoch": 0.5,
"learning_rate": 1.6677674967520555e-05,
"loss": 3.2642,
"step": 33500
},
{
"epoch": 0.51,
"learning_rate": 1.6628088026737277e-05,
"loss": 3.2513,
"step": 34000
},
{
"epoch": 0.51,
"learning_rate": 1.6578501085954003e-05,
"loss": 3.2584,
"step": 34500
},
{
"epoch": 0.52,
"learning_rate": 1.652891414517073e-05,
"loss": 3.2576,
"step": 35000
},
{
"epoch": 0.53,
"learning_rate": 1.6479327204387455e-05,
"loss": 3.2532,
"step": 35500
},
{
"epoch": 0.54,
"learning_rate": 1.642974026360418e-05,
"loss": 3.2349,
"step": 36000
},
{
"epoch": 0.54,
"learning_rate": 1.6380153322820903e-05,
"loss": 3.2349,
"step": 36500
},
{
"epoch": 0.55,
"learning_rate": 1.6330566382037625e-05,
"loss": 3.2158,
"step": 37000
},
{
"epoch": 0.56,
"learning_rate": 1.628097944125435e-05,
"loss": 3.2309,
"step": 37500
},
{
"epoch": 0.57,
"learning_rate": 1.6231392500471077e-05,
"loss": 3.2227,
"step": 38000
},
{
"epoch": 0.57,
"learning_rate": 1.6181805559687803e-05,
"loss": 3.2134,
"step": 38500
},
{
"epoch": 0.58,
"learning_rate": 1.613221861890453e-05,
"loss": 3.2206,
"step": 39000
},
{
"epoch": 0.59,
"learning_rate": 1.608263167812125e-05,
"loss": 3.2002,
"step": 39500
},
{
"epoch": 0.6,
"learning_rate": 1.6033044737337973e-05,
"loss": 3.1988,
"step": 40000
},
{
"epoch": 0.6,
"learning_rate": 1.59834577965547e-05,
"loss": 3.2081,
"step": 40500
},
{
"epoch": 0.61,
"learning_rate": 1.5933870855771425e-05,
"loss": 3.1891,
"step": 41000
},
{
"epoch": 0.62,
"learning_rate": 1.588428391498815e-05,
"loss": 3.2007,
"step": 41500
},
{
"epoch": 0.62,
"learning_rate": 1.5834696974204877e-05,
"loss": 3.1948,
"step": 42000
},
{
"epoch": 0.63,
"learning_rate": 1.57851100334216e-05,
"loss": 3.1673,
"step": 42500
},
{
"epoch": 0.64,
"learning_rate": 1.5735523092638325e-05,
"loss": 3.158,
"step": 43000
},
{
"epoch": 0.65,
"learning_rate": 1.5685936151855047e-05,
"loss": 3.1561,
"step": 43500
},
{
"epoch": 0.65,
"learning_rate": 1.5636349211071773e-05,
"loss": 3.1734,
"step": 44000
},
{
"epoch": 0.66,
"learning_rate": 1.55867622702885e-05,
"loss": 3.1401,
"step": 44500
},
{
"epoch": 0.67,
"learning_rate": 1.5537175329505225e-05,
"loss": 3.1463,
"step": 45000
},
{
"epoch": 0.68,
"learning_rate": 1.5487588388721947e-05,
"loss": 3.1431,
"step": 45500
},
{
"epoch": 0.68,
"learning_rate": 1.5438001447938673e-05,
"loss": 3.1316,
"step": 46000
},
{
"epoch": 0.69,
"learning_rate": 1.5388414507155395e-05,
"loss": 3.1606,
"step": 46500
},
{
"epoch": 0.7,
"learning_rate": 1.533882756637212e-05,
"loss": 3.1362,
"step": 47000
},
{
"epoch": 0.71,
"learning_rate": 1.5289240625588847e-05,
"loss": 3.1335,
"step": 47500
},
{
"epoch": 0.71,
"learning_rate": 1.523965368480557e-05,
"loss": 3.149,
"step": 48000
},
{
"epoch": 0.72,
"learning_rate": 1.5190066744022297e-05,
"loss": 3.1293,
"step": 48500
},
{
"epoch": 0.73,
"learning_rate": 1.514047980323902e-05,
"loss": 3.1286,
"step": 49000
},
{
"epoch": 0.74,
"learning_rate": 1.5090892862455743e-05,
"loss": 3.1196,
"step": 49500
},
{
"epoch": 0.74,
"learning_rate": 1.5041305921672469e-05,
"loss": 3.1238,
"step": 50000
},
{
"epoch": 0.75,
"learning_rate": 1.4991718980889195e-05,
"loss": 3.1033,
"step": 50500
},
{
"epoch": 0.76,
"learning_rate": 1.4942132040105919e-05,
"loss": 3.1112,
"step": 51000
},
{
"epoch": 0.77,
"learning_rate": 1.4892545099322645e-05,
"loss": 3.0936,
"step": 51500
},
{
"epoch": 0.77,
"learning_rate": 1.4842958158539369e-05,
"loss": 3.107,
"step": 52000
},
{
"epoch": 0.78,
"learning_rate": 1.4793371217756094e-05,
"loss": 3.1063,
"step": 52500
},
{
"epoch": 0.79,
"learning_rate": 1.4743784276972817e-05,
"loss": 3.0639,
"step": 53000
},
{
"epoch": 0.8,
"learning_rate": 1.4694197336189543e-05,
"loss": 3.1028,
"step": 53500
},
{
"epoch": 0.8,
"learning_rate": 1.4644610395406267e-05,
"loss": 3.0821,
"step": 54000
},
{
"epoch": 0.81,
"learning_rate": 1.4595023454622992e-05,
"loss": 3.0596,
"step": 54500
},
{
"epoch": 0.82,
"learning_rate": 1.4545436513839717e-05,
"loss": 3.0787,
"step": 55000
},
{
"epoch": 0.83,
"learning_rate": 1.4495849573056442e-05,
"loss": 3.0755,
"step": 55500
},
{
"epoch": 0.83,
"learning_rate": 1.4446262632273165e-05,
"loss": 3.066,
"step": 56000
},
{
"epoch": 0.84,
"learning_rate": 1.439667569148989e-05,
"loss": 3.0695,
"step": 56500
},
{
"epoch": 0.85,
"learning_rate": 1.4347088750706615e-05,
"loss": 3.059,
"step": 57000
},
{
"epoch": 0.86,
"learning_rate": 1.429750180992334e-05,
"loss": 3.0628,
"step": 57500
},
{
"epoch": 0.86,
"learning_rate": 1.4247914869140065e-05,
"loss": 3.0733,
"step": 58000
},
{
"epoch": 0.87,
"learning_rate": 1.419832792835679e-05,
"loss": 3.0591,
"step": 58500
},
{
"epoch": 0.88,
"learning_rate": 1.4148740987573514e-05,
"loss": 3.0468,
"step": 59000
},
{
"epoch": 0.89,
"learning_rate": 1.4099154046790237e-05,
"loss": 3.0265,
"step": 59500
},
{
"epoch": 0.89,
"learning_rate": 1.4049567106006963e-05,
"loss": 3.0282,
"step": 60000
},
{
"epoch": 0.9,
"learning_rate": 1.3999980165223688e-05,
"loss": 3.0222,
"step": 60500
},
{
"epoch": 0.91,
"learning_rate": 1.3950393224440413e-05,
"loss": 3.0275,
"step": 61000
},
{
"epoch": 0.91,
"learning_rate": 1.3900806283657138e-05,
"loss": 3.0277,
"step": 61500
},
{
"epoch": 0.92,
"learning_rate": 1.3851219342873862e-05,
"loss": 3.0551,
"step": 62000
},
{
"epoch": 0.93,
"learning_rate": 1.3801632402090585e-05,
"loss": 3.0205,
"step": 62500
},
{
"epoch": 0.94,
"learning_rate": 1.375204546130731e-05,
"loss": 3.023,
"step": 63000
},
{
"epoch": 0.94,
"learning_rate": 1.3702458520524036e-05,
"loss": 3.0244,
"step": 63500
},
{
"epoch": 0.95,
"learning_rate": 1.365287157974076e-05,
"loss": 3.0116,
"step": 64000
},
{
"epoch": 0.96,
"learning_rate": 1.3603284638957486e-05,
"loss": 3.0141,
"step": 64500
},
{
"epoch": 0.97,
"learning_rate": 1.355369769817421e-05,
"loss": 3.0284,
"step": 65000
},
{
"epoch": 0.97,
"learning_rate": 1.3504110757390933e-05,
"loss": 3.0236,
"step": 65500
},
{
"epoch": 0.98,
"learning_rate": 1.3454523816607659e-05,
"loss": 3.013,
"step": 66000
},
{
"epoch": 0.99,
"learning_rate": 1.3404936875824384e-05,
"loss": 3.0027,
"step": 66500
},
{
"epoch": 1.0,
"learning_rate": 1.3355349935041108e-05,
"loss": 3.0155,
"step": 67000
},
{
"epoch": 1.0,
"eval_bleu": 11.298551127218651,
"eval_loss": 2.3749005794525146,
"eval_runtime": 4929.9601,
"eval_samples_per_second": 8.201,
"eval_steps_per_second": 0.513,
"step": 67222
},
{
"epoch": 1.0,
"learning_rate": 1.3305762994257834e-05,
"loss": 3.0195,
"step": 67500
},
{
"epoch": 1.01,
"learning_rate": 1.3256176053474558e-05,
"loss": 2.9924,
"step": 68000
},
{
"epoch": 1.02,
"learning_rate": 1.3206589112691284e-05,
"loss": 2.997,
"step": 68500
},
{
"epoch": 1.03,
"learning_rate": 1.3157002171908007e-05,
"loss": 2.9694,
"step": 69000
},
{
"epoch": 1.03,
"learning_rate": 1.3107415231124732e-05,
"loss": 2.9804,
"step": 69500
},
{
"epoch": 1.04,
"learning_rate": 1.3057828290341456e-05,
"loss": 2.9879,
"step": 70000
},
{
"epoch": 1.05,
"learning_rate": 1.3008241349558182e-05,
"loss": 2.9919,
"step": 70500
},
{
"epoch": 1.06,
"learning_rate": 1.2958654408774906e-05,
"loss": 2.9875,
"step": 71000
},
{
"epoch": 1.06,
"learning_rate": 1.2909067467991632e-05,
"loss": 2.9912,
"step": 71500
},
{
"epoch": 1.07,
"learning_rate": 1.2859480527208354e-05,
"loss": 2.974,
"step": 72000
},
{
"epoch": 1.08,
"learning_rate": 1.280989358642508e-05,
"loss": 2.9581,
"step": 72500
},
{
"epoch": 1.09,
"learning_rate": 1.2760306645641804e-05,
"loss": 2.975,
"step": 73000
},
{
"epoch": 1.09,
"learning_rate": 1.271071970485853e-05,
"loss": 2.9737,
"step": 73500
},
{
"epoch": 1.1,
"learning_rate": 1.2661132764075254e-05,
"loss": 2.9722,
"step": 74000
},
{
"epoch": 1.11,
"learning_rate": 1.261154582329198e-05,
"loss": 2.9727,
"step": 74500
},
{
"epoch": 1.12,
"learning_rate": 1.2561958882508702e-05,
"loss": 2.9618,
"step": 75000
},
{
"epoch": 1.12,
"learning_rate": 1.2512371941725428e-05,
"loss": 2.9554,
"step": 75500
},
{
"epoch": 1.13,
"learning_rate": 1.2462785000942152e-05,
"loss": 2.961,
"step": 76000
},
{
"epoch": 1.14,
"learning_rate": 1.2413198060158878e-05,
"loss": 2.9627,
"step": 76500
},
{
"epoch": 1.15,
"learning_rate": 1.2363611119375602e-05,
"loss": 2.9896,
"step": 77000
}
],
"max_steps": 201666,
"num_train_epochs": 3,
"total_flos": 9.093214173619814e+16,
"trial_name": null,
"trial_params": null
}