{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.150456695724614,
  "global_step": 77336,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.9950413059216727e-05,
      "loss": 14.0627,
      "step": 500
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.9900826118433453e-05,
      "loss": 6.3799,
      "step": 1000
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.9851239177650176e-05,
      "loss": 5.31,
      "step": 1500
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9801652236866898e-05,
      "loss": 4.9781,
      "step": 2000
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9752065296083624e-05,
      "loss": 4.7392,
      "step": 2500
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.970247835530035e-05,
      "loss": 4.5779,
      "step": 3000
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9652891414517075e-05,
      "loss": 4.4691,
      "step": 3500
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.96033044737338e-05,
      "loss": 4.3745,
      "step": 4000
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9553717532950524e-05,
      "loss": 4.2883,
      "step": 4500
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9504130592167246e-05,
      "loss": 4.2342,
      "step": 5000
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9454543651383972e-05,
      "loss": 4.1614,
      "step": 5500
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9404956710600698e-05,
      "loss": 4.1279,
      "step": 6000
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9355369769817423e-05,
      "loss": 4.0802,
      "step": 6500
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.930578282903415e-05,
      "loss": 4.0298,
      "step": 7000
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.925619588825087e-05,
      "loss": 3.9697,
      "step": 7500
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9206608947467594e-05,
      "loss": 3.9584,
      "step": 8000
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.915702200668432e-05,
      "loss": 3.9196,
      "step": 8500
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9107435065901046e-05,
      "loss": 3.9081,
      "step": 9000
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.905784812511777e-05,
      "loss": 3.8419,
      "step": 9500
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9008261184334497e-05,
      "loss": 3.8363,
      "step": 10000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.895867424355122e-05,
      "loss": 3.8047,
      "step": 10500
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8909087302767945e-05,
      "loss": 3.7728,
      "step": 11000
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8859500361984668e-05,
      "loss": 3.7731,
      "step": 11500
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8809913421201393e-05,
      "loss": 3.7408,
      "step": 12000
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.876032648041812e-05,
      "loss": 3.7027,
      "step": 12500
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8710739539634845e-05,
      "loss": 3.6865,
      "step": 13000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8661152598851567e-05,
      "loss": 3.6456,
      "step": 13500
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8611565658068293e-05,
      "loss": 3.6539,
      "step": 14000
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.8561978717285016e-05,
      "loss": 3.6222,
      "step": 14500
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.851239177650174e-05,
      "loss": 3.6127,
      "step": 15000
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.8462804835718467e-05,
      "loss": 3.6133,
      "step": 15500
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8413217894935193e-05,
      "loss": 3.5863,
      "step": 16000
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.8363630954151915e-05,
      "loss": 3.5669,
      "step": 16500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.831404401336864e-05,
      "loss": 3.5518,
      "step": 17000
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.8264457072585367e-05,
      "loss": 3.5368,
      "step": 17500
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.821487013180209e-05,
      "loss": 3.5294,
      "step": 18000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8165283191018815e-05,
      "loss": 3.5097,
      "step": 18500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.811569625023554e-05,
      "loss": 3.5198,
      "step": 19000
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.8066109309452263e-05,
      "loss": 3.4702,
      "step": 19500
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.801652236866899e-05,
      "loss": 3.485,
      "step": 20000
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7966935427885715e-05,
      "loss": 3.4853,
      "step": 20500
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.7917348487102437e-05,
      "loss": 3.4395,
      "step": 21000
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.7867761546319163e-05,
      "loss": 3.4515,
      "step": 21500
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.781817460553589e-05,
      "loss": 3.4307,
      "step": 22000
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.776858766475261e-05,
      "loss": 3.4343,
      "step": 22500
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.7719000723969337e-05,
      "loss": 3.4053,
      "step": 23000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.7669413783186063e-05,
      "loss": 3.4008,
      "step": 23500
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.7619826842402785e-05,
      "loss": 3.3951,
      "step": 24000
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.757023990161951e-05,
      "loss": 3.3871,
      "step": 24500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.7520652960836234e-05,
      "loss": 3.3822,
      "step": 25000
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.747106602005296e-05,
      "loss": 3.3816,
      "step": 25500
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.7421479079269685e-05,
      "loss": 3.3759,
      "step": 26000
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.737189213848641e-05,
      "loss": 3.3624,
      "step": 26500
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.7322305197703137e-05,
      "loss": 3.3535,
      "step": 27000
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.727271825691986e-05,
      "loss": 3.3366,
      "step": 27500
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.722313131613658e-05,
      "loss": 3.3245,
      "step": 28000
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.7173544375353307e-05,
      "loss": 3.3575,
      "step": 28500
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.7123957434570033e-05,
      "loss": 3.3133,
      "step": 29000
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.707437049378676e-05,
      "loss": 3.3124,
      "step": 29500
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.7024783553003485e-05,
      "loss": 3.3295,
      "step": 30000
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.6975196612220207e-05,
      "loss": 3.3192,
      "step": 30500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.692560967143693e-05,
      "loss": 3.3241,
      "step": 31000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.6876022730653655e-05,
      "loss": 3.2989,
      "step": 31500
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.682643578987038e-05,
      "loss": 3.2956,
      "step": 32000
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.6776848849087107e-05,
      "loss": 3.2889,
      "step": 32500
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.6727261908303833e-05,
      "loss": 3.2934,
      "step": 33000
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.6677674967520555e-05,
      "loss": 3.2642,
      "step": 33500
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.6628088026737277e-05,
      "loss": 3.2513,
      "step": 34000
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.6578501085954003e-05,
      "loss": 3.2584,
      "step": 34500
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.652891414517073e-05,
      "loss": 3.2576,
      "step": 35000
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.6479327204387455e-05,
      "loss": 3.2532,
      "step": 35500
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.642974026360418e-05,
      "loss": 3.2349,
      "step": 36000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.6380153322820903e-05,
      "loss": 3.2349,
      "step": 36500
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6330566382037625e-05,
      "loss": 3.2158,
      "step": 37000
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.628097944125435e-05,
      "loss": 3.2309,
      "step": 37500
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.6231392500471077e-05,
      "loss": 3.2227,
      "step": 38000
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.6181805559687803e-05,
      "loss": 3.2134,
      "step": 38500
    },
    {
      "epoch": 0.58,
      "learning_rate": 1.613221861890453e-05,
      "loss": 3.2206,
      "step": 39000
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.608263167812125e-05,
      "loss": 3.2002,
      "step": 39500
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.6033044737337973e-05,
      "loss": 3.1988,
      "step": 40000
    },
    {
      "epoch": 0.6,
      "learning_rate": 1.59834577965547e-05,
      "loss": 3.2081,
      "step": 40500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.5933870855771425e-05,
      "loss": 3.1891,
      "step": 41000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.588428391498815e-05,
      "loss": 3.2007,
      "step": 41500
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.5834696974204877e-05,
      "loss": 3.1948,
      "step": 42000
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.57851100334216e-05,
      "loss": 3.1673,
      "step": 42500
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.5735523092638325e-05,
      "loss": 3.158,
      "step": 43000
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.5685936151855047e-05,
      "loss": 3.1561,
      "step": 43500
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.5636349211071773e-05,
      "loss": 3.1734,
      "step": 44000
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.55867622702885e-05,
      "loss": 3.1401,
      "step": 44500
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.5537175329505225e-05,
      "loss": 3.1463,
      "step": 45000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5487588388721947e-05,
      "loss": 3.1431,
      "step": 45500
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5438001447938673e-05,
      "loss": 3.1316,
      "step": 46000
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5388414507155395e-05,
      "loss": 3.1606,
      "step": 46500
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.533882756637212e-05,
      "loss": 3.1362,
      "step": 47000
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.5289240625588847e-05,
      "loss": 3.1335,
      "step": 47500
    },
    {
      "epoch": 0.71,
      "learning_rate": 1.523965368480557e-05,
      "loss": 3.149,
      "step": 48000
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.5190066744022297e-05,
      "loss": 3.1293,
      "step": 48500
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.514047980323902e-05,
      "loss": 3.1286,
      "step": 49000
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5090892862455743e-05,
      "loss": 3.1196,
      "step": 49500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5041305921672469e-05,
      "loss": 3.1238,
      "step": 50000
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.4991718980889195e-05,
      "loss": 3.1033,
      "step": 50500
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.4942132040105919e-05,
      "loss": 3.1112,
      "step": 51000
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4892545099322645e-05,
      "loss": 3.0936,
      "step": 51500
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4842958158539369e-05,
      "loss": 3.107,
      "step": 52000
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.4793371217756094e-05,
      "loss": 3.1063,
      "step": 52500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.4743784276972817e-05,
      "loss": 3.0639,
      "step": 53000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4694197336189543e-05,
      "loss": 3.1028,
      "step": 53500
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4644610395406267e-05,
      "loss": 3.0821,
      "step": 54000
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.4595023454622992e-05,
      "loss": 3.0596,
      "step": 54500
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.4545436513839717e-05,
      "loss": 3.0787,
      "step": 55000
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4495849573056442e-05,
      "loss": 3.0755,
      "step": 55500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.4446262632273165e-05,
      "loss": 3.066,
      "step": 56000
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.439667569148989e-05,
      "loss": 3.0695,
      "step": 56500
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.4347088750706615e-05,
      "loss": 3.059,
      "step": 57000
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.429750180992334e-05,
      "loss": 3.0628,
      "step": 57500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4247914869140065e-05,
      "loss": 3.0733,
      "step": 58000
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.419832792835679e-05,
      "loss": 3.0591,
      "step": 58500
    },
    {
      "epoch": 0.88,
      "learning_rate": 1.4148740987573514e-05,
      "loss": 3.0468,
      "step": 59000
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4099154046790237e-05,
      "loss": 3.0265,
      "step": 59500
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.4049567106006963e-05,
      "loss": 3.0282,
      "step": 60000
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.3999980165223688e-05,
      "loss": 3.0222,
      "step": 60500
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.3950393224440413e-05,
      "loss": 3.0275,
      "step": 61000
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.3900806283657138e-05,
      "loss": 3.0277,
      "step": 61500
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3851219342873862e-05,
      "loss": 3.0551,
      "step": 62000
    },
    {
      "epoch": 0.93,
      "learning_rate": 1.3801632402090585e-05,
      "loss": 3.0205,
      "step": 62500
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.375204546130731e-05,
      "loss": 3.023,
      "step": 63000
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.3702458520524036e-05,
      "loss": 3.0244,
      "step": 63500
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.365287157974076e-05,
      "loss": 3.0116,
      "step": 64000
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.3603284638957486e-05,
      "loss": 3.0141,
      "step": 64500
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.355369769817421e-05,
      "loss": 3.0284,
      "step": 65000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.3504110757390933e-05,
      "loss": 3.0236,
      "step": 65500
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3454523816607659e-05,
      "loss": 3.013,
      "step": 66000
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.3404936875824384e-05,
      "loss": 3.0027,
      "step": 66500
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3355349935041108e-05,
      "loss": 3.0155,
      "step": 67000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 11.298551127218651,
      "eval_loss": 2.3749005794525146,
      "eval_runtime": 4929.9601,
      "eval_samples_per_second": 8.201,
      "eval_steps_per_second": 0.513,
      "step": 67222
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.3305762994257834e-05,
      "loss": 3.0195,
      "step": 67500
    },
    {
      "epoch": 1.01,
      "learning_rate": 1.3256176053474558e-05,
      "loss": 2.9924,
      "step": 68000
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.3206589112691284e-05,
      "loss": 2.997,
      "step": 68500
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3157002171908007e-05,
      "loss": 2.9694,
      "step": 69000
    },
    {
      "epoch": 1.03,
      "learning_rate": 1.3107415231124732e-05,
      "loss": 2.9804,
      "step": 69500
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.3057828290341456e-05,
      "loss": 2.9879,
      "step": 70000
    },
    {
      "epoch": 1.05,
      "learning_rate": 1.3008241349558182e-05,
      "loss": 2.9919,
      "step": 70500
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.2958654408774906e-05,
      "loss": 2.9875,
      "step": 71000
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.2909067467991632e-05,
      "loss": 2.9912,
      "step": 71500
    },
    {
      "epoch": 1.07,
      "learning_rate": 1.2859480527208354e-05,
      "loss": 2.974,
      "step": 72000
    },
    {
      "epoch": 1.08,
      "learning_rate": 1.280989358642508e-05,
      "loss": 2.9581,
      "step": 72500
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.2760306645641804e-05,
      "loss": 2.975,
      "step": 73000
    },
    {
      "epoch": 1.09,
      "learning_rate": 1.271071970485853e-05,
      "loss": 2.9737,
      "step": 73500
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.2661132764075254e-05,
      "loss": 2.9722,
      "step": 74000
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.261154582329198e-05,
      "loss": 2.9727,
      "step": 74500
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2561958882508702e-05,
      "loss": 2.9618,
      "step": 75000
    },
    {
      "epoch": 1.12,
      "learning_rate": 1.2512371941725428e-05,
      "loss": 2.9554,
      "step": 75500
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.2462785000942152e-05,
      "loss": 2.961,
      "step": 76000
    },
    {
      "epoch": 1.14,
      "learning_rate": 1.2413198060158878e-05,
      "loss": 2.9627,
      "step": 76500
    },
    {
      "epoch": 1.15,
      "learning_rate": 1.2363611119375602e-05,
      "loss": 2.9896,
      "step": 77000
    }
  ],
  "max_steps": 201666,
  "num_train_epochs": 3,
  "total_flos": 9.093214173619814e+16,
  "trial_name": null,
  "trial_params": null
}