{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 68478,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.963491924413681e-05,
      "loss": 2.5929,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.9269838488273604e-05,
      "loss": 2.2003,
      "step": 1000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.890475773241041e-05,
      "loss": 2.0692,
      "step": 1500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.853967697654722e-05,
      "loss": 1.9485,
      "step": 2000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.817459622068401e-05,
      "loss": 1.9084,
      "step": 2500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.780951546482082e-05,
      "loss": 1.8495,
      "step": 3000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.744443470895763e-05,
      "loss": 1.8025,
      "step": 3500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.707935395309443e-05,
      "loss": 1.7957,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.671427319723123e-05,
      "loss": 1.7414,
      "step": 4500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.634919244136804e-05,
      "loss": 1.7284,
      "step": 5000
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.598411168550484e-05,
      "loss": 1.7033,
      "step": 5500
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.561903092964164e-05,
      "loss": 1.705,
      "step": 6000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.525395017377844e-05,
      "loss": 1.667,
      "step": 6500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.4888869417915246e-05,
      "loss": 1.6606,
      "step": 7000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.452378866205205e-05,
      "loss": 1.6179,
      "step": 7500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.415870790618885e-05,
      "loss": 1.5977,
      "step": 8000
    },
    {
      "epoch": 0.37,
      "learning_rate": 4.3793627150325655e-05,
      "loss": 1.5827,
      "step": 8500
    },
    {
      "epoch": 0.39,
      "learning_rate": 4.3428546394462456e-05,
      "loss": 1.5621,
      "step": 9000
    },
    {
      "epoch": 0.42,
      "learning_rate": 4.306346563859926e-05,
      "loss": 1.5675,
      "step": 9500
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.2698384882736064e-05,
      "loss": 1.5729,
      "step": 10000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.2333304126872865e-05,
      "loss": 1.546,
      "step": 10500
    },
    {
      "epoch": 0.48,
      "learning_rate": 4.1968223371009666e-05,
      "loss": 1.5546,
      "step": 11000
    },
    {
      "epoch": 0.5,
      "learning_rate": 4.160314261514647e-05,
      "loss": 1.5163,
      "step": 11500
    },
    {
      "epoch": 0.53,
      "learning_rate": 4.123806185928328e-05,
      "loss": 1.5097,
      "step": 12000
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.0872981103420075e-05,
      "loss": 1.5074,
      "step": 12500
    },
    {
      "epoch": 0.57,
      "learning_rate": 4.050790034755688e-05,
      "loss": 1.4856,
      "step": 13000
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.014281959169369e-05,
      "loss": 1.5068,
      "step": 13500
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.9777738835830483e-05,
      "loss": 1.5035,
      "step": 14000
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.941265807996729e-05,
      "loss": 1.4676,
      "step": 14500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.90475773241041e-05,
      "loss": 1.4623,
      "step": 15000
    },
    {
      "epoch": 0.68,
      "learning_rate": 3.868249656824089e-05,
      "loss": 1.4472,
      "step": 15500
    },
    {
      "epoch": 0.7,
      "learning_rate": 3.83174158123777e-05,
      "loss": 1.447,
      "step": 16000
    },
    {
      "epoch": 0.72,
      "learning_rate": 3.795233505651451e-05,
      "loss": 1.4557,
      "step": 16500
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.75872543006513e-05,
      "loss": 1.4439,
      "step": 17000
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.722217354478811e-05,
      "loss": 1.4332,
      "step": 17500
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.685709278892491e-05,
      "loss": 1.4365,
      "step": 18000
    },
    {
      "epoch": 0.81,
      "learning_rate": 3.649201203306172e-05,
      "loss": 1.4098,
      "step": 18500
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.612693127719852e-05,
      "loss": 1.4125,
      "step": 19000
    },
    {
      "epoch": 0.85,
      "learning_rate": 3.576185052133532e-05,
      "loss": 1.4088,
      "step": 19500
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.5396769765472126e-05,
      "loss": 1.3968,
      "step": 20000
    },
    {
      "epoch": 0.9,
      "learning_rate": 3.503168900960893e-05,
      "loss": 1.4125,
      "step": 20500
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.466660825374573e-05,
      "loss": 1.385,
      "step": 21000
    },
    {
      "epoch": 0.94,
      "learning_rate": 3.4301527497882535e-05,
      "loss": 1.3985,
      "step": 21500
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.3936446742019336e-05,
      "loss": 1.3747,
      "step": 22000
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.3571365986156136e-05,
      "loss": 1.3743,
      "step": 22500
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.3206285230292944e-05,
      "loss": 1.3065,
      "step": 23000
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.2841204474429745e-05,
      "loss": 1.1883,
      "step": 23500
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.2476123718566545e-05,
      "loss": 1.2017,
      "step": 24000
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.211104296270335e-05,
      "loss": 1.1919,
      "step": 24500
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.1745962206840153e-05,
      "loss": 1.1873,
      "step": 25000
    },
    {
      "epoch": 1.12,
      "learning_rate": 3.1380881450976954e-05,
      "loss": 1.2045,
      "step": 25500
    },
    {
      "epoch": 1.14,
      "learning_rate": 3.101580069511376e-05,
      "loss": 1.1915,
      "step": 26000
    },
    {
      "epoch": 1.16,
      "learning_rate": 3.065071993925057e-05,
      "loss": 1.1932,
      "step": 26500
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.0285639183387367e-05,
      "loss": 1.1793,
      "step": 27000
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.992055842752417e-05,
      "loss": 1.2086,
      "step": 27500
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.9555477671660975e-05,
      "loss": 1.2023,
      "step": 28000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.9190396915797775e-05,
      "loss": 1.1953,
      "step": 28500
    },
    {
      "epoch": 1.27,
      "learning_rate": 2.882531615993458e-05,
      "loss": 1.2049,
      "step": 29000
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.8460235404071384e-05,
      "loss": 1.1931,
      "step": 29500
    },
    {
      "epoch": 1.31,
      "learning_rate": 2.8095154648208184e-05,
      "loss": 1.197,
      "step": 30000
    },
    {
      "epoch": 1.34,
      "learning_rate": 2.773007389234499e-05,
      "loss": 1.1704,
      "step": 30500
    },
    {
      "epoch": 1.36,
      "learning_rate": 2.7364993136481793e-05,
      "loss": 1.1548,
      "step": 31000
    },
    {
      "epoch": 1.38,
      "learning_rate": 2.6999912380618593e-05,
      "loss": 1.187,
      "step": 31500
    },
    {
      "epoch": 1.4,
      "learning_rate": 2.6634831624755397e-05,
      "loss": 1.1769,
      "step": 32000
    },
    {
      "epoch": 1.42,
      "learning_rate": 2.62697508688922e-05,
      "loss": 1.181,
      "step": 32500
    },
    {
      "epoch": 1.45,
      "learning_rate": 2.5904670113029006e-05,
      "loss": 1.1564,
      "step": 33000
    },
    {
      "epoch": 1.47,
      "learning_rate": 2.5539589357165806e-05,
      "loss": 1.1742,
      "step": 33500
    },
    {
      "epoch": 1.49,
      "learning_rate": 2.517450860130261e-05,
      "loss": 1.1651,
      "step": 34000
    },
    {
      "epoch": 1.51,
      "learning_rate": 2.480942784543941e-05,
      "loss": 1.1637,
      "step": 34500
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.4444347089576215e-05,
      "loss": 1.1531,
      "step": 35000
    },
    {
      "epoch": 1.56,
      "learning_rate": 2.407926633371302e-05,
      "loss": 1.1703,
      "step": 35500
    },
    {
      "epoch": 1.58,
      "learning_rate": 2.371418557784982e-05,
      "loss": 1.1482,
      "step": 36000
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.3349104821986624e-05,
      "loss": 1.1501,
      "step": 36500
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.2984024066123428e-05,
      "loss": 1.1525,
      "step": 37000
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.261894331026023e-05,
      "loss": 1.1576,
      "step": 37500
    },
    {
      "epoch": 1.66,
      "learning_rate": 2.2253862554397033e-05,
      "loss": 1.1518,
      "step": 38000
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.1888781798533837e-05,
      "loss": 1.1594,
      "step": 38500
    },
    {
      "epoch": 1.71,
      "learning_rate": 2.152370104267064e-05,
      "loss": 1.1614,
      "step": 39000
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1158620286807442e-05,
      "loss": 1.185,
      "step": 39500
    },
    {
      "epoch": 1.75,
      "learning_rate": 2.0793539530944246e-05,
      "loss": 1.1399,
      "step": 40000
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.042845877508105e-05,
      "loss": 1.1262,
      "step": 40500
    },
    {
      "epoch": 1.8,
      "learning_rate": 2.006337801921785e-05,
      "loss": 1.1458,
      "step": 41000
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9698297263354655e-05,
      "loss": 1.1379,
      "step": 41500
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.933321650749146e-05,
      "loss": 1.1437,
      "step": 42000
    },
    {
      "epoch": 1.86,
      "learning_rate": 1.896813575162826e-05,
      "loss": 1.1161,
      "step": 42500
    },
    {
      "epoch": 1.88,
      "learning_rate": 1.8603054995765064e-05,
      "loss": 1.1205,
      "step": 43000
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8237974239901868e-05,
      "loss": 1.1422,
      "step": 43500
    },
    {
      "epoch": 1.93,
      "learning_rate": 1.787289348403867e-05,
      "loss": 1.1265,
      "step": 44000
    },
    {
      "epoch": 1.95,
      "learning_rate": 1.7507812728175473e-05,
      "loss": 1.0999,
      "step": 44500
    },
    {
      "epoch": 1.97,
      "learning_rate": 1.7142731972312277e-05,
      "loss": 1.11,
      "step": 45000
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.677765121644908e-05,
      "loss": 1.1267,
      "step": 45500
    },
    {
      "epoch": 2.02,
      "learning_rate": 1.6412570460585882e-05,
      "loss": 1.0115,
      "step": 46000
    },
    {
      "epoch": 2.04,
      "learning_rate": 1.6047489704722686e-05,
      "loss": 0.9535,
      "step": 46500
    },
    {
      "epoch": 2.06,
      "learning_rate": 1.568240894885949e-05,
      "loss": 0.9458,
      "step": 47000
    },
    {
      "epoch": 2.08,
      "learning_rate": 1.531732819299629e-05,
      "loss": 0.9517,
      "step": 47500
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.4952247437133093e-05,
      "loss": 0.9496,
      "step": 48000
    },
    {
      "epoch": 2.12,
      "learning_rate": 1.4587166681269897e-05,
      "loss": 0.9649,
      "step": 48500
    },
    {
      "epoch": 2.15,
      "learning_rate": 1.42220859254067e-05,
      "loss": 0.934,
      "step": 49000
    },
    {
      "epoch": 2.17,
      "learning_rate": 1.3857005169543506e-05,
      "loss": 0.9757,
      "step": 49500
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3491924413680306e-05,
      "loss": 0.9349,
      "step": 50000
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.3126843657817109e-05,
      "loss": 0.9548,
      "step": 50500
    },
    {
      "epoch": 2.23,
      "learning_rate": 1.2761762901953914e-05,
      "loss": 0.9426,
      "step": 51000
    },
    {
      "epoch": 2.26,
      "learning_rate": 1.2396682146090715e-05,
      "loss": 0.9543,
      "step": 51500
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.203160139022752e-05,
      "loss": 0.9438,
      "step": 52000
    },
    {
      "epoch": 2.3,
      "learning_rate": 1.1666520634364322e-05,
      "loss": 0.963,
      "step": 52500
    },
    {
      "epoch": 2.32,
      "learning_rate": 1.1301439878501124e-05,
      "loss": 0.9334,
      "step": 53000
    },
    {
      "epoch": 2.34,
      "learning_rate": 1.0936359122637928e-05,
      "loss": 0.9526,
      "step": 53500
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.057127836677473e-05,
      "loss": 0.9397,
      "step": 54000
    },
    {
      "epoch": 2.39,
      "learning_rate": 1.0206197610911535e-05,
      "loss": 0.9569,
      "step": 54500
    },
    {
      "epoch": 2.41,
      "learning_rate": 9.841116855048337e-06,
      "loss": 0.94,
      "step": 55000
    },
    {
      "epoch": 2.43,
      "learning_rate": 9.47603609918514e-06,
      "loss": 0.9345,
      "step": 55500
    },
    {
      "epoch": 2.45,
      "learning_rate": 9.110955343321944e-06,
      "loss": 0.933,
      "step": 56000
    },
    {
      "epoch": 2.48,
      "learning_rate": 8.745874587458746e-06,
      "loss": 0.9289,
      "step": 56500
    },
    {
      "epoch": 2.5,
      "learning_rate": 8.380793831595548e-06,
      "loss": 0.9581,
      "step": 57000
    },
    {
      "epoch": 2.52,
      "learning_rate": 8.015713075732353e-06,
      "loss": 0.9203,
      "step": 57500
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.650632319869155e-06,
      "loss": 0.9539,
      "step": 58000
    },
    {
      "epoch": 2.56,
      "learning_rate": 7.285551564005959e-06,
      "loss": 0.9482,
      "step": 58500
    },
    {
      "epoch": 2.58,
      "learning_rate": 6.920470808142762e-06,
      "loss": 0.9206,
      "step": 59000
    },
    {
      "epoch": 2.61,
      "learning_rate": 6.555390052279564e-06,
      "loss": 0.9186,
      "step": 59500
    },
    {
      "epoch": 2.63,
      "learning_rate": 6.190309296416367e-06,
      "loss": 0.9168,
      "step": 60000
    },
    {
      "epoch": 2.65,
      "learning_rate": 5.825228540553171e-06,
      "loss": 0.9151,
      "step": 60500
    },
    {
      "epoch": 2.67,
      "learning_rate": 5.460147784689974e-06,
      "loss": 0.924,
      "step": 61000
    },
    {
      "epoch": 2.69,
      "learning_rate": 5.095067028826777e-06,
      "loss": 0.916,
      "step": 61500
    },
    {
      "epoch": 2.72,
      "learning_rate": 4.729986272963579e-06,
      "loss": 0.9311,
      "step": 62000
    },
    {
      "epoch": 2.74,
      "learning_rate": 4.364905517100383e-06,
      "loss": 0.9175,
      "step": 62500
    },
    {
      "epoch": 2.76,
      "learning_rate": 3.999824761237186e-06,
      "loss": 0.9121,
      "step": 63000
    },
    {
      "epoch": 2.78,
      "learning_rate": 3.634744005373989e-06,
      "loss": 0.9226,
      "step": 63500
    },
    {
      "epoch": 2.8,
      "learning_rate": 3.269663249510792e-06,
      "loss": 0.9335,
      "step": 64000
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.9045824936475953e-06,
      "loss": 0.9104,
      "step": 64500
    },
    {
      "epoch": 2.85,
      "learning_rate": 2.539501737784398e-06,
      "loss": 0.9197,
      "step": 65000
    },
    {
      "epoch": 2.87,
      "learning_rate": 2.1744209819212013e-06,
      "loss": 0.8976,
      "step": 65500
    },
    {
      "epoch": 2.89,
      "learning_rate": 1.8093402260580042e-06,
      "loss": 0.9401,
      "step": 66000
    },
    {
      "epoch": 2.91,
      "learning_rate": 1.444259470194807e-06,
      "loss": 0.9265,
      "step": 66500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.07917871433161e-06,
      "loss": 0.9255,
      "step": 67000
    },
    {
      "epoch": 2.96,
      "learning_rate": 7.140979584684132e-07,
      "loss": 0.9099,
      "step": 67500
    },
    {
      "epoch": 2.98,
      "learning_rate": 3.490172026052163e-07,
      "loss": 0.9033,
      "step": 68000
    },
    {
      "epoch": 3.0,
      "step": 68478,
      "total_flos": 9.218136679415808e+16,
      "train_loss": 1.233144689885432,
      "train_runtime": 17748.5921,
      "train_samples_per_second": 38.582,
      "train_steps_per_second": 3.858
    }
  ],
  "max_steps": 68478,
  "num_train_epochs": 3,
  "total_flos": 9.218136679415808e+16,
  "trial_name": null,
  "trial_params": null
}