|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 53280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.876876876876877e-08, |
|
"loss": 26.3774, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 5.011261261261262e-06, |
|
"loss": 28.5448, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.0022522522522524e-05, |
|
"loss": 27.0258, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.5033783783783784e-05, |
|
"loss": 22.7776, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0045045045045048e-05, |
|
"loss": 17.4592, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.505630630630631e-05, |
|
"loss": 8.3509, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.006756756756757e-05, |
|
"loss": 0.8438, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.507882882882883e-05, |
|
"loss": 0.3406, |
|
"step": 1869 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.0090090090090096e-05, |
|
"loss": 0.2968, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.510135135135135e-05, |
|
"loss": 0.2324, |
|
"step": 2403 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.10719971358776093, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.1818, |
|
"eval_samples_per_second": 302.937, |
|
"eval_steps_per_second": 20.309, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.011261261261262e-05, |
|
"loss": 0.122, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.512387387387388e-05, |
|
"loss": 0.1234, |
|
"step": 2937 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 6.013513513513514e-05, |
|
"loss": 0.1377, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 6.514639639639641e-05, |
|
"loss": 0.0721, |
|
"step": 3471 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 7.015765765765766e-05, |
|
"loss": 0.0447, |
|
"step": 3738 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 7.516891891891891e-05, |
|
"loss": 0.0262, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.018018018018019e-05, |
|
"loss": 0.0262, |
|
"step": 4272 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.519144144144144e-05, |
|
"loss": 0.0235, |
|
"step": 4539 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 9.02027027027027e-05, |
|
"loss": 0.0163, |
|
"step": 4806 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 9.521396396396397e-05, |
|
"loss": 0.0151, |
|
"step": 5073 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.04358534514904022, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.906, |
|
"eval_samples_per_second": 395.165, |
|
"eval_steps_per_second": 26.492, |
|
"step": 5328 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.997497497497498e-05, |
|
"loss": 0.014, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.941816816816817e-05, |
|
"loss": 0.0136, |
|
"step": 5607 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 9.886136136136137e-05, |
|
"loss": 0.007, |
|
"step": 5874 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 9.830455455455457e-05, |
|
"loss": 0.0144, |
|
"step": 6141 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.774774774774775e-05, |
|
"loss": 0.008, |
|
"step": 6408 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 9.719094094094095e-05, |
|
"loss": 0.0109, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 9.663413413413414e-05, |
|
"loss": 0.0141, |
|
"step": 6942 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.607732732732732e-05, |
|
"loss": 0.011, |
|
"step": 7209 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 9.552052052052053e-05, |
|
"loss": 0.0136, |
|
"step": 7476 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 9.496371371371372e-05, |
|
"loss": 0.0094, |
|
"step": 7743 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.024132976308465004, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9049, |
|
"eval_samples_per_second": 395.615, |
|
"eval_steps_per_second": 26.522, |
|
"step": 7992 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 9.440690690690692e-05, |
|
"loss": 0.0158, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 9.38501001001001e-05, |
|
"loss": 0.0085, |
|
"step": 8277 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.329329329329329e-05, |
|
"loss": 0.0074, |
|
"step": 8544 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 9.27364864864865e-05, |
|
"loss": 0.0069, |
|
"step": 8811 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 9.217967967967968e-05, |
|
"loss": 0.0049, |
|
"step": 9078 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 9.162287287287288e-05, |
|
"loss": 0.0045, |
|
"step": 9345 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.106606606606607e-05, |
|
"loss": 0.0053, |
|
"step": 9612 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 9.050925925925925e-05, |
|
"loss": 0.0069, |
|
"step": 9879 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 8.995245245245245e-05, |
|
"loss": 0.0067, |
|
"step": 10146 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 8.939564564564565e-05, |
|
"loss": 0.0056, |
|
"step": 10413 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.030885161831974983, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9782, |
|
"eval_samples_per_second": 365.991, |
|
"eval_steps_per_second": 24.536, |
|
"step": 10656 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 8.883883883883885e-05, |
|
"loss": 0.0057, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 8.828203203203204e-05, |
|
"loss": 0.0018, |
|
"step": 10947 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 8.772522522522522e-05, |
|
"loss": 0.0034, |
|
"step": 11214 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 8.716841841841842e-05, |
|
"loss": 0.0074, |
|
"step": 11481 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 8.661161161161162e-05, |
|
"loss": 0.002, |
|
"step": 11748 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 8.605480480480482e-05, |
|
"loss": 0.0121, |
|
"step": 12015 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 8.5497997997998e-05, |
|
"loss": 0.0049, |
|
"step": 12282 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 8.49411911911912e-05, |
|
"loss": 0.0045, |
|
"step": 12549 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 8.438438438438439e-05, |
|
"loss": 0.0022, |
|
"step": 12816 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 8.382757757757757e-05, |
|
"loss": 0.0068, |
|
"step": 13083 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.03561040014028549, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.882, |
|
"eval_samples_per_second": 405.884, |
|
"eval_steps_per_second": 27.21, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 8.327077077077078e-05, |
|
"loss": 0.0063, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 8.271396396396397e-05, |
|
"loss": 0.002, |
|
"step": 13617 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 8.215715715715717e-05, |
|
"loss": 0.007, |
|
"step": 13884 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 8.160035035035035e-05, |
|
"loss": 0.0036, |
|
"step": 14151 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 8.104354354354354e-05, |
|
"loss": 0.0036, |
|
"step": 14418 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 8.048673673673675e-05, |
|
"loss": 0.0087, |
|
"step": 14685 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 7.992992992992994e-05, |
|
"loss": 0.0013, |
|
"step": 14952 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 7.937312312312313e-05, |
|
"loss": 0.0031, |
|
"step": 15219 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 7.881631631631632e-05, |
|
"loss": 0.0031, |
|
"step": 15486 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 7.82595095095095e-05, |
|
"loss": 0.0041, |
|
"step": 15753 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.018554789945483208, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8772, |
|
"eval_samples_per_second": 408.133, |
|
"eval_steps_per_second": 27.361, |
|
"step": 15984 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 7.77027027027027e-05, |
|
"loss": 0.0044, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 7.71458958958959e-05, |
|
"loss": 0.0012, |
|
"step": 16287 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 7.65890890890891e-05, |
|
"loss": 0.0036, |
|
"step": 16554 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 7.603228228228229e-05, |
|
"loss": 0.0028, |
|
"step": 16821 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 7.547547547547547e-05, |
|
"loss": 0.0039, |
|
"step": 17088 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 7.491866866866867e-05, |
|
"loss": 0.0013, |
|
"step": 17355 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 7.436186186186187e-05, |
|
"loss": 0.0026, |
|
"step": 17622 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 7.380505505505507e-05, |
|
"loss": 0.0015, |
|
"step": 17889 |
|
}, |
|
{ |
|
"epoch": 6.82, |
|
"learning_rate": 7.324824824824825e-05, |
|
"loss": 0.0009, |
|
"step": 18156 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 7.269144144144144e-05, |
|
"loss": 0.0034, |
|
"step": 18423 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.04260706901550293, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9016, |
|
"eval_samples_per_second": 397.069, |
|
"eval_steps_per_second": 26.619, |
|
"step": 18648 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 7.213463463463464e-05, |
|
"loss": 0.0027, |
|
"step": 18690 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 7.157782782782782e-05, |
|
"loss": 0.0018, |
|
"step": 18957 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 7.102102102102103e-05, |
|
"loss": 0.0024, |
|
"step": 19224 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 7.046421421421422e-05, |
|
"loss": 0.0021, |
|
"step": 19491 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 6.99074074074074e-05, |
|
"loss": 0.0017, |
|
"step": 19758 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 6.93506006006006e-05, |
|
"loss": 0.0013, |
|
"step": 20025 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 6.879379379379379e-05, |
|
"loss": 0.0025, |
|
"step": 20292 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 6.8236986986987e-05, |
|
"loss": 0.0017, |
|
"step": 20559 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 6.768018018018019e-05, |
|
"loss": 0.0022, |
|
"step": 20826 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 6.712337337337337e-05, |
|
"loss": 0.0043, |
|
"step": 21093 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.017180927097797394, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8803, |
|
"eval_samples_per_second": 406.66, |
|
"eval_steps_per_second": 27.262, |
|
"step": 21312 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 6.656656656656657e-05, |
|
"loss": 0.0035, |
|
"step": 21360 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 6.600975975975976e-05, |
|
"loss": 0.0024, |
|
"step": 21627 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 6.545295295295295e-05, |
|
"loss": 0.0036, |
|
"step": 21894 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 6.489614614614615e-05, |
|
"loss": 0.0034, |
|
"step": 22161 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 6.433933933933934e-05, |
|
"loss": 0.002, |
|
"step": 22428 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"learning_rate": 6.378253253253254e-05, |
|
"loss": 0.0029, |
|
"step": 22695 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 6.322572572572572e-05, |
|
"loss": 0.002, |
|
"step": 22962 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 6.266891891891892e-05, |
|
"loss": 0.0011, |
|
"step": 23229 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"learning_rate": 6.211211211211212e-05, |
|
"loss": 0.0006, |
|
"step": 23496 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 6.15553053053053e-05, |
|
"loss": 0.004, |
|
"step": 23763 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.027217011898756027, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8897, |
|
"eval_samples_per_second": 402.398, |
|
"eval_steps_per_second": 26.976, |
|
"step": 23976 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 6.0998498498498503e-05, |
|
"loss": 0.0038, |
|
"step": 24030 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"learning_rate": 6.0441691691691695e-05, |
|
"loss": 0.0022, |
|
"step": 24297 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 5.988488488488489e-05, |
|
"loss": 0.0011, |
|
"step": 24564 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 5.932807807807807e-05, |
|
"loss": 0.0013, |
|
"step": 24831 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 5.877127127127128e-05, |
|
"loss": 0.0014, |
|
"step": 25098 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 5.821446446446447e-05, |
|
"loss": 0.0037, |
|
"step": 25365 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.765765765765766e-05, |
|
"loss": 0.0011, |
|
"step": 25632 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 5.7100850850850854e-05, |
|
"loss": 0.0028, |
|
"step": 25899 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 5.654404404404404e-05, |
|
"loss": 0.0012, |
|
"step": 26166 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 5.5987237237237245e-05, |
|
"loss": 0.0005, |
|
"step": 26433 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.033312857151031494, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.9102, |
|
"eval_samples_per_second": 393.315, |
|
"eval_steps_per_second": 26.367, |
|
"step": 26640 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 5.543043043043044e-05, |
|
"loss": 0.0023, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 10.12, |
|
"learning_rate": 5.487362362362363e-05, |
|
"loss": 0.0032, |
|
"step": 26967 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 5.431681681681682e-05, |
|
"loss": 0.0013, |
|
"step": 27234 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 5.3760010010010006e-05, |
|
"loss": 0.0009, |
|
"step": 27501 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 5.320320320320321e-05, |
|
"loss": 0.0039, |
|
"step": 27768 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 5.2646396396396403e-05, |
|
"loss": 0.0008, |
|
"step": 28035 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 5.2089589589589595e-05, |
|
"loss": 0.0009, |
|
"step": 28302 |
|
}, |
|
{ |
|
"epoch": 10.72, |
|
"learning_rate": 5.153278278278279e-05, |
|
"loss": 0.002, |
|
"step": 28569 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 5.097597597597597e-05, |
|
"loss": 0.0006, |
|
"step": 28836 |
|
}, |
|
{ |
|
"epoch": 10.92, |
|
"learning_rate": 5.0419169169169165e-05, |
|
"loss": 0.0025, |
|
"step": 29103 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.03584469109773636, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8813, |
|
"eval_samples_per_second": 406.214, |
|
"eval_steps_per_second": 27.232, |
|
"step": 29304 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.9862362362362363e-05, |
|
"loss": 0.0041, |
|
"step": 29370 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 4.930555555555556e-05, |
|
"loss": 0.0002, |
|
"step": 29637 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 4.8748748748748754e-05, |
|
"loss": 0.0001, |
|
"step": 29904 |
|
}, |
|
{ |
|
"epoch": 11.33, |
|
"learning_rate": 4.819194194194194e-05, |
|
"loss": 0.0001, |
|
"step": 30171 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 4.763513513513514e-05, |
|
"loss": 0.0003, |
|
"step": 30438 |
|
}, |
|
{ |
|
"epoch": 11.53, |
|
"learning_rate": 4.707832832832833e-05, |
|
"loss": 0.003, |
|
"step": 30705 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.652152152152152e-05, |
|
"loss": 0.0002, |
|
"step": 30972 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 4.596471471471472e-05, |
|
"loss": 0.0037, |
|
"step": 31239 |
|
}, |
|
{ |
|
"epoch": 11.83, |
|
"learning_rate": 4.540790790790791e-05, |
|
"loss": 0.0014, |
|
"step": 31506 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 4.48511011011011e-05, |
|
"loss": 0.0021, |
|
"step": 31773 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.04740298539400101, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.881, |
|
"eval_samples_per_second": 406.347, |
|
"eval_steps_per_second": 27.241, |
|
"step": 31968 |
|
}, |
|
{ |
|
"epoch": 12.03, |
|
"learning_rate": 4.42942942942943e-05, |
|
"loss": 0.0006, |
|
"step": 32040 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 4.373748748748749e-05, |
|
"loss": 0.0007, |
|
"step": 32307 |
|
}, |
|
{ |
|
"epoch": 12.23, |
|
"learning_rate": 4.318068068068069e-05, |
|
"loss": 0.0015, |
|
"step": 32574 |
|
}, |
|
{ |
|
"epoch": 12.33, |
|
"learning_rate": 4.262387387387388e-05, |
|
"loss": 0.0014, |
|
"step": 32841 |
|
}, |
|
{ |
|
"epoch": 12.43, |
|
"learning_rate": 4.2067067067067065e-05, |
|
"loss": 0.0012, |
|
"step": 33108 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 4.1510260260260263e-05, |
|
"loss": 0.0019, |
|
"step": 33375 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 4.0953453453453455e-05, |
|
"loss": 0.0011, |
|
"step": 33642 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"learning_rate": 4.039664664664665e-05, |
|
"loss": 0.0016, |
|
"step": 33909 |
|
}, |
|
{ |
|
"epoch": 12.83, |
|
"learning_rate": 3.9839839839839846e-05, |
|
"loss": 0.0001, |
|
"step": 34176 |
|
}, |
|
{ |
|
"epoch": 12.93, |
|
"learning_rate": 3.928303303303303e-05, |
|
"loss": 0.0007, |
|
"step": 34443 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 0.0401989221572876, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.887, |
|
"eval_samples_per_second": 403.585, |
|
"eval_steps_per_second": 27.056, |
|
"step": 34632 |
|
}, |
|
{ |
|
"epoch": 13.03, |
|
"learning_rate": 3.8726226226226223e-05, |
|
"loss": 0.0006, |
|
"step": 34710 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 3.816941941941942e-05, |
|
"loss": 0.0002, |
|
"step": 34977 |
|
}, |
|
{ |
|
"epoch": 13.23, |
|
"learning_rate": 3.7612612612612614e-05, |
|
"loss": 0.0012, |
|
"step": 35244 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.705580580580581e-05, |
|
"loss": 0.0027, |
|
"step": 35511 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 3.6498998998999e-05, |
|
"loss": 0.0014, |
|
"step": 35778 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 3.594219219219219e-05, |
|
"loss": 0.0006, |
|
"step": 36045 |
|
}, |
|
{ |
|
"epoch": 13.63, |
|
"learning_rate": 3.538538538538539e-05, |
|
"loss": 0.0023, |
|
"step": 36312 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 3.482857857857858e-05, |
|
"loss": 0.0, |
|
"step": 36579 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 3.427177177177177e-05, |
|
"loss": 0.0056, |
|
"step": 36846 |
|
}, |
|
{ |
|
"epoch": 13.93, |
|
"learning_rate": 3.3714964964964965e-05, |
|
"loss": 0.0017, |
|
"step": 37113 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 0.03919493407011032, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8793, |
|
"eval_samples_per_second": 407.126, |
|
"eval_steps_per_second": 27.293, |
|
"step": 37296 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 3.315815815815816e-05, |
|
"loss": 0.0002, |
|
"step": 37380 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 3.260135135135135e-05, |
|
"loss": 0.0012, |
|
"step": 37647 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 3.204454454454455e-05, |
|
"loss": 0.0009, |
|
"step": 37914 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 3.148773773773774e-05, |
|
"loss": 0.0006, |
|
"step": 38181 |
|
}, |
|
{ |
|
"epoch": 14.43, |
|
"learning_rate": 3.093093093093093e-05, |
|
"loss": 0.0006, |
|
"step": 38448 |
|
}, |
|
{ |
|
"epoch": 14.53, |
|
"learning_rate": 3.0374124124124127e-05, |
|
"loss": 0.0015, |
|
"step": 38715 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 2.9817317317317315e-05, |
|
"loss": 0.0014, |
|
"step": 38982 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 2.9260510510510514e-05, |
|
"loss": 0.0009, |
|
"step": 39249 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.0016, |
|
"step": 39516 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 2.8146896896896895e-05, |
|
"loss": 0.0007, |
|
"step": 39783 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 0.03942238539457321, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8858, |
|
"eval_samples_per_second": 404.155, |
|
"eval_steps_per_second": 27.094, |
|
"step": 39960 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 2.7590090090090094e-05, |
|
"loss": 0.0003, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 2.7033283283283286e-05, |
|
"loss": 0.0002, |
|
"step": 40317 |
|
}, |
|
{ |
|
"epoch": 15.23, |
|
"learning_rate": 2.6476476476476474e-05, |
|
"loss": 0.0002, |
|
"step": 40584 |
|
}, |
|
{ |
|
"epoch": 15.33, |
|
"learning_rate": 2.5919669669669673e-05, |
|
"loss": 0.0, |
|
"step": 40851 |
|
}, |
|
{ |
|
"epoch": 15.43, |
|
"learning_rate": 2.536286286286286e-05, |
|
"loss": 0.0017, |
|
"step": 41118 |
|
}, |
|
{ |
|
"epoch": 15.53, |
|
"learning_rate": 2.4806056056056057e-05, |
|
"loss": 0.0002, |
|
"step": 41385 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 2.4249249249249252e-05, |
|
"loss": 0.0007, |
|
"step": 41652 |
|
}, |
|
{ |
|
"epoch": 15.74, |
|
"learning_rate": 2.3692442442442444e-05, |
|
"loss": 0.0001, |
|
"step": 41919 |
|
}, |
|
{ |
|
"epoch": 15.84, |
|
"learning_rate": 2.3135635635635636e-05, |
|
"loss": 0.0011, |
|
"step": 42186 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 2.2578828828828828e-05, |
|
"loss": 0.0013, |
|
"step": 42453 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 0.04424785450100899, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8837, |
|
"eval_samples_per_second": 405.133, |
|
"eval_steps_per_second": 27.16, |
|
"step": 42624 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 2.2022022022022024e-05, |
|
"loss": 0.0006, |
|
"step": 42720 |
|
}, |
|
{ |
|
"epoch": 16.14, |
|
"learning_rate": 2.146521521521522e-05, |
|
"loss": 0.0013, |
|
"step": 42987 |
|
}, |
|
{ |
|
"epoch": 16.24, |
|
"learning_rate": 2.0908408408408408e-05, |
|
"loss": 0.0005, |
|
"step": 43254 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 2.0351601601601603e-05, |
|
"loss": 0.0008, |
|
"step": 43521 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 1.9794794794794795e-05, |
|
"loss": 0.0003, |
|
"step": 43788 |
|
}, |
|
{ |
|
"epoch": 16.54, |
|
"learning_rate": 1.9237987987987987e-05, |
|
"loss": 0.0001, |
|
"step": 44055 |
|
}, |
|
{ |
|
"epoch": 16.64, |
|
"learning_rate": 1.8681181181181182e-05, |
|
"loss": 0.0005, |
|
"step": 44322 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 1.8124374374374374e-05, |
|
"loss": 0.0026, |
|
"step": 44589 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 1.756756756756757e-05, |
|
"loss": 0.0008, |
|
"step": 44856 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 1.701076076076076e-05, |
|
"loss": 0.0002, |
|
"step": 45123 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 0.04427039995789528, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8885, |
|
"eval_samples_per_second": 402.91, |
|
"eval_steps_per_second": 27.011, |
|
"step": 45288 |
|
}, |
|
{ |
|
"epoch": 17.04, |
|
"learning_rate": 1.6453953953953954e-05, |
|
"loss": 0.0001, |
|
"step": 45390 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 1.589714714714715e-05, |
|
"loss": 0.0004, |
|
"step": 45657 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 1.534034034034034e-05, |
|
"loss": 0.0001, |
|
"step": 45924 |
|
}, |
|
{ |
|
"epoch": 17.34, |
|
"learning_rate": 1.4783533533533533e-05, |
|
"loss": 0.0011, |
|
"step": 46191 |
|
}, |
|
{ |
|
"epoch": 17.44, |
|
"learning_rate": 1.4226726726726727e-05, |
|
"loss": 0.001, |
|
"step": 46458 |
|
}, |
|
{ |
|
"epoch": 17.54, |
|
"learning_rate": 1.3669919919919922e-05, |
|
"loss": 0.0003, |
|
"step": 46725 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"learning_rate": 1.3113113113113112e-05, |
|
"loss": 0.0007, |
|
"step": 46992 |
|
}, |
|
{ |
|
"epoch": 17.74, |
|
"learning_rate": 1.2556306306306306e-05, |
|
"loss": 0.0002, |
|
"step": 47259 |
|
}, |
|
{ |
|
"epoch": 17.84, |
|
"learning_rate": 1.19994994994995e-05, |
|
"loss": 0.0024, |
|
"step": 47526 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 1.1442692692692693e-05, |
|
"loss": 0.0013, |
|
"step": 47793 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 0.038870543241500854, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8826, |
|
"eval_samples_per_second": 405.621, |
|
"eval_steps_per_second": 27.192, |
|
"step": 47952 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 1.0885885885885887e-05, |
|
"loss": 0.0005, |
|
"step": 48060 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 1.0329079079079079e-05, |
|
"loss": 0.0, |
|
"step": 48327 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 9.772272272272273e-06, |
|
"loss": 0.0005, |
|
"step": 48594 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 9.215465465465466e-06, |
|
"loss": 0.0017, |
|
"step": 48861 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 8.65865865865866e-06, |
|
"loss": 0.0003, |
|
"step": 49128 |
|
}, |
|
{ |
|
"epoch": 18.54, |
|
"learning_rate": 8.101851851851852e-06, |
|
"loss": 0.0012, |
|
"step": 49395 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 7.545045045045046e-06, |
|
"loss": 0.0, |
|
"step": 49662 |
|
}, |
|
{ |
|
"epoch": 18.74, |
|
"learning_rate": 6.9882382382382385e-06, |
|
"loss": 0.0011, |
|
"step": 49929 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 6.431431431431431e-06, |
|
"loss": 0.0013, |
|
"step": 50196 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 5.874624624624625e-06, |
|
"loss": 0.0001, |
|
"step": 50463 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 0.04122824966907501, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8851, |
|
"eval_samples_per_second": 404.458, |
|
"eval_steps_per_second": 27.114, |
|
"step": 50616 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 5.317817817817819e-06, |
|
"loss": 0.0012, |
|
"step": 50730 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"learning_rate": 4.7610110110110115e-06, |
|
"loss": 0.0001, |
|
"step": 50997 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 4.204204204204204e-06, |
|
"loss": 0.0002, |
|
"step": 51264 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 3.647397397397397e-06, |
|
"loss": 0.0, |
|
"step": 51531 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 3.090590590590591e-06, |
|
"loss": 0.0003, |
|
"step": 51798 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 2.533783783783784e-06, |
|
"loss": 0.0021, |
|
"step": 52065 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 1.976976976976977e-06, |
|
"loss": 0.0, |
|
"step": 52332 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"learning_rate": 1.4201701701701704e-06, |
|
"loss": 0.0001, |
|
"step": 52599 |
|
}, |
|
{ |
|
"epoch": 19.84, |
|
"learning_rate": 8.633633633633634e-07, |
|
"loss": 0.0009, |
|
"step": 52866 |
|
}, |
|
{ |
|
"epoch": 19.94, |
|
"learning_rate": 3.0655655655655656e-07, |
|
"loss": 0.0001, |
|
"step": 53133 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.0392613410949707, |
|
"eval_max_distance": 1, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 0.8819, |
|
"eval_samples_per_second": 405.923, |
|
"eval_steps_per_second": 27.213, |
|
"step": 53280 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 53280, |
|
"total_flos": 3.004276018040832e+16, |
|
"train_loss": 0.5359516274340651, |
|
"train_runtime": 5327.8303, |
|
"train_samples_per_second": 149.956, |
|
"train_steps_per_second": 10.0 |
|
} |
|
], |
|
"logging_steps": 267, |
|
"max_steps": 53280, |
|
"num_train_epochs": 20, |
|
"save_steps": 533, |
|
"total_flos": 3.004276018040832e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|