|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 49.997014925373136, |
|
"global_step": 8350, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0009880239520958084, |
|
"loss": 5.0465, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0009760479041916168, |
|
"loss": 3.0126, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0009640718562874252, |
|
"loss": 2.6782, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.0009520958083832335, |
|
"loss": 2.4056, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0009401197604790419, |
|
"loss": 2.2999, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 0.0009281437125748503, |
|
"loss": 1.9862, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.0009161676646706587, |
|
"loss": 1.8922, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.0009041916167664672, |
|
"loss": 1.699, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 0.0008922155688622756, |
|
"loss": 1.5454, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 0.0008802395209580839, |
|
"loss": 1.4636, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0008682634730538922, |
|
"loss": 1.2505, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 0.0008562874251497006, |
|
"loss": 1.1771, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 0.000844311377245509, |
|
"loss": 1.0694, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.0008323353293413174, |
|
"loss": 0.9962, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 0.0008203592814371258, |
|
"loss": 0.9573, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 0.0008083832335329342, |
|
"loss": 0.8383, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 10.18, |
|
"learning_rate": 0.0007964071856287425, |
|
"loss": 0.813, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 0.0007844311377245509, |
|
"loss": 0.7532, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 11.38, |
|
"learning_rate": 0.0007724550898203593, |
|
"loss": 0.6846, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"learning_rate": 0.0007604790419161677, |
|
"loss": 0.6736, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.97, |
|
"eval_bleu": 2.5636, |
|
"eval_gen_len": 15.2363, |
|
"eval_loss": 0.29240211844444275, |
|
"eval_runtime": 438.109, |
|
"eval_samples_per_second": 12.228, |
|
"eval_steps_per_second": 3.059, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.57, |
|
"learning_rate": 0.0007485029940119761, |
|
"loss": 0.5751, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 13.17, |
|
"learning_rate": 0.0007365269461077845, |
|
"loss": 0.5731, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 0.0007245508982035929, |
|
"loss": 0.527, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 0.0007125748502994012, |
|
"loss": 0.5077, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 0.0007005988023952096, |
|
"loss": 0.4783, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 15.57, |
|
"learning_rate": 0.000688622754491018, |
|
"loss": 0.4261, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 16.17, |
|
"learning_rate": 0.0006766467065868264, |
|
"loss": 0.4283, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 16.76, |
|
"learning_rate": 0.0006646706586826347, |
|
"loss": 0.3955, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 17.36, |
|
"learning_rate": 0.0006526946107784431, |
|
"loss": 0.3785, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 17.96, |
|
"learning_rate": 0.0006407185628742515, |
|
"loss": 0.3739, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.0006287425149700598, |
|
"loss": 0.3335, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 19.16, |
|
"learning_rate": 0.0006167664670658682, |
|
"loss": 0.3438, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 19.76, |
|
"learning_rate": 0.0006047904191616766, |
|
"loss": 0.3164, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 20.36, |
|
"learning_rate": 0.000592814371257485, |
|
"loss": 0.3068, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 0.0005808383233532934, |
|
"loss": 0.3188, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 21.56, |
|
"learning_rate": 0.0005688622754491018, |
|
"loss": 0.288, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"learning_rate": 0.0005568862275449101, |
|
"loss": 0.2916, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 0.0005449101796407185, |
|
"loss": 0.2731, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 23.35, |
|
"learning_rate": 0.000532934131736527, |
|
"loss": 0.2655, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"learning_rate": 0.0005209580838323354, |
|
"loss": 0.2621, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 23.95, |
|
"eval_bleu": 3.3704, |
|
"eval_gen_len": 16.0834, |
|
"eval_loss": 0.1354922354221344, |
|
"eval_runtime": 461.5346, |
|
"eval_samples_per_second": 11.607, |
|
"eval_steps_per_second": 2.903, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"learning_rate": 0.0005089820359281438, |
|
"loss": 0.2493, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 0.0004970059880239521, |
|
"loss": 0.2502, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"learning_rate": 0.00048502994011976046, |
|
"loss": 0.237, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 26.35, |
|
"learning_rate": 0.00047305389221556887, |
|
"loss": 0.2322, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 0.00046107784431137727, |
|
"loss": 0.2356, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 0.0004491017964071856, |
|
"loss": 0.2194, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 28.14, |
|
"learning_rate": 0.00043712574850299403, |
|
"loss": 0.2181, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 0.00042514970059880243, |
|
"loss": 0.2113, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 29.34, |
|
"learning_rate": 0.0004131736526946108, |
|
"loss": 0.2087, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"learning_rate": 0.0004011976047904192, |
|
"loss": 0.214, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 30.54, |
|
"learning_rate": 0.0003892215568862276, |
|
"loss": 0.2028, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 31.14, |
|
"learning_rate": 0.0003772455089820359, |
|
"loss": 0.2036, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 31.73, |
|
"learning_rate": 0.0003652694610778443, |
|
"loss": 0.1953, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 32.33, |
|
"learning_rate": 0.0003532934131736527, |
|
"loss": 0.1926, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 32.93, |
|
"learning_rate": 0.0003413173652694611, |
|
"loss": 0.1974, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 33.53, |
|
"learning_rate": 0.00032934131736526946, |
|
"loss": 0.1866, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 0.00031736526946107786, |
|
"loss": 0.1839, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 34.73, |
|
"learning_rate": 0.00030538922155688627, |
|
"loss": 0.1816, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 35.33, |
|
"learning_rate": 0.0002934131736526946, |
|
"loss": 0.1837, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 35.93, |
|
"learning_rate": 0.000281437125748503, |
|
"loss": 0.1775, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 35.93, |
|
"eval_bleu": 3.6961, |
|
"eval_gen_len": 16.5854, |
|
"eval_loss": 0.12107560783624649, |
|
"eval_runtime": 477.8329, |
|
"eval_samples_per_second": 11.211, |
|
"eval_steps_per_second": 2.804, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 36.53, |
|
"learning_rate": 0.0002694610778443114, |
|
"loss": 0.1742, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 37.13, |
|
"learning_rate": 0.0002574850299401197, |
|
"loss": 0.176, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 37.72, |
|
"learning_rate": 0.00024550898203592813, |
|
"loss": 0.1704, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 38.32, |
|
"learning_rate": 0.00023353293413173654, |
|
"loss": 0.1734, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 38.92, |
|
"learning_rate": 0.0002215568862275449, |
|
"loss": 0.1662, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 39.52, |
|
"learning_rate": 0.0002095808383233533, |
|
"loss": 0.1653, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 40.12, |
|
"learning_rate": 0.0001976047904191617, |
|
"loss": 0.1684, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 40.72, |
|
"learning_rate": 0.00018562874251497005, |
|
"loss": 0.1584, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 41.32, |
|
"learning_rate": 0.00017365269461077845, |
|
"loss": 0.1622, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 41.91, |
|
"learning_rate": 0.00016167664670658683, |
|
"loss": 0.1625, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 42.51, |
|
"learning_rate": 0.0001497005988023952, |
|
"loss": 0.1558, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 43.11, |
|
"learning_rate": 0.00013772455089820359, |
|
"loss": 0.157, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 43.71, |
|
"learning_rate": 0.00012574850299401196, |
|
"loss": 0.1568, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 44.31, |
|
"learning_rate": 0.00011377245508982036, |
|
"loss": 0.1537, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 44.91, |
|
"learning_rate": 0.00010179640718562875, |
|
"loss": 0.1521, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 45.51, |
|
"learning_rate": 8.982035928143712e-05, |
|
"loss": 0.1508, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 46.11, |
|
"learning_rate": 7.784431137724552e-05, |
|
"loss": 0.149, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 46.7, |
|
"learning_rate": 6.58682634730539e-05, |
|
"loss": 0.1462, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 47.3, |
|
"learning_rate": 5.389221556886228e-05, |
|
"loss": 0.1484, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 47.9, |
|
"learning_rate": 4.191616766467066e-05, |
|
"loss": 0.1478, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 47.9, |
|
"eval_bleu": 3.681, |
|
"eval_gen_len": 16.4697, |
|
"eval_loss": 0.11556760966777802, |
|
"eval_runtime": 415.7833, |
|
"eval_samples_per_second": 12.884, |
|
"eval_steps_per_second": 3.223, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 48.5, |
|
"learning_rate": 2.994011976047904e-05, |
|
"loss": 0.1449, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 49.1, |
|
"learning_rate": 1.7964071856287426e-05, |
|
"loss": 0.1448, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 49.7, |
|
"learning_rate": 5.9880239520958085e-06, |
|
"loss": 0.1425, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"step": 8350, |
|
"total_flos": 6.461676347272704e+16, |
|
"train_loss": 0.584253704162415, |
|
"train_runtime": 11332.4023, |
|
"train_samples_per_second": 23.636, |
|
"train_steps_per_second": 0.737 |
|
} |
|
], |
|
"max_steps": 8350, |
|
"num_train_epochs": 50, |
|
"total_flos": 6.461676347272704e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|