|
{ |
|
"best_metric": 2.6507720947265625, |
|
"best_model_checkpoint": "output-en-mul/checkpoint-975", |
|
"epoch": 3.882320064584239, |
|
"global_step": 975, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"eval_BLEU_ach": 1.8026, |
|
"eval_BLEU_lgg": 2.4121, |
|
"eval_BLEU_lug": 14.4386, |
|
"eval_BLEU_mean": 4.5453, |
|
"eval_BLEU_nyn": 3.307, |
|
"eval_BLEU_teo": 0.7663, |
|
"eval_loss": 4.095067024230957, |
|
"eval_runtime": 117.7968, |
|
"eval_samples_per_second": 21.223, |
|
"eval_steps_per_second": 1.333, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_BLEU_ach": 6.6301, |
|
"eval_BLEU_lgg": 3.6686, |
|
"eval_BLEU_lug": 17.863, |
|
"eval_BLEU_mean": 7.7115, |
|
"eval_BLEU_nyn": 7.0341, |
|
"eval_BLEU_teo": 3.3617, |
|
"eval_loss": 3.4971461296081543, |
|
"eval_runtime": 126.0103, |
|
"eval_samples_per_second": 19.84, |
|
"eval_steps_per_second": 1.246, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_BLEU_ach": 9.2766, |
|
"eval_BLEU_lgg": 5.9959, |
|
"eval_BLEU_lug": 18.5448, |
|
"eval_BLEU_mean": 10.2227, |
|
"eval_BLEU_nyn": 9.4904, |
|
"eval_BLEU_teo": 7.8061, |
|
"eval_loss": 3.2395877838134766, |
|
"eval_runtime": 126.2158, |
|
"eval_samples_per_second": 19.807, |
|
"eval_steps_per_second": 1.244, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_BLEU_ach": 11.7826, |
|
"eval_BLEU_lgg": 9.205, |
|
"eval_BLEU_lug": 20.2742, |
|
"eval_BLEU_mean": 12.501, |
|
"eval_BLEU_nyn": 10.2936, |
|
"eval_BLEU_teo": 10.9496, |
|
"eval_loss": 3.0855562686920166, |
|
"eval_runtime": 115.1293, |
|
"eval_samples_per_second": 21.715, |
|
"eval_steps_per_second": 1.364, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_BLEU_ach": 12.2884, |
|
"eval_BLEU_lgg": 10.8685, |
|
"eval_BLEU_lug": 20.0989, |
|
"eval_BLEU_mean": 13.331, |
|
"eval_BLEU_nyn": 10.8764, |
|
"eval_BLEU_teo": 12.5227, |
|
"eval_loss": 2.9902637004852295, |
|
"eval_runtime": 112.0904, |
|
"eval_samples_per_second": 22.303, |
|
"eval_steps_per_second": 1.401, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_BLEU_ach": 12.9297, |
|
"eval_BLEU_lgg": 12.0138, |
|
"eval_BLEU_lug": 21.6123, |
|
"eval_BLEU_mean": 14.4131, |
|
"eval_BLEU_nyn": 11.2431, |
|
"eval_BLEU_teo": 14.2664, |
|
"eval_loss": 2.9314682483673096, |
|
"eval_runtime": 114.1492, |
|
"eval_samples_per_second": 21.901, |
|
"eval_steps_per_second": 1.375, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_BLEU_ach": 13.8035, |
|
"eval_BLEU_lgg": 13.3415, |
|
"eval_BLEU_lug": 21.8656, |
|
"eval_BLEU_mean": 15.1199, |
|
"eval_BLEU_nyn": 11.9199, |
|
"eval_BLEU_teo": 14.669, |
|
"eval_loss": 2.8768787384033203, |
|
"eval_runtime": 110.271, |
|
"eval_samples_per_second": 22.671, |
|
"eval_steps_per_second": 1.424, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_BLEU_ach": 13.8195, |
|
"eval_BLEU_lgg": 13.478, |
|
"eval_BLEU_lug": 21.4511, |
|
"eval_BLEU_mean": 15.3648, |
|
"eval_BLEU_nyn": 12.8941, |
|
"eval_BLEU_teo": 15.1812, |
|
"eval_loss": 2.8409996032714844, |
|
"eval_runtime": 114.4231, |
|
"eval_samples_per_second": 21.849, |
|
"eval_steps_per_second": 1.372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_BLEU_ach": 14.981, |
|
"eval_BLEU_lgg": 14.1773, |
|
"eval_BLEU_lug": 22.9603, |
|
"eval_BLEU_mean": 16.2302, |
|
"eval_BLEU_nyn": 12.8695, |
|
"eval_BLEU_teo": 16.1631, |
|
"eval_loss": 2.815749168395996, |
|
"eval_runtime": 112.6187, |
|
"eval_samples_per_second": 22.199, |
|
"eval_steps_per_second": 1.394, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_BLEU_ach": 16.184, |
|
"eval_BLEU_lgg": 15.1618, |
|
"eval_BLEU_lug": 22.062, |
|
"eval_BLEU_mean": 16.6458, |
|
"eval_BLEU_nyn": 13.7415, |
|
"eval_BLEU_teo": 16.0795, |
|
"eval_loss": 2.785487174987793, |
|
"eval_runtime": 111.0998, |
|
"eval_samples_per_second": 22.502, |
|
"eval_steps_per_second": 1.413, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_BLEU_ach": 16.0982, |
|
"eval_BLEU_lgg": 15.3667, |
|
"eval_BLEU_lug": 23.0175, |
|
"eval_BLEU_mean": 17.0507, |
|
"eval_BLEU_nyn": 13.6528, |
|
"eval_BLEU_teo": 17.1184, |
|
"eval_loss": 2.7754287719726562, |
|
"eval_runtime": 109.9249, |
|
"eval_samples_per_second": 22.743, |
|
"eval_steps_per_second": 1.428, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_BLEU_ach": 15.5565, |
|
"eval_BLEU_lgg": 15.718, |
|
"eval_BLEU_lug": 23.1352, |
|
"eval_BLEU_mean": 16.9913, |
|
"eval_BLEU_nyn": 13.8518, |
|
"eval_BLEU_teo": 16.695, |
|
"eval_loss": 2.7620601654052734, |
|
"eval_runtime": 110.9898, |
|
"eval_samples_per_second": 22.525, |
|
"eval_steps_per_second": 1.415, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_BLEU_ach": 16.1387, |
|
"eval_BLEU_lgg": 16.7057, |
|
"eval_BLEU_lug": 23.6614, |
|
"eval_BLEU_mean": 17.5894, |
|
"eval_BLEU_nyn": 13.8801, |
|
"eval_BLEU_teo": 17.561, |
|
"eval_loss": 2.7422778606414795, |
|
"eval_runtime": 108.6905, |
|
"eval_samples_per_second": 23.001, |
|
"eval_steps_per_second": 1.444, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_BLEU_ach": 16.0125, |
|
"eval_BLEU_lgg": 16.7273, |
|
"eval_BLEU_lug": 23.3442, |
|
"eval_BLEU_mean": 17.4938, |
|
"eval_BLEU_nyn": 13.8987, |
|
"eval_BLEU_teo": 17.4862, |
|
"eval_loss": 2.7281692028045654, |
|
"eval_runtime": 110.7001, |
|
"eval_samples_per_second": 22.584, |
|
"eval_steps_per_second": 1.418, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_BLEU_ach": 15.9115, |
|
"eval_BLEU_lgg": 16.5585, |
|
"eval_BLEU_lug": 24.5961, |
|
"eval_BLEU_mean": 18.0068, |
|
"eval_BLEU_nyn": 14.7126, |
|
"eval_BLEU_teo": 18.2553, |
|
"eval_loss": 2.7274107933044434, |
|
"eval_runtime": 108.8652, |
|
"eval_samples_per_second": 22.964, |
|
"eval_steps_per_second": 1.442, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_BLEU_ach": 16.5652, |
|
"eval_BLEU_lgg": 16.9672, |
|
"eval_BLEU_lug": 23.9997, |
|
"eval_BLEU_mean": 18.1261, |
|
"eval_BLEU_nyn": 14.5361, |
|
"eval_BLEU_teo": 18.5623, |
|
"eval_loss": 2.7145676612854004, |
|
"eval_runtime": 111.5209, |
|
"eval_samples_per_second": 22.417, |
|
"eval_steps_per_second": 1.408, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_BLEU_ach": 16.9721, |
|
"eval_BLEU_lgg": 17.0032, |
|
"eval_BLEU_lug": 24.5043, |
|
"eval_BLEU_mean": 18.3546, |
|
"eval_BLEU_nyn": 14.2281, |
|
"eval_BLEU_teo": 19.0653, |
|
"eval_loss": 2.7048418521881104, |
|
"eval_runtime": 110.0323, |
|
"eval_samples_per_second": 22.721, |
|
"eval_steps_per_second": 1.427, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_BLEU_ach": 16.1836, |
|
"eval_BLEU_lgg": 17.1272, |
|
"eval_BLEU_lug": 24.5073, |
|
"eval_BLEU_mean": 18.0436, |
|
"eval_BLEU_nyn": 14.385, |
|
"eval_BLEU_teo": 18.0147, |
|
"eval_loss": 2.6982274055480957, |
|
"eval_runtime": 109.4462, |
|
"eval_samples_per_second": 22.842, |
|
"eval_steps_per_second": 1.434, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_BLEU_ach": 16.1255, |
|
"eval_BLEU_lgg": 17.1414, |
|
"eval_BLEU_lug": 25.317, |
|
"eval_BLEU_mean": 18.2627, |
|
"eval_BLEU_nyn": 14.5828, |
|
"eval_BLEU_teo": 18.1469, |
|
"eval_loss": 2.689755916595459, |
|
"eval_runtime": 119.3336, |
|
"eval_samples_per_second": 20.95, |
|
"eval_steps_per_second": 1.316, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.000250996015936255, |
|
"loss": 2.9449, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_BLEU_ach": 15.5985, |
|
"eval_BLEU_lgg": 17.9639, |
|
"eval_BLEU_lug": 24.749, |
|
"eval_BLEU_mean": 18.3452, |
|
"eval_BLEU_nyn": 13.8982, |
|
"eval_BLEU_teo": 19.5164, |
|
"eval_loss": 2.6801609992980957, |
|
"eval_runtime": 119.8601, |
|
"eval_samples_per_second": 20.858, |
|
"eval_steps_per_second": 1.31, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_BLEU_ach": 16.5254, |
|
"eval_BLEU_lgg": 18.2852, |
|
"eval_BLEU_lug": 24.2641, |
|
"eval_BLEU_mean": 18.5989, |
|
"eval_BLEU_nyn": 14.5069, |
|
"eval_BLEU_teo": 19.4127, |
|
"eval_loss": 2.681849956512451, |
|
"eval_runtime": 117.9942, |
|
"eval_samples_per_second": 21.187, |
|
"eval_steps_per_second": 1.331, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_BLEU_ach": 16.299, |
|
"eval_BLEU_lgg": 17.7158, |
|
"eval_BLEU_lug": 24.1465, |
|
"eval_BLEU_mean": 18.3291, |
|
"eval_BLEU_nyn": 13.8427, |
|
"eval_BLEU_teo": 19.6415, |
|
"eval_loss": 2.6819944381713867, |
|
"eval_runtime": 117.8806, |
|
"eval_samples_per_second": 21.208, |
|
"eval_steps_per_second": 1.332, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_BLEU_ach": 16.7809, |
|
"eval_BLEU_lgg": 17.4699, |
|
"eval_BLEU_lug": 25.1645, |
|
"eval_BLEU_mean": 18.6296, |
|
"eval_BLEU_nyn": 14.751, |
|
"eval_BLEU_teo": 18.9815, |
|
"eval_loss": 2.676717758178711, |
|
"eval_runtime": 121.9425, |
|
"eval_samples_per_second": 20.501, |
|
"eval_steps_per_second": 1.287, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_BLEU_ach": 16.32, |
|
"eval_BLEU_lgg": 17.7798, |
|
"eval_BLEU_lug": 25.1038, |
|
"eval_BLEU_mean": 18.7062, |
|
"eval_BLEU_nyn": 14.4702, |
|
"eval_BLEU_teo": 19.8571, |
|
"eval_loss": 2.673452854156494, |
|
"eval_runtime": 124.1156, |
|
"eval_samples_per_second": 20.143, |
|
"eval_steps_per_second": 1.265, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_BLEU_ach": 16.4085, |
|
"eval_BLEU_lgg": 17.313, |
|
"eval_BLEU_lug": 24.7636, |
|
"eval_BLEU_mean": 18.6532, |
|
"eval_BLEU_nyn": 14.7807, |
|
"eval_BLEU_teo": 20.0003, |
|
"eval_loss": 2.6720378398895264, |
|
"eval_runtime": 129.4859, |
|
"eval_samples_per_second": 19.307, |
|
"eval_steps_per_second": 1.212, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_BLEU_ach": 17.9293, |
|
"eval_BLEU_lgg": 18.0515, |
|
"eval_BLEU_lug": 25.257, |
|
"eval_BLEU_mean": 19.2304, |
|
"eval_BLEU_nyn": 15.0062, |
|
"eval_BLEU_teo": 19.9081, |
|
"eval_loss": 2.6651768684387207, |
|
"eval_runtime": 123.349, |
|
"eval_samples_per_second": 20.268, |
|
"eval_steps_per_second": 1.273, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_BLEU_ach": 17.6407, |
|
"eval_BLEU_lgg": 18.0205, |
|
"eval_BLEU_lug": 24.7637, |
|
"eval_BLEU_mean": 19.1218, |
|
"eval_BLEU_nyn": 14.5182, |
|
"eval_BLEU_teo": 20.6661, |
|
"eval_loss": 2.6660828590393066, |
|
"eval_runtime": 128.8489, |
|
"eval_samples_per_second": 19.403, |
|
"eval_steps_per_second": 1.218, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_BLEU_ach": 17.3567, |
|
"eval_BLEU_lgg": 18.3187, |
|
"eval_BLEU_lug": 24.6553, |
|
"eval_BLEU_mean": 18.7991, |
|
"eval_BLEU_nyn": 14.4547, |
|
"eval_BLEU_teo": 19.2101, |
|
"eval_loss": 2.658956527709961, |
|
"eval_runtime": 126.8237, |
|
"eval_samples_per_second": 19.712, |
|
"eval_steps_per_second": 1.238, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_BLEU_ach": 17.3235, |
|
"eval_BLEU_lgg": 18.6854, |
|
"eval_BLEU_lug": 25.0742, |
|
"eval_BLEU_mean": 19.1268, |
|
"eval_BLEU_nyn": 14.8488, |
|
"eval_BLEU_teo": 19.7021, |
|
"eval_loss": 2.6642343997955322, |
|
"eval_runtime": 118.3036, |
|
"eval_samples_per_second": 21.132, |
|
"eval_steps_per_second": 1.327, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_BLEU_ach": 17.2203, |
|
"eval_BLEU_lgg": 18.0574, |
|
"eval_BLEU_lug": 25.487, |
|
"eval_BLEU_mean": 19.0773, |
|
"eval_BLEU_nyn": 14.7458, |
|
"eval_BLEU_teo": 19.8762, |
|
"eval_loss": 2.654205799102783, |
|
"eval_runtime": 124.2827, |
|
"eval_samples_per_second": 20.115, |
|
"eval_steps_per_second": 1.263, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_BLEU_ach": 17.2162, |
|
"eval_BLEU_lgg": 18.6608, |
|
"eval_BLEU_lug": 25.0911, |
|
"eval_BLEU_mean": 19.2459, |
|
"eval_BLEU_nyn": 14.8809, |
|
"eval_BLEU_teo": 20.3803, |
|
"eval_loss": 2.6576144695281982, |
|
"eval_runtime": 120.8965, |
|
"eval_samples_per_second": 20.679, |
|
"eval_steps_per_second": 1.299, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_BLEU_ach": 17.2713, |
|
"eval_BLEU_lgg": 18.5656, |
|
"eval_BLEU_lug": 25.0043, |
|
"eval_BLEU_mean": 19.155, |
|
"eval_BLEU_nyn": 14.6028, |
|
"eval_BLEU_teo": 20.3311, |
|
"eval_loss": 2.661299705505371, |
|
"eval_runtime": 127.8339, |
|
"eval_samples_per_second": 19.557, |
|
"eval_steps_per_second": 1.228, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"eval_BLEU_ach": 17.7839, |
|
"eval_BLEU_lgg": 18.5926, |
|
"eval_BLEU_lug": 25.1088, |
|
"eval_BLEU_mean": 19.183, |
|
"eval_BLEU_nyn": 14.4439, |
|
"eval_BLEU_teo": 19.9857, |
|
"eval_loss": 2.6544294357299805, |
|
"eval_runtime": 128.4239, |
|
"eval_samples_per_second": 19.467, |
|
"eval_steps_per_second": 1.223, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"eval_BLEU_ach": 17.2002, |
|
"eval_BLEU_lgg": 18.8921, |
|
"eval_BLEU_lug": 25.2989, |
|
"eval_BLEU_mean": 19.3189, |
|
"eval_BLEU_nyn": 15.0877, |
|
"eval_BLEU_teo": 20.1156, |
|
"eval_loss": 2.6571733951568604, |
|
"eval_runtime": 127.0177, |
|
"eval_samples_per_second": 19.682, |
|
"eval_steps_per_second": 1.236, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_BLEU_ach": 17.4507, |
|
"eval_BLEU_lgg": 19.0401, |
|
"eval_BLEU_lug": 25.3251, |
|
"eval_BLEU_mean": 19.2048, |
|
"eval_BLEU_nyn": 14.5218, |
|
"eval_BLEU_teo": 19.6862, |
|
"eval_loss": 2.660137414932251, |
|
"eval_runtime": 123.8715, |
|
"eval_samples_per_second": 20.182, |
|
"eval_steps_per_second": 1.267, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_BLEU_ach": 17.274, |
|
"eval_BLEU_lgg": 18.9203, |
|
"eval_BLEU_lug": 25.7546, |
|
"eval_BLEU_mean": 19.4384, |
|
"eval_BLEU_nyn": 14.8943, |
|
"eval_BLEU_teo": 20.349, |
|
"eval_loss": 2.6526899337768555, |
|
"eval_runtime": 128.7667, |
|
"eval_samples_per_second": 19.415, |
|
"eval_steps_per_second": 1.219, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_BLEU_ach": 17.0936, |
|
"eval_BLEU_lgg": 18.8419, |
|
"eval_BLEU_lug": 25.4369, |
|
"eval_BLEU_mean": 19.3561, |
|
"eval_BLEU_nyn": 14.8679, |
|
"eval_BLEU_teo": 20.5401, |
|
"eval_loss": 2.653111219406128, |
|
"eval_runtime": 125.2153, |
|
"eval_samples_per_second": 19.966, |
|
"eval_steps_per_second": 1.254, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_BLEU_ach": 17.6214, |
|
"eval_BLEU_lgg": 18.6578, |
|
"eval_BLEU_lug": 25.5293, |
|
"eval_BLEU_mean": 19.5211, |
|
"eval_BLEU_nyn": 14.9188, |
|
"eval_BLEU_teo": 20.8783, |
|
"eval_loss": 2.6521167755126953, |
|
"eval_runtime": 121.3634, |
|
"eval_samples_per_second": 20.599, |
|
"eval_steps_per_second": 1.294, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_BLEU_ach": 17.1886, |
|
"eval_BLEU_lgg": 18.6225, |
|
"eval_BLEU_lug": 25.2939, |
|
"eval_BLEU_mean": 19.3689, |
|
"eval_BLEU_nyn": 15.3009, |
|
"eval_BLEU_teo": 20.4388, |
|
"eval_loss": 2.6507720947265625, |
|
"eval_runtime": 121.9609, |
|
"eval_samples_per_second": 20.498, |
|
"eval_steps_per_second": 1.287, |
|
"step": 975 |
|
} |
|
], |
|
"max_steps": 1004, |
|
"num_train_epochs": 4, |
|
"total_flos": 1.0817461893464064e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|