{ "best_metric": 2.6507720947265625, "best_model_checkpoint": "output-en-mul/checkpoint-975", "epoch": 3.882320064584239, "global_step": 975, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_BLEU_ach": 1.8026, "eval_BLEU_lgg": 2.4121, "eval_BLEU_lug": 14.4386, "eval_BLEU_mean": 4.5453, "eval_BLEU_nyn": 3.307, "eval_BLEU_teo": 0.7663, "eval_loss": 4.095067024230957, "eval_runtime": 117.7968, "eval_samples_per_second": 21.223, "eval_steps_per_second": 1.333, "step": 25 }, { "epoch": 0.2, "eval_BLEU_ach": 6.6301, "eval_BLEU_lgg": 3.6686, "eval_BLEU_lug": 17.863, "eval_BLEU_mean": 7.7115, "eval_BLEU_nyn": 7.0341, "eval_BLEU_teo": 3.3617, "eval_loss": 3.4971461296081543, "eval_runtime": 126.0103, "eval_samples_per_second": 19.84, "eval_steps_per_second": 1.246, "step": 50 }, { "epoch": 0.3, "eval_BLEU_ach": 9.2766, "eval_BLEU_lgg": 5.9959, "eval_BLEU_lug": 18.5448, "eval_BLEU_mean": 10.2227, "eval_BLEU_nyn": 9.4904, "eval_BLEU_teo": 7.8061, "eval_loss": 3.2395877838134766, "eval_runtime": 126.2158, "eval_samples_per_second": 19.807, "eval_steps_per_second": 1.244, "step": 75 }, { "epoch": 0.4, "eval_BLEU_ach": 11.7826, "eval_BLEU_lgg": 9.205, "eval_BLEU_lug": 20.2742, "eval_BLEU_mean": 12.501, "eval_BLEU_nyn": 10.2936, "eval_BLEU_teo": 10.9496, "eval_loss": 3.0855562686920166, "eval_runtime": 115.1293, "eval_samples_per_second": 21.715, "eval_steps_per_second": 1.364, "step": 100 }, { "epoch": 0.5, "eval_BLEU_ach": 12.2884, "eval_BLEU_lgg": 10.8685, "eval_BLEU_lug": 20.0989, "eval_BLEU_mean": 13.331, "eval_BLEU_nyn": 10.8764, "eval_BLEU_teo": 12.5227, "eval_loss": 2.9902637004852295, "eval_runtime": 112.0904, "eval_samples_per_second": 22.303, "eval_steps_per_second": 1.401, "step": 125 }, { "epoch": 0.6, "eval_BLEU_ach": 12.9297, "eval_BLEU_lgg": 12.0138, "eval_BLEU_lug": 21.6123, "eval_BLEU_mean": 14.4131, "eval_BLEU_nyn": 11.2431, "eval_BLEU_teo": 14.2664, "eval_loss": 2.9314682483673096, "eval_runtime": 114.1492, "eval_samples_per_second": 21.901, "eval_steps_per_second": 1.375, "step": 150 }, { "epoch": 0.7, "eval_BLEU_ach": 13.8035, "eval_BLEU_lgg": 13.3415, "eval_BLEU_lug": 21.8656, "eval_BLEU_mean": 15.1199, "eval_BLEU_nyn": 11.9199, "eval_BLEU_teo": 14.669, "eval_loss": 2.8768787384033203, "eval_runtime": 110.271, "eval_samples_per_second": 22.671, "eval_steps_per_second": 1.424, "step": 175 }, { "epoch": 0.79, "eval_BLEU_ach": 13.8195, "eval_BLEU_lgg": 13.478, "eval_BLEU_lug": 21.4511, "eval_BLEU_mean": 15.3648, "eval_BLEU_nyn": 12.8941, "eval_BLEU_teo": 15.1812, "eval_loss": 2.8409996032714844, "eval_runtime": 114.4231, "eval_samples_per_second": 21.849, "eval_steps_per_second": 1.372, "step": 200 }, { "epoch": 0.89, "eval_BLEU_ach": 14.981, "eval_BLEU_lgg": 14.1773, "eval_BLEU_lug": 22.9603, "eval_BLEU_mean": 16.2302, "eval_BLEU_nyn": 12.8695, "eval_BLEU_teo": 16.1631, "eval_loss": 2.815749168395996, "eval_runtime": 112.6187, "eval_samples_per_second": 22.199, "eval_steps_per_second": 1.394, "step": 225 }, { "epoch": 0.99, "eval_BLEU_ach": 16.184, "eval_BLEU_lgg": 15.1618, "eval_BLEU_lug": 22.062, "eval_BLEU_mean": 16.6458, "eval_BLEU_nyn": 13.7415, "eval_BLEU_teo": 16.0795, "eval_loss": 2.785487174987793, "eval_runtime": 111.0998, "eval_samples_per_second": 22.502, "eval_steps_per_second": 1.413, "step": 250 }, { "epoch": 1.1, "eval_BLEU_ach": 16.0982, "eval_BLEU_lgg": 15.3667, "eval_BLEU_lug": 23.0175, "eval_BLEU_mean": 17.0507, "eval_BLEU_nyn": 13.6528, "eval_BLEU_teo": 17.1184, "eval_loss": 2.7754287719726562, "eval_runtime": 109.9249, "eval_samples_per_second": 22.743, "eval_steps_per_second": 1.428, "step": 275 }, { "epoch": 1.19, "eval_BLEU_ach": 15.5565, "eval_BLEU_lgg": 15.718, "eval_BLEU_lug": 23.1352, "eval_BLEU_mean": 16.9913, "eval_BLEU_nyn": 13.8518, "eval_BLEU_teo": 16.695, "eval_loss": 2.7620601654052734, "eval_runtime": 110.9898, "eval_samples_per_second": 22.525, "eval_steps_per_second": 1.415, "step": 300 }, { "epoch": 1.29, "eval_BLEU_ach": 16.1387, "eval_BLEU_lgg": 16.7057, "eval_BLEU_lug": 23.6614, "eval_BLEU_mean": 17.5894, "eval_BLEU_nyn": 13.8801, "eval_BLEU_teo": 17.561, "eval_loss": 2.7422778606414795, "eval_runtime": 108.6905, "eval_samples_per_second": 23.001, "eval_steps_per_second": 1.444, "step": 325 }, { "epoch": 1.39, "eval_BLEU_ach": 16.0125, "eval_BLEU_lgg": 16.7273, "eval_BLEU_lug": 23.3442, "eval_BLEU_mean": 17.4938, "eval_BLEU_nyn": 13.8987, "eval_BLEU_teo": 17.4862, "eval_loss": 2.7281692028045654, "eval_runtime": 110.7001, "eval_samples_per_second": 22.584, "eval_steps_per_second": 1.418, "step": 350 }, { "epoch": 1.49, "eval_BLEU_ach": 15.9115, "eval_BLEU_lgg": 16.5585, "eval_BLEU_lug": 24.5961, "eval_BLEU_mean": 18.0068, "eval_BLEU_nyn": 14.7126, "eval_BLEU_teo": 18.2553, "eval_loss": 2.7274107933044434, "eval_runtime": 108.8652, "eval_samples_per_second": 22.964, "eval_steps_per_second": 1.442, "step": 375 }, { "epoch": 1.59, "eval_BLEU_ach": 16.5652, "eval_BLEU_lgg": 16.9672, "eval_BLEU_lug": 23.9997, "eval_BLEU_mean": 18.1261, "eval_BLEU_nyn": 14.5361, "eval_BLEU_teo": 18.5623, "eval_loss": 2.7145676612854004, "eval_runtime": 111.5209, "eval_samples_per_second": 22.417, "eval_steps_per_second": 1.408, "step": 400 }, { "epoch": 1.69, "eval_BLEU_ach": 16.9721, "eval_BLEU_lgg": 17.0032, "eval_BLEU_lug": 24.5043, "eval_BLEU_mean": 18.3546, "eval_BLEU_nyn": 14.2281, "eval_BLEU_teo": 19.0653, "eval_loss": 2.7048418521881104, "eval_runtime": 110.0323, "eval_samples_per_second": 22.721, "eval_steps_per_second": 1.427, "step": 425 }, { "epoch": 1.79, "eval_BLEU_ach": 16.1836, "eval_BLEU_lgg": 17.1272, "eval_BLEU_lug": 24.5073, "eval_BLEU_mean": 18.0436, "eval_BLEU_nyn": 14.385, "eval_BLEU_teo": 18.0147, "eval_loss": 2.6982274055480957, "eval_runtime": 109.4462, "eval_samples_per_second": 22.842, "eval_steps_per_second": 1.434, "step": 450 }, { "epoch": 1.89, "eval_BLEU_ach": 16.1255, "eval_BLEU_lgg": 17.1414, "eval_BLEU_lug": 25.317, "eval_BLEU_mean": 18.2627, "eval_BLEU_nyn": 14.5828, "eval_BLEU_teo": 18.1469, "eval_loss": 2.689755916595459, "eval_runtime": 119.3336, "eval_samples_per_second": 20.95, "eval_steps_per_second": 1.316, "step": 475 }, { "epoch": 1.99, "learning_rate": 0.000250996015936255, "loss": 2.9449, "step": 500 }, { "epoch": 1.99, "eval_BLEU_ach": 15.5985, "eval_BLEU_lgg": 17.9639, "eval_BLEU_lug": 24.749, "eval_BLEU_mean": 18.3452, "eval_BLEU_nyn": 13.8982, "eval_BLEU_teo": 19.5164, "eval_loss": 2.6801609992980957, "eval_runtime": 119.8601, "eval_samples_per_second": 20.858, "eval_steps_per_second": 1.31, "step": 500 }, { "epoch": 2.09, "eval_BLEU_ach": 16.5254, "eval_BLEU_lgg": 18.2852, "eval_BLEU_lug": 24.2641, "eval_BLEU_mean": 18.5989, "eval_BLEU_nyn": 14.5069, "eval_BLEU_teo": 19.4127, "eval_loss": 2.681849956512451, "eval_runtime": 117.9942, "eval_samples_per_second": 21.187, "eval_steps_per_second": 1.331, "step": 525 }, { "epoch": 2.19, "eval_BLEU_ach": 16.299, "eval_BLEU_lgg": 17.7158, "eval_BLEU_lug": 24.1465, "eval_BLEU_mean": 18.3291, "eval_BLEU_nyn": 13.8427, "eval_BLEU_teo": 19.6415, "eval_loss": 2.6819944381713867, "eval_runtime": 117.8806, "eval_samples_per_second": 21.208, "eval_steps_per_second": 1.332, "step": 550 }, { "epoch": 2.29, "eval_BLEU_ach": 16.7809, "eval_BLEU_lgg": 17.4699, "eval_BLEU_lug": 25.1645, "eval_BLEU_mean": 18.6296, "eval_BLEU_nyn": 14.751, "eval_BLEU_teo": 18.9815, "eval_loss": 2.676717758178711, "eval_runtime": 121.9425, "eval_samples_per_second": 20.501, "eval_steps_per_second": 1.287, "step": 575 }, { "epoch": 2.39, "eval_BLEU_ach": 16.32, "eval_BLEU_lgg": 17.7798, "eval_BLEU_lug": 25.1038, "eval_BLEU_mean": 18.7062, "eval_BLEU_nyn": 14.4702, "eval_BLEU_teo": 19.8571, "eval_loss": 2.673452854156494, "eval_runtime": 124.1156, "eval_samples_per_second": 20.143, "eval_steps_per_second": 1.265, "step": 600 }, { "epoch": 2.49, "eval_BLEU_ach": 16.4085, "eval_BLEU_lgg": 17.313, "eval_BLEU_lug": 24.7636, "eval_BLEU_mean": 18.6532, "eval_BLEU_nyn": 14.7807, "eval_BLEU_teo": 20.0003, "eval_loss": 2.6720378398895264, "eval_runtime": 129.4859, "eval_samples_per_second": 19.307, "eval_steps_per_second": 1.212, "step": 625 }, { "epoch": 2.59, "eval_BLEU_ach": 17.9293, "eval_BLEU_lgg": 18.0515, "eval_BLEU_lug": 25.257, "eval_BLEU_mean": 19.2304, "eval_BLEU_nyn": 15.0062, "eval_BLEU_teo": 19.9081, "eval_loss": 2.6651768684387207, "eval_runtime": 123.349, "eval_samples_per_second": 20.268, "eval_steps_per_second": 1.273, "step": 650 }, { "epoch": 2.69, "eval_BLEU_ach": 17.6407, "eval_BLEU_lgg": 18.0205, "eval_BLEU_lug": 24.7637, "eval_BLEU_mean": 19.1218, "eval_BLEU_nyn": 14.5182, "eval_BLEU_teo": 20.6661, "eval_loss": 2.6660828590393066, "eval_runtime": 128.8489, "eval_samples_per_second": 19.403, "eval_steps_per_second": 1.218, "step": 675 }, { "epoch": 2.79, "eval_BLEU_ach": 17.3567, "eval_BLEU_lgg": 18.3187, "eval_BLEU_lug": 24.6553, "eval_BLEU_mean": 18.7991, "eval_BLEU_nyn": 14.4547, "eval_BLEU_teo": 19.2101, "eval_loss": 2.658956527709961, "eval_runtime": 126.8237, "eval_samples_per_second": 19.712, "eval_steps_per_second": 1.238, "step": 700 }, { "epoch": 2.89, "eval_BLEU_ach": 17.3235, "eval_BLEU_lgg": 18.6854, "eval_BLEU_lug": 25.0742, "eval_BLEU_mean": 19.1268, "eval_BLEU_nyn": 14.8488, "eval_BLEU_teo": 19.7021, "eval_loss": 2.6642343997955322, "eval_runtime": 118.3036, "eval_samples_per_second": 21.132, "eval_steps_per_second": 1.327, "step": 725 }, { "epoch": 2.99, "eval_BLEU_ach": 17.2203, "eval_BLEU_lgg": 18.0574, "eval_BLEU_lug": 25.487, "eval_BLEU_mean": 19.0773, "eval_BLEU_nyn": 14.7458, "eval_BLEU_teo": 19.8762, "eval_loss": 2.654205799102783, "eval_runtime": 124.2827, "eval_samples_per_second": 20.115, "eval_steps_per_second": 1.263, "step": 750 }, { "epoch": 3.09, "eval_BLEU_ach": 17.2162, "eval_BLEU_lgg": 18.6608, "eval_BLEU_lug": 25.0911, "eval_BLEU_mean": 19.2459, "eval_BLEU_nyn": 14.8809, "eval_BLEU_teo": 20.3803, "eval_loss": 2.6576144695281982, "eval_runtime": 120.8965, "eval_samples_per_second": 20.679, "eval_steps_per_second": 1.299, "step": 775 }, { "epoch": 3.19, "eval_BLEU_ach": 17.2713, "eval_BLEU_lgg": 18.5656, "eval_BLEU_lug": 25.0043, "eval_BLEU_mean": 19.155, "eval_BLEU_nyn": 14.6028, "eval_BLEU_teo": 20.3311, "eval_loss": 2.661299705505371, "eval_runtime": 127.8339, "eval_samples_per_second": 19.557, "eval_steps_per_second": 1.228, "step": 800 }, { "epoch": 3.29, "eval_BLEU_ach": 17.7839, "eval_BLEU_lgg": 18.5926, "eval_BLEU_lug": 25.1088, "eval_BLEU_mean": 19.183, "eval_BLEU_nyn": 14.4439, "eval_BLEU_teo": 19.9857, "eval_loss": 2.6544294357299805, "eval_runtime": 128.4239, "eval_samples_per_second": 19.467, "eval_steps_per_second": 1.223, "step": 825 }, { "epoch": 3.39, "eval_BLEU_ach": 17.2002, "eval_BLEU_lgg": 18.8921, "eval_BLEU_lug": 25.2989, "eval_BLEU_mean": 19.3189, "eval_BLEU_nyn": 15.0877, "eval_BLEU_teo": 20.1156, "eval_loss": 2.6571733951568604, "eval_runtime": 127.0177, "eval_samples_per_second": 19.682, "eval_steps_per_second": 1.236, "step": 850 }, { "epoch": 3.48, "eval_BLEU_ach": 17.4507, "eval_BLEU_lgg": 19.0401, "eval_BLEU_lug": 25.3251, "eval_BLEU_mean": 19.2048, "eval_BLEU_nyn": 14.5218, "eval_BLEU_teo": 19.6862, "eval_loss": 2.660137414932251, "eval_runtime": 123.8715, "eval_samples_per_second": 20.182, "eval_steps_per_second": 1.267, "step": 875 }, { "epoch": 3.58, "eval_BLEU_ach": 17.274, "eval_BLEU_lgg": 18.9203, "eval_BLEU_lug": 25.7546, "eval_BLEU_mean": 19.4384, "eval_BLEU_nyn": 14.8943, "eval_BLEU_teo": 20.349, "eval_loss": 2.6526899337768555, "eval_runtime": 128.7667, "eval_samples_per_second": 19.415, "eval_steps_per_second": 1.219, "step": 900 }, { "epoch": 3.68, "eval_BLEU_ach": 17.0936, "eval_BLEU_lgg": 18.8419, "eval_BLEU_lug": 25.4369, "eval_BLEU_mean": 19.3561, "eval_BLEU_nyn": 14.8679, "eval_BLEU_teo": 20.5401, "eval_loss": 2.653111219406128, "eval_runtime": 125.2153, "eval_samples_per_second": 19.966, "eval_steps_per_second": 1.254, "step": 925 }, { "epoch": 3.78, "eval_BLEU_ach": 17.6214, "eval_BLEU_lgg": 18.6578, "eval_BLEU_lug": 25.5293, "eval_BLEU_mean": 19.5211, "eval_BLEU_nyn": 14.9188, "eval_BLEU_teo": 20.8783, "eval_loss": 2.6521167755126953, "eval_runtime": 121.3634, "eval_samples_per_second": 20.599, "eval_steps_per_second": 1.294, "step": 950 }, { "epoch": 3.88, "eval_BLEU_ach": 17.1886, "eval_BLEU_lgg": 18.6225, "eval_BLEU_lug": 25.2939, "eval_BLEU_mean": 19.3689, "eval_BLEU_nyn": 15.3009, "eval_BLEU_teo": 20.4388, "eval_loss": 2.6507720947265625, "eval_runtime": 121.9609, "eval_samples_per_second": 20.498, "eval_steps_per_second": 1.287, "step": 975 } ], "max_steps": 1004, "num_train_epochs": 4, "total_flos": 1.0817461893464064e+17, "trial_name": null, "trial_params": null }