{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9983917797277898, "global_step": 92500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.9730164383857356e-05, "loss": 2.1298, "step": 500 }, { "epoch": 0.01, "eval_bleu": 6.9284, "eval_gen_len": 23.9635, "eval_loss": 4.2180352210998535, "eval_meteor": 0.2087, "eval_runtime": 55.8494, "eval_samples_per_second": 9.311, "eval_steps_per_second": 1.164, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.946032876771471e-05, "loss": 2.0441, "step": 1000 }, { "epoch": 0.01, "eval_bleu": 6.89, "eval_gen_len": 35.1442, "eval_loss": 4.191596508026123, "eval_meteor": 0.2133, "eval_runtime": 113.3213, "eval_samples_per_second": 4.589, "eval_steps_per_second": 0.574, "step": 1000 }, { "epoch": 0.02, "learning_rate": 4.9190493151572064e-05, "loss": 2.0547, "step": 1500 }, { "epoch": 0.02, "eval_bleu": 6.5842, "eval_gen_len": 26.2904, "eval_loss": 4.191189289093018, "eval_meteor": 0.214, "eval_runtime": 68.7752, "eval_samples_per_second": 7.561, "eval_steps_per_second": 0.945, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.892065753542942e-05, "loss": 1.9988, "step": 2000 }, { "epoch": 0.02, "eval_bleu": 6.3595, "eval_gen_len": 43.4173, "eval_loss": 4.144649982452393, "eval_meteor": 0.206, "eval_runtime": 168.802, "eval_samples_per_second": 3.081, "eval_steps_per_second": 0.385, "step": 2000 }, { "epoch": 0.03, "learning_rate": 4.865082191928677e-05, "loss": 1.9714, "step": 2500 }, { "epoch": 0.03, "eval_bleu": 6.0327, "eval_gen_len": 35.4712, "eval_loss": 4.147974491119385, "eval_meteor": 0.2087, "eval_runtime": 124.6806, "eval_samples_per_second": 4.171, "eval_steps_per_second": 0.521, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.8380986303144125e-05, "loss": 2.0021, "step": 3000 }, { "epoch": 0.03, "eval_bleu": 6.626, "eval_gen_len": 28.4385, "eval_loss": 4.138648509979248, "eval_meteor": 0.2004, "eval_runtime": 79.9371, "eval_samples_per_second": 6.505, "eval_steps_per_second": 0.813, "step": 3000 }, { "epoch": 0.04, "learning_rate": 4.8111150687001485e-05, "loss": 1.9541, "step": 3500 }, { "epoch": 0.04, "eval_bleu": 7.3794, "eval_gen_len": 32.2192, "eval_loss": 4.102533340454102, "eval_meteor": 0.2092, "eval_runtime": 104.2846, "eval_samples_per_second": 4.986, "eval_steps_per_second": 0.623, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.784131507085883e-05, "loss": 1.9843, "step": 4000 }, { "epoch": 0.04, "eval_bleu": 6.8478, "eval_gen_len": 28.3692, "eval_loss": 4.0901265144348145, "eval_meteor": 0.2026, "eval_runtime": 84.5232, "eval_samples_per_second": 6.152, "eval_steps_per_second": 0.769, "step": 4000 }, { "epoch": 0.05, "learning_rate": 4.7571479454716186e-05, "loss": 2.0089, "step": 4500 }, { "epoch": 0.05, "eval_bleu": 7.3969, "eval_gen_len": 32.6115, "eval_loss": 4.115621089935303, "eval_meteor": 0.2061, "eval_runtime": 108.942, "eval_samples_per_second": 4.773, "eval_steps_per_second": 0.597, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.730164383857355e-05, "loss": 1.8734, "step": 5000 }, { "epoch": 0.05, "eval_bleu": 6.4957, "eval_gen_len": 53.5423, "eval_loss": 4.048145771026611, "eval_meteor": 0.2047, "eval_runtime": 248.2193, "eval_samples_per_second": 2.095, "eval_steps_per_second": 0.262, "step": 5000 }, { "epoch": 0.06, "learning_rate": 4.7031808222430894e-05, "loss": 1.959, "step": 5500 }, { "epoch": 0.06, "eval_bleu": 6.7135, "eval_gen_len": 28.0327, "eval_loss": 4.0486321449279785, "eval_meteor": 0.1982, "eval_runtime": 78.1961, "eval_samples_per_second": 6.65, "eval_steps_per_second": 0.831, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.6761972606288254e-05, "loss": 1.9442, "step": 6000 }, { "epoch": 0.06, "eval_bleu": 7.8174, "eval_gen_len": 30.275, "eval_loss": 4.008225440979004, "eval_meteor": 0.2121, "eval_runtime": 88.4227, "eval_samples_per_second": 5.881, "eval_steps_per_second": 0.735, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.649213699014561e-05, "loss": 1.8855, "step": 6500 }, { "epoch": 0.07, "eval_bleu": 7.0941, "eval_gen_len": 30.8077, "eval_loss": 4.006565570831299, "eval_meteor": 0.2117, "eval_runtime": 90.6701, "eval_samples_per_second": 5.735, "eval_steps_per_second": 0.717, "step": 6500 }, { "epoch": 0.08, "learning_rate": 4.6222301374002955e-05, "loss": 1.873, "step": 7000 }, { "epoch": 0.08, "eval_bleu": 7.0348, "eval_gen_len": 34.7673, "eval_loss": 4.008674621582031, "eval_meteor": 0.2117, "eval_runtime": 140.0997, "eval_samples_per_second": 3.712, "eval_steps_per_second": 0.464, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.5952465757860316e-05, "loss": 1.9126, "step": 7500 }, { "epoch": 0.08, "eval_bleu": 7.2617, "eval_gen_len": 31.5173, "eval_loss": 3.955263614654541, "eval_meteor": 0.2188, "eval_runtime": 135.15, "eval_samples_per_second": 3.848, "eval_steps_per_second": 0.481, "step": 7500 }, { "epoch": 0.09, "learning_rate": 4.568263014171767e-05, "loss": 1.8181, "step": 8000 }, { "epoch": 0.09, "eval_bleu": 7.0348, "eval_gen_len": 29.8442, "eval_loss": 3.9835426807403564, "eval_meteor": 0.211, "eval_runtime": 112.2816, "eval_samples_per_second": 4.631, "eval_steps_per_second": 0.579, "step": 8000 }, { "epoch": 0.09, "learning_rate": 4.541279452557502e-05, "loss": 1.8553, "step": 8500 }, { "epoch": 0.09, "eval_bleu": 6.9695, "eval_gen_len": 31.0673, "eval_loss": 3.934365749359131, "eval_meteor": 0.2167, "eval_runtime": 126.5544, "eval_samples_per_second": 4.109, "eval_steps_per_second": 0.514, "step": 8500 }, { "epoch": 0.1, "learning_rate": 4.514295890943238e-05, "loss": 1.8856, "step": 9000 }, { "epoch": 0.1, "eval_bleu": 7.8161, "eval_gen_len": 25.7885, "eval_loss": 3.9472157955169678, "eval_meteor": 0.2202, "eval_runtime": 90.2192, "eval_samples_per_second": 5.764, "eval_steps_per_second": 0.72, "step": 9000 }, { "epoch": 0.1, "learning_rate": 4.487312329328973e-05, "loss": 1.8375, "step": 9500 }, { "epoch": 0.1, "eval_bleu": 7.3226, "eval_gen_len": 31.6615, "eval_loss": 3.9212045669555664, "eval_meteor": 0.2131, "eval_runtime": 122.1859, "eval_samples_per_second": 4.256, "eval_steps_per_second": 0.532, "step": 9500 }, { "epoch": 0.11, "learning_rate": 4.4603287677147084e-05, "loss": 1.8245, "step": 10000 }, { "epoch": 0.11, "eval_bleu": 7.5927, "eval_gen_len": 25.6827, "eval_loss": 3.9012672901153564, "eval_meteor": 0.2162, "eval_runtime": 87.2113, "eval_samples_per_second": 5.963, "eval_steps_per_second": 0.745, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.433345206100444e-05, "loss": 1.7563, "step": 10500 }, { "epoch": 0.11, "eval_bleu": 7.6493, "eval_gen_len": 24.7904, "eval_loss": 3.9165987968444824, "eval_meteor": 0.2225, "eval_runtime": 71.5438, "eval_samples_per_second": 7.268, "eval_steps_per_second": 0.909, "step": 10500 }, { "epoch": 0.12, "learning_rate": 4.406361644486179e-05, "loss": 1.7739, "step": 11000 }, { "epoch": 0.12, "eval_bleu": 7.0986, "eval_gen_len": 32.6269, "eval_loss": 3.892530679702759, "eval_meteor": 0.2162, "eval_runtime": 135.1518, "eval_samples_per_second": 3.848, "eval_steps_per_second": 0.481, "step": 11000 }, { "epoch": 0.12, "learning_rate": 4.3793780828719146e-05, "loss": 1.8389, "step": 11500 }, { "epoch": 0.12, "eval_bleu": 7.355, "eval_gen_len": 29.1173, "eval_loss": 3.891714572906494, "eval_meteor": 0.222, "eval_runtime": 103.9412, "eval_samples_per_second": 5.003, "eval_steps_per_second": 0.625, "step": 11500 }, { "epoch": 0.13, "learning_rate": 4.35239452125765e-05, "loss": 1.8359, "step": 12000 }, { "epoch": 0.13, "eval_bleu": 7.7276, "eval_gen_len": 26.5288, "eval_loss": 3.8721096515655518, "eval_meteor": 0.2224, "eval_runtime": 92.6122, "eval_samples_per_second": 5.615, "eval_steps_per_second": 0.702, "step": 12000 }, { "epoch": 0.13, "learning_rate": 4.325410959643385e-05, "loss": 1.774, "step": 12500 }, { "epoch": 0.13, "eval_bleu": 7.3227, "eval_gen_len": 25.8019, "eval_loss": 3.852199077606201, "eval_meteor": 0.2256, "eval_runtime": 80.7019, "eval_samples_per_second": 6.443, "eval_steps_per_second": 0.805, "step": 12500 }, { "epoch": 0.14, "learning_rate": 4.298427398029121e-05, "loss": 1.8502, "step": 13000 }, { "epoch": 0.14, "eval_bleu": 8.0288, "eval_gen_len": 25.7327, "eval_loss": 3.838671922683716, "eval_meteor": 0.2306, "eval_runtime": 81.2754, "eval_samples_per_second": 6.398, "eval_steps_per_second": 0.8, "step": 13000 }, { "epoch": 0.15, "learning_rate": 4.271443836414856e-05, "loss": 1.7748, "step": 13500 }, { "epoch": 0.15, "eval_bleu": 7.8972, "eval_gen_len": 25.8135, "eval_loss": 3.847898006439209, "eval_meteor": 0.2289, "eval_runtime": 77.3621, "eval_samples_per_second": 6.722, "eval_steps_per_second": 0.84, "step": 13500 }, { "epoch": 0.15, "learning_rate": 4.244460274800592e-05, "loss": 1.8212, "step": 14000 }, { "epoch": 0.15, "eval_bleu": 7.617, "eval_gen_len": 31.2462, "eval_loss": 3.831773281097412, "eval_meteor": 0.2249, "eval_runtime": 133.705, "eval_samples_per_second": 3.889, "eval_steps_per_second": 0.486, "step": 14000 }, { "epoch": 0.16, "learning_rate": 4.217476713186327e-05, "loss": 1.7943, "step": 14500 }, { "epoch": 0.16, "eval_bleu": 6.6778, "eval_gen_len": 27.7673, "eval_loss": 3.8318052291870117, "eval_meteor": 0.2197, "eval_runtime": 95.8818, "eval_samples_per_second": 5.423, "eval_steps_per_second": 0.678, "step": 14500 }, { "epoch": 0.16, "learning_rate": 4.190493151572062e-05, "loss": 1.7869, "step": 15000 }, { "epoch": 0.16, "eval_bleu": 7.5995, "eval_gen_len": 25.6712, "eval_loss": 3.793452262878418, "eval_meteor": 0.2261, "eval_runtime": 84.1529, "eval_samples_per_second": 6.179, "eval_steps_per_second": 0.772, "step": 15000 }, { "epoch": 0.17, "learning_rate": 4.163509589957798e-05, "loss": 1.7802, "step": 15500 }, { "epoch": 0.17, "eval_bleu": 6.9484, "eval_gen_len": 34.8865, "eval_loss": 3.788137674331665, "eval_meteor": 0.2214, "eval_runtime": 159.4169, "eval_samples_per_second": 3.262, "eval_steps_per_second": 0.408, "step": 15500 }, { "epoch": 0.17, "learning_rate": 4.136526028343533e-05, "loss": 1.6783, "step": 16000 }, { "epoch": 0.17, "eval_bleu": 7.1202, "eval_gen_len": 26.2692, "eval_loss": 3.7846884727478027, "eval_meteor": 0.2171, "eval_runtime": 88.5814, "eval_samples_per_second": 5.87, "eval_steps_per_second": 0.734, "step": 16000 }, { "epoch": 0.18, "learning_rate": 4.109542466729269e-05, "loss": 1.6773, "step": 16500 }, { "epoch": 0.18, "eval_bleu": 8.093, "eval_gen_len": 26.9346, "eval_loss": 3.772686243057251, "eval_meteor": 0.222, "eval_runtime": 96.6464, "eval_samples_per_second": 5.38, "eval_steps_per_second": 0.673, "step": 16500 }, { "epoch": 0.18, "learning_rate": 4.0825589051150044e-05, "loss": 1.6435, "step": 17000 }, { "epoch": 0.18, "eval_bleu": 7.5486, "eval_gen_len": 26.6231, "eval_loss": 3.785611152648926, "eval_meteor": 0.2136, "eval_runtime": 89.9499, "eval_samples_per_second": 5.781, "eval_steps_per_second": 0.723, "step": 17000 }, { "epoch": 0.19, "learning_rate": 4.055575343500739e-05, "loss": 1.6892, "step": 17500 }, { "epoch": 0.19, "eval_bleu": 7.2576, "eval_gen_len": 31.4712, "eval_loss": 3.778566837310791, "eval_meteor": 0.2088, "eval_runtime": 124.5211, "eval_samples_per_second": 4.176, "eval_steps_per_second": 0.522, "step": 17500 }, { "epoch": 0.19, "learning_rate": 4.028591781886475e-05, "loss": 1.7355, "step": 18000 }, { "epoch": 0.19, "eval_bleu": 7.8495, "eval_gen_len": 27.7846, "eval_loss": 3.739633798599243, "eval_meteor": 0.2249, "eval_runtime": 103.8056, "eval_samples_per_second": 5.009, "eval_steps_per_second": 0.626, "step": 18000 }, { "epoch": 0.2, "learning_rate": 4.0016082202722105e-05, "loss": 1.708, "step": 18500 }, { "epoch": 0.2, "eval_bleu": 8.0278, "eval_gen_len": 23.0538, "eval_loss": 3.7668354511260986, "eval_meteor": 0.2207, "eval_runtime": 70.0581, "eval_samples_per_second": 7.422, "eval_steps_per_second": 0.928, "step": 18500 }, { "epoch": 0.21, "learning_rate": 3.974624658657945e-05, "loss": 1.6596, "step": 19000 }, { "epoch": 0.21, "eval_bleu": 7.5685, "eval_gen_len": 28.1115, "eval_loss": 3.7208786010742188, "eval_meteor": 0.2287, "eval_runtime": 102.7576, "eval_samples_per_second": 5.06, "eval_steps_per_second": 0.633, "step": 19000 }, { "epoch": 0.21, "learning_rate": 3.947641097043681e-05, "loss": 1.7048, "step": 19500 }, { "epoch": 0.21, "eval_bleu": 8.1065, "eval_gen_len": 23.6308, "eval_loss": 3.7291922569274902, "eval_meteor": 0.2308, "eval_runtime": 60.5983, "eval_samples_per_second": 8.581, "eval_steps_per_second": 1.073, "step": 19500 }, { "epoch": 0.22, "learning_rate": 3.9206575354294166e-05, "loss": 1.671, "step": 20000 }, { "epoch": 0.22, "eval_bleu": 8.2527, "eval_gen_len": 28.6962, "eval_loss": 3.6946775913238525, "eval_meteor": 0.2352, "eval_runtime": 109.8594, "eval_samples_per_second": 4.733, "eval_steps_per_second": 0.592, "step": 20000 }, { "epoch": 0.22, "learning_rate": 3.893673973815152e-05, "loss": 1.7021, "step": 20500 }, { "epoch": 0.22, "eval_bleu": 7.9211, "eval_gen_len": 25.9538, "eval_loss": 3.711056709289551, "eval_meteor": 0.2288, "eval_runtime": 99.5819, "eval_samples_per_second": 5.222, "eval_steps_per_second": 0.653, "step": 20500 }, { "epoch": 0.23, "learning_rate": 3.8666904122008874e-05, "loss": 1.6431, "step": 21000 }, { "epoch": 0.23, "eval_bleu": 8.1937, "eval_gen_len": 27.2942, "eval_loss": 3.7131733894348145, "eval_meteor": 0.2303, "eval_runtime": 84.3649, "eval_samples_per_second": 6.164, "eval_steps_per_second": 0.77, "step": 21000 }, { "epoch": 0.23, "learning_rate": 3.839706850586623e-05, "loss": 1.7551, "step": 21500 }, { "epoch": 0.23, "eval_bleu": 8.0326, "eval_gen_len": 32.4615, "eval_loss": 3.7119548320770264, "eval_meteor": 0.2299, "eval_runtime": 130.1926, "eval_samples_per_second": 3.994, "eval_steps_per_second": 0.499, "step": 21500 }, { "epoch": 0.24, "learning_rate": 3.812723288972358e-05, "loss": 1.672, "step": 22000 }, { "epoch": 0.24, "eval_bleu": 8.1471, "eval_gen_len": 26.1, "eval_loss": 3.708371639251709, "eval_meteor": 0.2274, "eval_runtime": 101.2958, "eval_samples_per_second": 5.133, "eval_steps_per_second": 0.642, "step": 22000 }, { "epoch": 0.24, "learning_rate": 3.7857397273580935e-05, "loss": 1.6464, "step": 22500 }, { "epoch": 0.24, "eval_bleu": 7.974, "eval_gen_len": 26.8558, "eval_loss": 3.677321195602417, "eval_meteor": 0.2287, "eval_runtime": 101.1748, "eval_samples_per_second": 5.14, "eval_steps_per_second": 0.642, "step": 22500 }, { "epoch": 0.25, "learning_rate": 3.758756165743829e-05, "loss": 1.6484, "step": 23000 }, { "epoch": 0.25, "eval_bleu": 7.609, "eval_gen_len": 28.8173, "eval_loss": 3.6681010723114014, "eval_meteor": 0.2303, "eval_runtime": 110.9868, "eval_samples_per_second": 4.685, "eval_steps_per_second": 0.586, "step": 23000 }, { "epoch": 0.25, "learning_rate": 3.731772604129564e-05, "loss": 1.6568, "step": 23500 }, { "epoch": 0.25, "eval_bleu": 7.1469, "eval_gen_len": 27.2288, "eval_loss": 3.692307949066162, "eval_meteor": 0.2255, "eval_runtime": 94.7667, "eval_samples_per_second": 5.487, "eval_steps_per_second": 0.686, "step": 23500 }, { "epoch": 0.26, "learning_rate": 3.7047890425152996e-05, "loss": 1.6389, "step": 24000 }, { "epoch": 0.26, "eval_bleu": 7.7818, "eval_gen_len": 24.6692, "eval_loss": 3.660385847091675, "eval_meteor": 0.2249, "eval_runtime": 81.5305, "eval_samples_per_second": 6.378, "eval_steps_per_second": 0.797, "step": 24000 }, { "epoch": 0.26, "learning_rate": 3.677805480901036e-05, "loss": 1.6639, "step": 24500 }, { "epoch": 0.26, "eval_bleu": 8.0474, "eval_gen_len": 23.9308, "eval_loss": 3.659407377243042, "eval_meteor": 0.2284, "eval_runtime": 73.4805, "eval_samples_per_second": 7.077, "eval_steps_per_second": 0.885, "step": 24500 }, { "epoch": 0.27, "learning_rate": 3.6508219192867704e-05, "loss": 1.6965, "step": 25000 }, { "epoch": 0.27, "eval_bleu": 7.8415, "eval_gen_len": 29.1077, "eval_loss": 3.663590431213379, "eval_meteor": 0.2242, "eval_runtime": 99.3013, "eval_samples_per_second": 5.237, "eval_steps_per_second": 0.655, "step": 25000 }, { "epoch": 0.28, "learning_rate": 3.623838357672506e-05, "loss": 1.6891, "step": 25500 }, { "epoch": 0.28, "eval_bleu": 7.6988, "eval_gen_len": 26.9077, "eval_loss": 3.633793354034424, "eval_meteor": 0.2278, "eval_runtime": 87.8219, "eval_samples_per_second": 5.921, "eval_steps_per_second": 0.74, "step": 25500 }, { "epoch": 0.28, "learning_rate": 3.596854796058242e-05, "loss": 1.6518, "step": 26000 }, { "epoch": 0.28, "eval_bleu": 8.0628, "eval_gen_len": 24.2231, "eval_loss": 3.6404640674591064, "eval_meteor": 0.2272, "eval_runtime": 79.4014, "eval_samples_per_second": 6.549, "eval_steps_per_second": 0.819, "step": 26000 }, { "epoch": 0.29, "learning_rate": 3.5698712344439765e-05, "loss": 1.5915, "step": 26500 }, { "epoch": 0.29, "eval_bleu": 8.4956, "eval_gen_len": 25.925, "eval_loss": 3.6168606281280518, "eval_meteor": 0.2327, "eval_runtime": 84.1477, "eval_samples_per_second": 6.18, "eval_steps_per_second": 0.772, "step": 26500 }, { "epoch": 0.29, "learning_rate": 3.5428876728297126e-05, "loss": 1.6756, "step": 27000 }, { "epoch": 0.29, "eval_bleu": 7.8645, "eval_gen_len": 25.7115, "eval_loss": 3.6374764442443848, "eval_meteor": 0.2293, "eval_runtime": 68.4092, "eval_samples_per_second": 7.601, "eval_steps_per_second": 0.95, "step": 27000 }, { "epoch": 0.3, "learning_rate": 3.515904111215448e-05, "loss": 1.6085, "step": 27500 }, { "epoch": 0.3, "eval_bleu": 7.5502, "eval_gen_len": 27.7, "eval_loss": 3.6482863426208496, "eval_meteor": 0.2193, "eval_runtime": 107.6058, "eval_samples_per_second": 4.832, "eval_steps_per_second": 0.604, "step": 27500 }, { "epoch": 0.3, "learning_rate": 3.4889205496011826e-05, "loss": 1.5557, "step": 28000 }, { "epoch": 0.3, "eval_bleu": 7.4827, "eval_gen_len": 37.5385, "eval_loss": 3.6158738136291504, "eval_meteor": 0.2182, "eval_runtime": 151.1373, "eval_samples_per_second": 3.441, "eval_steps_per_second": 0.43, "step": 28000 }, { "epoch": 0.31, "learning_rate": 3.461936987986919e-05, "loss": 1.5766, "step": 28500 }, { "epoch": 0.31, "eval_bleu": 7.9417, "eval_gen_len": 28.1673, "eval_loss": 3.6332404613494873, "eval_meteor": 0.2271, "eval_runtime": 96.6869, "eval_samples_per_second": 5.378, "eval_steps_per_second": 0.672, "step": 28500 }, { "epoch": 0.31, "learning_rate": 3.434953426372654e-05, "loss": 1.6282, "step": 29000 }, { "epoch": 0.31, "eval_bleu": 7.6697, "eval_gen_len": 27.3019, "eval_loss": 3.6174378395080566, "eval_meteor": 0.2208, "eval_runtime": 89.3468, "eval_samples_per_second": 5.82, "eval_steps_per_second": 0.728, "step": 29000 }, { "epoch": 0.32, "learning_rate": 3.407969864758389e-05, "loss": 1.5901, "step": 29500 }, { "epoch": 0.32, "eval_bleu": 8.211, "eval_gen_len": 27.4885, "eval_loss": 3.6009418964385986, "eval_meteor": 0.2295, "eval_runtime": 86.4935, "eval_samples_per_second": 6.012, "eval_steps_per_second": 0.752, "step": 29500 }, { "epoch": 0.32, "learning_rate": 3.380986303144125e-05, "loss": 1.618, "step": 30000 }, { "epoch": 0.32, "eval_bleu": 7.8473, "eval_gen_len": 26.3385, "eval_loss": 3.597890853881836, "eval_meteor": 0.2276, "eval_runtime": 67.1228, "eval_samples_per_second": 7.747, "eval_steps_per_second": 0.968, "step": 30000 }, { "epoch": 0.33, "learning_rate": 3.35400274152986e-05, "loss": 1.5292, "step": 30500 }, { "epoch": 0.33, "eval_bleu": 7.6579, "eval_gen_len": 28.0481, "eval_loss": 3.578139543533325, "eval_meteor": 0.2348, "eval_runtime": 88.2928, "eval_samples_per_second": 5.889, "eval_steps_per_second": 0.736, "step": 30500 }, { "epoch": 0.33, "learning_rate": 3.3270191799155956e-05, "loss": 1.539, "step": 31000 }, { "epoch": 0.33, "eval_bleu": 7.9101, "eval_gen_len": 25.1115, "eval_loss": 3.56876540184021, "eval_meteor": 0.2294, "eval_runtime": 78.3276, "eval_samples_per_second": 6.639, "eval_steps_per_second": 0.83, "step": 31000 }, { "epoch": 0.34, "learning_rate": 3.300035618301331e-05, "loss": 1.6394, "step": 31500 }, { "epoch": 0.34, "eval_bleu": 8.1847, "eval_gen_len": 24.5731, "eval_loss": 3.5614802837371826, "eval_meteor": 0.2322, "eval_runtime": 70.0131, "eval_samples_per_second": 7.427, "eval_steps_per_second": 0.928, "step": 31500 }, { "epoch": 0.35, "learning_rate": 3.273052056687066e-05, "loss": 1.5546, "step": 32000 }, { "epoch": 0.35, "eval_bleu": 7.9997, "eval_gen_len": 25.1596, "eval_loss": 3.56968355178833, "eval_meteor": 0.2339, "eval_runtime": 68.1359, "eval_samples_per_second": 7.632, "eval_steps_per_second": 0.954, "step": 32000 }, { "epoch": 0.35, "learning_rate": 3.246068495072802e-05, "loss": 1.5173, "step": 32500 }, { "epoch": 0.35, "eval_bleu": 8.1149, "eval_gen_len": 27.0596, "eval_loss": 3.5782711505889893, "eval_meteor": 0.229, "eval_runtime": 92.9196, "eval_samples_per_second": 5.596, "eval_steps_per_second": 0.7, "step": 32500 }, { "epoch": 0.36, "learning_rate": 3.219084933458537e-05, "loss": 1.6158, "step": 33000 }, { "epoch": 0.36, "eval_bleu": 8.6261, "eval_gen_len": 25.5346, "eval_loss": 3.5298867225646973, "eval_meteor": 0.2419, "eval_runtime": 64.5727, "eval_samples_per_second": 8.053, "eval_steps_per_second": 1.007, "step": 33000 }, { "epoch": 0.36, "learning_rate": 3.1921013718442724e-05, "loss": 1.5266, "step": 33500 }, { "epoch": 0.36, "eval_bleu": 8.7619, "eval_gen_len": 31.4077, "eval_loss": 3.522501230239868, "eval_meteor": 0.2319, "eval_runtime": 122.5254, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.531, "step": 33500 }, { "epoch": 0.37, "learning_rate": 3.1651178102300085e-05, "loss": 1.577, "step": 34000 }, { "epoch": 0.37, "eval_bleu": 8.611, "eval_gen_len": 26.15, "eval_loss": 3.542168140411377, "eval_meteor": 0.2346, "eval_runtime": 72.838, "eval_samples_per_second": 7.139, "eval_steps_per_second": 0.892, "step": 34000 }, { "epoch": 0.37, "learning_rate": 3.138134248615743e-05, "loss": 1.5787, "step": 34500 }, { "epoch": 0.37, "eval_bleu": 8.0924, "eval_gen_len": 29.2692, "eval_loss": 3.5313644409179688, "eval_meteor": 0.2301, "eval_runtime": 104.0102, "eval_samples_per_second": 5.0, "eval_steps_per_second": 0.625, "step": 34500 }, { "epoch": 0.38, "learning_rate": 3.111150687001479e-05, "loss": 1.5635, "step": 35000 }, { "epoch": 0.38, "eval_bleu": 8.2094, "eval_gen_len": 25.4942, "eval_loss": 3.5328898429870605, "eval_meteor": 0.2314, "eval_runtime": 62.8928, "eval_samples_per_second": 8.268, "eval_steps_per_second": 1.034, "step": 35000 }, { "epoch": 0.38, "learning_rate": 3.0841671253872146e-05, "loss": 1.4967, "step": 35500 }, { "epoch": 0.38, "eval_bleu": 7.9224, "eval_gen_len": 28.1365, "eval_loss": 3.5347652435302734, "eval_meteor": 0.2298, "eval_runtime": 95.4251, "eval_samples_per_second": 5.449, "eval_steps_per_second": 0.681, "step": 35500 }, { "epoch": 0.39, "learning_rate": 3.057183563772949e-05, "loss": 1.5375, "step": 36000 }, { "epoch": 0.39, "eval_bleu": 8.062, "eval_gen_len": 25.7346, "eval_loss": 3.5164051055908203, "eval_meteor": 0.2375, "eval_runtime": 72.4588, "eval_samples_per_second": 7.176, "eval_steps_per_second": 0.897, "step": 36000 }, { "epoch": 0.39, "learning_rate": 3.030200002158685e-05, "loss": 1.5928, "step": 36500 }, { "epoch": 0.39, "eval_bleu": 8.4845, "eval_gen_len": 25.8462, "eval_loss": 3.5118257999420166, "eval_meteor": 0.2439, "eval_runtime": 69.4765, "eval_samples_per_second": 7.485, "eval_steps_per_second": 0.936, "step": 36500 }, { "epoch": 0.4, "learning_rate": 3.0032164405444208e-05, "loss": 1.582, "step": 37000 }, { "epoch": 0.4, "eval_bleu": 8.2526, "eval_gen_len": 25.9327, "eval_loss": 3.505610227584839, "eval_meteor": 0.2395, "eval_runtime": 60.8773, "eval_samples_per_second": 8.542, "eval_steps_per_second": 1.068, "step": 37000 }, { "epoch": 0.4, "learning_rate": 2.9762328789301558e-05, "loss": 1.4958, "step": 37500 }, { "epoch": 0.4, "eval_bleu": 8.3315, "eval_gen_len": 28.5365, "eval_loss": 3.517829418182373, "eval_meteor": 0.2271, "eval_runtime": 94.0897, "eval_samples_per_second": 5.527, "eval_steps_per_second": 0.691, "step": 37500 }, { "epoch": 0.41, "learning_rate": 2.9492493173158915e-05, "loss": 1.5524, "step": 38000 }, { "epoch": 0.41, "eval_bleu": 8.765, "eval_gen_len": 26.2865, "eval_loss": 3.506021738052368, "eval_meteor": 0.2364, "eval_runtime": 79.4441, "eval_samples_per_second": 6.545, "eval_steps_per_second": 0.818, "step": 38000 }, { "epoch": 0.42, "learning_rate": 2.922265755701627e-05, "loss": 1.4689, "step": 38500 }, { "epoch": 0.42, "eval_bleu": 8.5295, "eval_gen_len": 29.5769, "eval_loss": 3.5012190341949463, "eval_meteor": 0.2327, "eval_runtime": 103.4617, "eval_samples_per_second": 5.026, "eval_steps_per_second": 0.628, "step": 38500 }, { "epoch": 0.42, "learning_rate": 2.895282194087362e-05, "loss": 1.5345, "step": 39000 }, { "epoch": 0.42, "eval_bleu": 8.7423, "eval_gen_len": 25.3269, "eval_loss": 3.498300313949585, "eval_meteor": 0.2342, "eval_runtime": 73.6072, "eval_samples_per_second": 7.065, "eval_steps_per_second": 0.883, "step": 39000 }, { "epoch": 0.43, "learning_rate": 2.8682986324730976e-05, "loss": 1.5282, "step": 39500 }, { "epoch": 0.43, "eval_bleu": 8.9296, "eval_gen_len": 25.1173, "eval_loss": 3.4799299240112305, "eval_meteor": 0.2425, "eval_runtime": 71.1898, "eval_samples_per_second": 7.304, "eval_steps_per_second": 0.913, "step": 39500 }, { "epoch": 0.43, "learning_rate": 2.8413150708588333e-05, "loss": 1.5021, "step": 40000 }, { "epoch": 0.43, "eval_bleu": 7.9284, "eval_gen_len": 26.2962, "eval_loss": 3.4839093685150146, "eval_meteor": 0.236, "eval_runtime": 77.544, "eval_samples_per_second": 6.706, "eval_steps_per_second": 0.838, "step": 40000 }, { "epoch": 0.44, "learning_rate": 2.8143315092445684e-05, "loss": 1.5072, "step": 40500 }, { "epoch": 0.44, "eval_bleu": 8.7855, "eval_gen_len": 25.425, "eval_loss": 3.4775073528289795, "eval_meteor": 0.238, "eval_runtime": 84.0571, "eval_samples_per_second": 6.186, "eval_steps_per_second": 0.773, "step": 40500 }, { "epoch": 0.44, "learning_rate": 2.7873479476303038e-05, "loss": 1.5348, "step": 41000 }, { "epoch": 0.44, "eval_bleu": 8.3592, "eval_gen_len": 25.4673, "eval_loss": 3.4518544673919678, "eval_meteor": 0.24, "eval_runtime": 81.9583, "eval_samples_per_second": 6.345, "eval_steps_per_second": 0.793, "step": 41000 }, { "epoch": 0.45, "learning_rate": 2.7603643860160395e-05, "loss": 1.5236, "step": 41500 }, { "epoch": 0.45, "eval_bleu": 8.589, "eval_gen_len": 28.1846, "eval_loss": 3.466686248779297, "eval_meteor": 0.2352, "eval_runtime": 91.2864, "eval_samples_per_second": 5.696, "eval_steps_per_second": 0.712, "step": 41500 }, { "epoch": 0.45, "learning_rate": 2.7333808244017745e-05, "loss": 1.4695, "step": 42000 }, { "epoch": 0.45, "eval_bleu": 8.5467, "eval_gen_len": 26.9385, "eval_loss": 3.4435806274414062, "eval_meteor": 0.2396, "eval_runtime": 89.9168, "eval_samples_per_second": 5.783, "eval_steps_per_second": 0.723, "step": 42000 }, { "epoch": 0.46, "learning_rate": 2.70639726278751e-05, "loss": 1.4733, "step": 42500 }, { "epoch": 0.46, "eval_bleu": 8.3771, "eval_gen_len": 26.6096, "eval_loss": 3.455449342727661, "eval_meteor": 0.2347, "eval_runtime": 85.0784, "eval_samples_per_second": 6.112, "eval_steps_per_second": 0.764, "step": 42500 }, { "epoch": 0.46, "learning_rate": 2.6794137011732456e-05, "loss": 1.5398, "step": 43000 }, { "epoch": 0.46, "eval_bleu": 9.2126, "eval_gen_len": 24.6538, "eval_loss": 3.436018943786621, "eval_meteor": 0.2434, "eval_runtime": 81.8122, "eval_samples_per_second": 6.356, "eval_steps_per_second": 0.795, "step": 43000 }, { "epoch": 0.47, "learning_rate": 2.6524301395589806e-05, "loss": 1.5596, "step": 43500 }, { "epoch": 0.47, "eval_bleu": 9.2745, "eval_gen_len": 26.1, "eval_loss": 3.425743579864502, "eval_meteor": 0.2521, "eval_runtime": 70.71, "eval_samples_per_second": 7.354, "eval_steps_per_second": 0.919, "step": 43500 }, { "epoch": 0.47, "learning_rate": 2.6254465779447164e-05, "loss": 1.505, "step": 44000 }, { "epoch": 0.47, "eval_bleu": 8.8478, "eval_gen_len": 24.9731, "eval_loss": 3.428138017654419, "eval_meteor": 0.2421, "eval_runtime": 81.3515, "eval_samples_per_second": 6.392, "eval_steps_per_second": 0.799, "step": 44000 }, { "epoch": 0.48, "learning_rate": 2.5984630163304517e-05, "loss": 1.48, "step": 44500 }, { "epoch": 0.48, "eval_bleu": 9.237, "eval_gen_len": 24.2058, "eval_loss": 3.465346336364746, "eval_meteor": 0.2392, "eval_runtime": 78.2893, "eval_samples_per_second": 6.642, "eval_steps_per_second": 0.83, "step": 44500 }, { "epoch": 0.49, "learning_rate": 2.5714794547161868e-05, "loss": 1.5167, "step": 45000 }, { "epoch": 0.49, "eval_bleu": 8.8344, "eval_gen_len": 25.8192, "eval_loss": 3.440796375274658, "eval_meteor": 0.2355, "eval_runtime": 73.3566, "eval_samples_per_second": 7.089, "eval_steps_per_second": 0.886, "step": 45000 }, { "epoch": 0.49, "learning_rate": 2.5444958931019225e-05, "loss": 1.4691, "step": 45500 }, { "epoch": 0.49, "eval_bleu": 9.0831, "eval_gen_len": 25.8577, "eval_loss": 3.424842596054077, "eval_meteor": 0.2449, "eval_runtime": 83.8868, "eval_samples_per_second": 6.199, "eval_steps_per_second": 0.775, "step": 45500 }, { "epoch": 0.5, "learning_rate": 2.5175123314876582e-05, "loss": 1.4734, "step": 46000 }, { "epoch": 0.5, "eval_bleu": 8.6622, "eval_gen_len": 26.0385, "eval_loss": 3.406123399734497, "eval_meteor": 0.2472, "eval_runtime": 87.066, "eval_samples_per_second": 5.972, "eval_steps_per_second": 0.747, "step": 46000 }, { "epoch": 0.5, "learning_rate": 2.4905287698733932e-05, "loss": 1.4354, "step": 46500 }, { "epoch": 0.5, "eval_bleu": 9.3409, "eval_gen_len": 26.0077, "eval_loss": 3.4149692058563232, "eval_meteor": 0.2462, "eval_runtime": 75.3366, "eval_samples_per_second": 6.902, "eval_steps_per_second": 0.863, "step": 46500 }, { "epoch": 0.51, "learning_rate": 2.4635452082591286e-05, "loss": 1.4841, "step": 47000 }, { "epoch": 0.51, "eval_bleu": 8.3645, "eval_gen_len": 27.2692, "eval_loss": 3.410163640975952, "eval_meteor": 0.2377, "eval_runtime": 88.8645, "eval_samples_per_second": 5.852, "eval_steps_per_second": 0.731, "step": 47000 }, { "epoch": 0.51, "learning_rate": 2.436561646644864e-05, "loss": 1.4163, "step": 47500 }, { "epoch": 0.51, "eval_bleu": 8.7482, "eval_gen_len": 25.1442, "eval_loss": 3.4322900772094727, "eval_meteor": 0.2329, "eval_runtime": 80.4665, "eval_samples_per_second": 6.462, "eval_steps_per_second": 0.808, "step": 47500 }, { "epoch": 0.52, "learning_rate": 2.4095780850305994e-05, "loss": 1.4859, "step": 48000 }, { "epoch": 0.52, "eval_bleu": 9.0356, "eval_gen_len": 27.9096, "eval_loss": 3.409453868865967, "eval_meteor": 0.238, "eval_runtime": 97.3658, "eval_samples_per_second": 5.341, "eval_steps_per_second": 0.668, "step": 48000 }, { "epoch": 0.52, "learning_rate": 2.382594523416335e-05, "loss": 1.5179, "step": 48500 }, { "epoch": 0.52, "eval_bleu": 8.9689, "eval_gen_len": 25.9173, "eval_loss": 3.4041757583618164, "eval_meteor": 0.2426, "eval_runtime": 66.1422, "eval_samples_per_second": 7.862, "eval_steps_per_second": 0.983, "step": 48500 }, { "epoch": 0.53, "learning_rate": 2.35561096180207e-05, "loss": 1.4451, "step": 49000 }, { "epoch": 0.53, "eval_bleu": 9.0897, "eval_gen_len": 24.6654, "eval_loss": 3.383331537246704, "eval_meteor": 0.244, "eval_runtime": 60.7417, "eval_samples_per_second": 8.561, "eval_steps_per_second": 1.07, "step": 49000 }, { "epoch": 0.53, "learning_rate": 2.3286274001878055e-05, "loss": 1.4377, "step": 49500 }, { "epoch": 0.53, "eval_bleu": 9.0223, "eval_gen_len": 25.3788, "eval_loss": 3.3942527770996094, "eval_meteor": 0.2481, "eval_runtime": 71.1329, "eval_samples_per_second": 7.31, "eval_steps_per_second": 0.914, "step": 49500 }, { "epoch": 0.54, "learning_rate": 2.3016438385735412e-05, "loss": 1.4162, "step": 50000 }, { "epoch": 0.54, "eval_bleu": 9.5501, "eval_gen_len": 25.05, "eval_loss": 3.3857383728027344, "eval_meteor": 0.2468, "eval_runtime": 77.3856, "eval_samples_per_second": 6.72, "eval_steps_per_second": 0.84, "step": 50000 }, { "epoch": 0.55, "learning_rate": 2.2746602769592766e-05, "loss": 1.5209, "step": 50500 }, { "epoch": 0.55, "eval_bleu": 8.9994, "eval_gen_len": 25.1058, "eval_loss": 3.3768198490142822, "eval_meteor": 0.2424, "eval_runtime": 72.911, "eval_samples_per_second": 7.132, "eval_steps_per_second": 0.891, "step": 50500 }, { "epoch": 0.55, "learning_rate": 2.247676715345012e-05, "loss": 1.426, "step": 51000 }, { "epoch": 0.55, "eval_bleu": 9.2556, "eval_gen_len": 25.3865, "eval_loss": 3.3841092586517334, "eval_meteor": 0.2443, "eval_runtime": 62.6291, "eval_samples_per_second": 8.303, "eval_steps_per_second": 1.038, "step": 51000 }, { "epoch": 0.56, "learning_rate": 2.2206931537307473e-05, "loss": 1.4453, "step": 51500 }, { "epoch": 0.56, "eval_bleu": 9.583, "eval_gen_len": 26.3269, "eval_loss": 3.3895211219787598, "eval_meteor": 0.2448, "eval_runtime": 85.3314, "eval_samples_per_second": 6.094, "eval_steps_per_second": 0.762, "step": 51500 }, { "epoch": 0.56, "learning_rate": 2.1937095921164827e-05, "loss": 1.4162, "step": 52000 }, { "epoch": 0.56, "eval_bleu": 9.0803, "eval_gen_len": 25.5231, "eval_loss": 3.385866165161133, "eval_meteor": 0.2413, "eval_runtime": 75.8979, "eval_samples_per_second": 6.851, "eval_steps_per_second": 0.856, "step": 52000 }, { "epoch": 0.57, "learning_rate": 2.1667260305022184e-05, "loss": 1.4107, "step": 52500 }, { "epoch": 0.57, "eval_bleu": 8.9249, "eval_gen_len": 24.8615, "eval_loss": 3.3849904537200928, "eval_meteor": 0.241, "eval_runtime": 72.4747, "eval_samples_per_second": 7.175, "eval_steps_per_second": 0.897, "step": 52500 }, { "epoch": 0.57, "learning_rate": 2.1397424688879535e-05, "loss": 1.4474, "step": 53000 }, { "epoch": 0.57, "eval_bleu": 9.5048, "eval_gen_len": 23.975, "eval_loss": 3.3705201148986816, "eval_meteor": 0.2474, "eval_runtime": 76.3998, "eval_samples_per_second": 6.806, "eval_steps_per_second": 0.851, "step": 53000 }, { "epoch": 0.58, "learning_rate": 2.1127589072736888e-05, "loss": 1.4336, "step": 53500 }, { "epoch": 0.58, "eval_bleu": 9.7945, "eval_gen_len": 24.3885, "eval_loss": 3.349461078643799, "eval_meteor": 0.2549, "eval_runtime": 69.7201, "eval_samples_per_second": 7.458, "eval_steps_per_second": 0.932, "step": 53500 }, { "epoch": 0.58, "learning_rate": 2.0857753456594245e-05, "loss": 1.4261, "step": 54000 }, { "epoch": 0.58, "eval_bleu": 9.5374, "eval_gen_len": 24.2692, "eval_loss": 3.369290351867676, "eval_meteor": 0.2444, "eval_runtime": 67.4768, "eval_samples_per_second": 7.706, "eval_steps_per_second": 0.963, "step": 54000 }, { "epoch": 0.59, "learning_rate": 2.05879178404516e-05, "loss": 1.409, "step": 54500 }, { "epoch": 0.59, "eval_bleu": 9.3034, "eval_gen_len": 24.625, "eval_loss": 3.3803343772888184, "eval_meteor": 0.2436, "eval_runtime": 68.3273, "eval_samples_per_second": 7.61, "eval_steps_per_second": 0.951, "step": 54500 }, { "epoch": 0.59, "learning_rate": 2.0318082224308953e-05, "loss": 1.4364, "step": 55000 }, { "epoch": 0.59, "eval_bleu": 9.6554, "eval_gen_len": 25.5654, "eval_loss": 3.371992349624634, "eval_meteor": 0.2457, "eval_runtime": 82.9163, "eval_samples_per_second": 6.271, "eval_steps_per_second": 0.784, "step": 55000 }, { "epoch": 0.6, "learning_rate": 2.0048246608166307e-05, "loss": 1.4184, "step": 55500 }, { "epoch": 0.6, "eval_bleu": 9.4698, "eval_gen_len": 27.6, "eval_loss": 3.3737027645111084, "eval_meteor": 0.2414, "eval_runtime": 99.5873, "eval_samples_per_second": 5.222, "eval_steps_per_second": 0.653, "step": 55500 }, { "epoch": 0.6, "learning_rate": 1.977841099202366e-05, "loss": 1.4417, "step": 56000 }, { "epoch": 0.6, "eval_bleu": 9.3662, "eval_gen_len": 25.1481, "eval_loss": 3.3708438873291016, "eval_meteor": 0.2446, "eval_runtime": 87.8071, "eval_samples_per_second": 5.922, "eval_steps_per_second": 0.74, "step": 56000 }, { "epoch": 0.61, "learning_rate": 1.9508575375881014e-05, "loss": 1.3421, "step": 56500 }, { "epoch": 0.61, "eval_bleu": 9.3448, "eval_gen_len": 24.8404, "eval_loss": 3.3396267890930176, "eval_meteor": 0.2478, "eval_runtime": 73.0359, "eval_samples_per_second": 7.12, "eval_steps_per_second": 0.89, "step": 56500 }, { "epoch": 0.62, "learning_rate": 1.9238739759738368e-05, "loss": 1.396, "step": 57000 }, { "epoch": 0.62, "eval_bleu": 9.0765, "eval_gen_len": 24.7731, "eval_loss": 3.3500242233276367, "eval_meteor": 0.2413, "eval_runtime": 71.4703, "eval_samples_per_second": 7.276, "eval_steps_per_second": 0.909, "step": 57000 }, { "epoch": 0.62, "learning_rate": 1.8968904143595722e-05, "loss": 1.4152, "step": 57500 }, { "epoch": 0.62, "eval_bleu": 9.4934, "eval_gen_len": 24.7885, "eval_loss": 3.335568904876709, "eval_meteor": 0.2496, "eval_runtime": 67.659, "eval_samples_per_second": 7.686, "eval_steps_per_second": 0.961, "step": 57500 }, { "epoch": 0.63, "learning_rate": 1.8699068527453075e-05, "loss": 1.4245, "step": 58000 }, { "epoch": 0.63, "eval_bleu": 9.7477, "eval_gen_len": 24.0096, "eval_loss": 3.344538688659668, "eval_meteor": 0.2494, "eval_runtime": 69.4763, "eval_samples_per_second": 7.485, "eval_steps_per_second": 0.936, "step": 58000 }, { "epoch": 0.63, "learning_rate": 1.8429232911310433e-05, "loss": 1.4661, "step": 58500 }, { "epoch": 0.63, "eval_bleu": 9.2664, "eval_gen_len": 24.1404, "eval_loss": 3.3510961532592773, "eval_meteor": 0.2416, "eval_runtime": 71.3645, "eval_samples_per_second": 7.287, "eval_steps_per_second": 0.911, "step": 58500 }, { "epoch": 0.64, "learning_rate": 1.8159397295167786e-05, "loss": 1.4219, "step": 59000 }, { "epoch": 0.64, "eval_bleu": 9.5738, "eval_gen_len": 24.6769, "eval_loss": 3.3354907035827637, "eval_meteor": 0.2436, "eval_runtime": 76.9335, "eval_samples_per_second": 6.759, "eval_steps_per_second": 0.845, "step": 59000 }, { "epoch": 0.64, "learning_rate": 1.7889561679025137e-05, "loss": 1.4051, "step": 59500 }, { "epoch": 0.64, "eval_bleu": 9.4506, "eval_gen_len": 24.2615, "eval_loss": 3.3152685165405273, "eval_meteor": 0.2495, "eval_runtime": 68.6589, "eval_samples_per_second": 7.574, "eval_steps_per_second": 0.947, "step": 59500 }, { "epoch": 0.65, "learning_rate": 1.7619726062882494e-05, "loss": 1.4743, "step": 60000 }, { "epoch": 0.65, "eval_bleu": 9.6423, "eval_gen_len": 24.4865, "eval_loss": 3.3143715858459473, "eval_meteor": 0.2463, "eval_runtime": 71.0116, "eval_samples_per_second": 7.323, "eval_steps_per_second": 0.915, "step": 60000 }, { "epoch": 0.65, "learning_rate": 1.7349890446739848e-05, "loss": 1.3961, "step": 60500 }, { "epoch": 0.65, "eval_bleu": 9.7681, "eval_gen_len": 24.4615, "eval_loss": 3.3041951656341553, "eval_meteor": 0.2526, "eval_runtime": 66.5982, "eval_samples_per_second": 7.808, "eval_steps_per_second": 0.976, "step": 60500 }, { "epoch": 0.66, "learning_rate": 1.70800548305972e-05, "loss": 1.447, "step": 61000 }, { "epoch": 0.66, "eval_bleu": 9.6435, "eval_gen_len": 24.6365, "eval_loss": 3.3025214672088623, "eval_meteor": 0.2507, "eval_runtime": 73.4548, "eval_samples_per_second": 7.079, "eval_steps_per_second": 0.885, "step": 61000 }, { "epoch": 0.66, "learning_rate": 1.6810219214454555e-05, "loss": 1.4102, "step": 61500 }, { "epoch": 0.66, "eval_bleu": 9.6897, "eval_gen_len": 24.8731, "eval_loss": 3.3014609813690186, "eval_meteor": 0.25, "eval_runtime": 77.7893, "eval_samples_per_second": 6.685, "eval_steps_per_second": 0.836, "step": 61500 }, { "epoch": 0.67, "learning_rate": 1.654038359831191e-05, "loss": 1.3739, "step": 62000 }, { "epoch": 0.67, "eval_bleu": 9.2607, "eval_gen_len": 24.7885, "eval_loss": 3.313209056854248, "eval_meteor": 0.2447, "eval_runtime": 70.1449, "eval_samples_per_second": 7.413, "eval_steps_per_second": 0.927, "step": 62000 }, { "epoch": 0.67, "learning_rate": 1.6270547982169263e-05, "loss": 1.3794, "step": 62500 }, { "epoch": 0.67, "eval_bleu": 9.6355, "eval_gen_len": 24.4365, "eval_loss": 3.3016440868377686, "eval_meteor": 0.2488, "eval_runtime": 72.723, "eval_samples_per_second": 7.15, "eval_steps_per_second": 0.894, "step": 62500 }, { "epoch": 0.68, "learning_rate": 1.600071236602662e-05, "loss": 1.3819, "step": 63000 }, { "epoch": 0.68, "eval_bleu": 9.9125, "eval_gen_len": 24.5038, "eval_loss": 3.298640489578247, "eval_meteor": 0.248, "eval_runtime": 71.6443, "eval_samples_per_second": 7.258, "eval_steps_per_second": 0.907, "step": 63000 }, { "epoch": 0.69, "learning_rate": 1.573087674988397e-05, "loss": 1.3598, "step": 63500 }, { "epoch": 0.69, "eval_bleu": 10.2032, "eval_gen_len": 24.6558, "eval_loss": 3.2921295166015625, "eval_meteor": 0.2534, "eval_runtime": 74.7575, "eval_samples_per_second": 6.956, "eval_steps_per_second": 0.869, "step": 63500 }, { "epoch": 0.69, "learning_rate": 1.5461041133741324e-05, "loss": 1.3846, "step": 64000 }, { "epoch": 0.69, "eval_bleu": 9.9342, "eval_gen_len": 23.9923, "eval_loss": 3.2913033962249756, "eval_meteor": 0.2468, "eval_runtime": 68.6789, "eval_samples_per_second": 7.571, "eval_steps_per_second": 0.946, "step": 64000 }, { "epoch": 0.7, "learning_rate": 1.5191205517598681e-05, "loss": 1.4024, "step": 64500 }, { "epoch": 0.7, "eval_bleu": 9.445, "eval_gen_len": 25.3865, "eval_loss": 3.2889387607574463, "eval_meteor": 0.2426, "eval_runtime": 78.1546, "eval_samples_per_second": 6.653, "eval_steps_per_second": 0.832, "step": 64500 }, { "epoch": 0.7, "learning_rate": 1.4921369901456033e-05, "loss": 1.3775, "step": 65000 }, { "epoch": 0.7, "eval_bleu": 9.711, "eval_gen_len": 25.5769, "eval_loss": 3.2912492752075195, "eval_meteor": 0.2459, "eval_runtime": 102.461, "eval_samples_per_second": 5.075, "eval_steps_per_second": 0.634, "step": 65000 }, { "epoch": 0.71, "learning_rate": 1.4651534285313387e-05, "loss": 1.3192, "step": 65500 }, { "epoch": 0.71, "eval_bleu": 9.5794, "eval_gen_len": 25.2038, "eval_loss": 3.288285970687866, "eval_meteor": 0.2438, "eval_runtime": 81.3849, "eval_samples_per_second": 6.389, "eval_steps_per_second": 0.799, "step": 65500 }, { "epoch": 0.71, "learning_rate": 1.4381698669170742e-05, "loss": 1.3426, "step": 66000 }, { "epoch": 0.71, "eval_bleu": 9.9763, "eval_gen_len": 25.1942, "eval_loss": 3.282227039337158, "eval_meteor": 0.2473, "eval_runtime": 81.2601, "eval_samples_per_second": 6.399, "eval_steps_per_second": 0.8, "step": 66000 }, { "epoch": 0.72, "learning_rate": 1.4111863053028096e-05, "loss": 1.3669, "step": 66500 }, { "epoch": 0.72, "eval_bleu": 9.4662, "eval_gen_len": 23.9865, "eval_loss": 3.2904016971588135, "eval_meteor": 0.2483, "eval_runtime": 66.1145, "eval_samples_per_second": 7.865, "eval_steps_per_second": 0.983, "step": 66500 }, { "epoch": 0.72, "learning_rate": 1.384202743688545e-05, "loss": 1.3624, "step": 67000 }, { "epoch": 0.72, "eval_bleu": 9.5602, "eval_gen_len": 24.1096, "eval_loss": 3.299604654312134, "eval_meteor": 0.2452, "eval_runtime": 72.3193, "eval_samples_per_second": 7.19, "eval_steps_per_second": 0.899, "step": 67000 }, { "epoch": 0.73, "learning_rate": 1.3572191820742805e-05, "loss": 1.3435, "step": 67500 }, { "epoch": 0.73, "eval_bleu": 9.756, "eval_gen_len": 25.4038, "eval_loss": 3.2783830165863037, "eval_meteor": 0.2482, "eval_runtime": 64.4382, "eval_samples_per_second": 8.07, "eval_steps_per_second": 1.009, "step": 67500 }, { "epoch": 0.73, "learning_rate": 1.3302356204600157e-05, "loss": 1.3668, "step": 68000 }, { "epoch": 0.73, "eval_bleu": 9.9278, "eval_gen_len": 24.3115, "eval_loss": 3.2847084999084473, "eval_meteor": 0.246, "eval_runtime": 74.6646, "eval_samples_per_second": 6.964, "eval_steps_per_second": 0.871, "step": 68000 }, { "epoch": 0.74, "learning_rate": 1.3032520588457511e-05, "loss": 1.4025, "step": 68500 }, { "epoch": 0.74, "eval_bleu": 9.8915, "eval_gen_len": 24.4904, "eval_loss": 3.2775745391845703, "eval_meteor": 0.2502, "eval_runtime": 76.4639, "eval_samples_per_second": 6.801, "eval_steps_per_second": 0.85, "step": 68500 }, { "epoch": 0.74, "learning_rate": 1.2762684972314867e-05, "loss": 1.334, "step": 69000 }, { "epoch": 0.74, "eval_bleu": 10.1572, "eval_gen_len": 25.5462, "eval_loss": 3.2732129096984863, "eval_meteor": 0.2489, "eval_runtime": 88.0016, "eval_samples_per_second": 5.909, "eval_steps_per_second": 0.739, "step": 69000 }, { "epoch": 0.75, "learning_rate": 1.249284935617222e-05, "loss": 1.407, "step": 69500 }, { "epoch": 0.75, "eval_bleu": 9.9716, "eval_gen_len": 24.8692, "eval_loss": 3.2843334674835205, "eval_meteor": 0.2503, "eval_runtime": 76.3651, "eval_samples_per_second": 6.809, "eval_steps_per_second": 0.851, "step": 69500 }, { "epoch": 0.76, "learning_rate": 1.2223013740029574e-05, "loss": 1.449, "step": 70000 }, { "epoch": 0.76, "eval_bleu": 9.7516, "eval_gen_len": 24.6115, "eval_loss": 3.26999831199646, "eval_meteor": 0.2446, "eval_runtime": 57.4151, "eval_samples_per_second": 9.057, "eval_steps_per_second": 1.132, "step": 70000 }, { "epoch": 0.76, "learning_rate": 1.1953178123886928e-05, "loss": 1.3357, "step": 70500 }, { "epoch": 0.76, "eval_bleu": 9.7942, "eval_gen_len": 25.0538, "eval_loss": 3.2684452533721924, "eval_meteor": 0.2478, "eval_runtime": 69.1367, "eval_samples_per_second": 7.521, "eval_steps_per_second": 0.94, "step": 70500 }, { "epoch": 0.77, "learning_rate": 1.1683342507744283e-05, "loss": 1.3437, "step": 71000 }, { "epoch": 0.77, "eval_bleu": 9.9427, "eval_gen_len": 24.1538, "eval_loss": 3.2602343559265137, "eval_meteor": 0.2486, "eval_runtime": 72.8541, "eval_samples_per_second": 7.138, "eval_steps_per_second": 0.892, "step": 71000 }, { "epoch": 0.77, "learning_rate": 1.1413506891601637e-05, "loss": 1.3518, "step": 71500 }, { "epoch": 0.77, "eval_bleu": 9.8195, "eval_gen_len": 25.35, "eval_loss": 3.2659668922424316, "eval_meteor": 0.2466, "eval_runtime": 68.8561, "eval_samples_per_second": 7.552, "eval_steps_per_second": 0.944, "step": 71500 }, { "epoch": 0.78, "learning_rate": 1.1143671275458991e-05, "loss": 1.3762, "step": 72000 }, { "epoch": 0.78, "eval_bleu": 10.0553, "eval_gen_len": 24.8635, "eval_loss": 3.257364511489868, "eval_meteor": 0.2516, "eval_runtime": 81.5604, "eval_samples_per_second": 6.376, "eval_steps_per_second": 0.797, "step": 72000 }, { "epoch": 0.78, "learning_rate": 1.0873835659316345e-05, "loss": 1.3404, "step": 72500 }, { "epoch": 0.78, "eval_bleu": 9.9627, "eval_gen_len": 23.9154, "eval_loss": 3.2508692741394043, "eval_meteor": 0.2509, "eval_runtime": 55.132, "eval_samples_per_second": 9.432, "eval_steps_per_second": 1.179, "step": 72500 }, { "epoch": 0.79, "learning_rate": 1.06040000431737e-05, "loss": 1.3548, "step": 73000 }, { "epoch": 0.79, "eval_bleu": 9.8402, "eval_gen_len": 24.7827, "eval_loss": 3.260327100753784, "eval_meteor": 0.2499, "eval_runtime": 75.4748, "eval_samples_per_second": 6.89, "eval_steps_per_second": 0.861, "step": 73000 }, { "epoch": 0.79, "learning_rate": 1.0334164427031052e-05, "loss": 1.3642, "step": 73500 }, { "epoch": 0.79, "eval_bleu": 9.952, "eval_gen_len": 24.5327, "eval_loss": 3.2416298389434814, "eval_meteor": 0.2534, "eval_runtime": 74.6757, "eval_samples_per_second": 6.963, "eval_steps_per_second": 0.87, "step": 73500 }, { "epoch": 0.8, "learning_rate": 1.0064328810888408e-05, "loss": 1.3284, "step": 74000 }, { "epoch": 0.8, "eval_bleu": 9.8073, "eval_gen_len": 25.0212, "eval_loss": 3.2586073875427246, "eval_meteor": 0.2499, "eval_runtime": 76.8596, "eval_samples_per_second": 6.766, "eval_steps_per_second": 0.846, "step": 74000 }, { "epoch": 0.8, "learning_rate": 9.794493194745761e-06, "loss": 1.3697, "step": 74500 }, { "epoch": 0.8, "eval_bleu": 10.0465, "eval_gen_len": 24.95, "eval_loss": 3.2522428035736084, "eval_meteor": 0.249, "eval_runtime": 86.1073, "eval_samples_per_second": 6.039, "eval_steps_per_second": 0.755, "step": 74500 }, { "epoch": 0.81, "learning_rate": 9.524657578603115e-06, "loss": 1.3718, "step": 75000 }, { "epoch": 0.81, "eval_bleu": 9.8922, "eval_gen_len": 25.0846, "eval_loss": 3.246650457382202, "eval_meteor": 0.2499, "eval_runtime": 78.8353, "eval_samples_per_second": 6.596, "eval_steps_per_second": 0.825, "step": 75000 }, { "epoch": 0.81, "learning_rate": 9.254821962460469e-06, "loss": 1.3333, "step": 75500 }, { "epoch": 0.81, "eval_bleu": 9.5919, "eval_gen_len": 24.5365, "eval_loss": 3.244452953338623, "eval_meteor": 0.2509, "eval_runtime": 69.8377, "eval_samples_per_second": 7.446, "eval_steps_per_second": 0.931, "step": 75500 }, { "epoch": 0.82, "learning_rate": 8.984986346317824e-06, "loss": 1.4192, "step": 76000 }, { "epoch": 0.82, "eval_bleu": 10.1256, "eval_gen_len": 24.8904, "eval_loss": 3.230175733566284, "eval_meteor": 0.2524, "eval_runtime": 66.1729, "eval_samples_per_second": 7.858, "eval_steps_per_second": 0.982, "step": 76000 }, { "epoch": 0.83, "learning_rate": 8.715150730175178e-06, "loss": 1.3068, "step": 76500 }, { "epoch": 0.83, "eval_bleu": 10.1132, "eval_gen_len": 24.5346, "eval_loss": 3.241743326187134, "eval_meteor": 0.2533, "eval_runtime": 80.1867, "eval_samples_per_second": 6.485, "eval_steps_per_second": 0.811, "step": 76500 }, { "epoch": 0.83, "learning_rate": 8.445315114032532e-06, "loss": 1.4282, "step": 77000 }, { "epoch": 0.83, "eval_bleu": 9.95, "eval_gen_len": 24.3462, "eval_loss": 3.242541551589966, "eval_meteor": 0.2509, "eval_runtime": 61.0641, "eval_samples_per_second": 8.516, "eval_steps_per_second": 1.064, "step": 77000 }, { "epoch": 0.84, "learning_rate": 8.175479497889886e-06, "loss": 1.4073, "step": 77500 }, { "epoch": 0.84, "eval_bleu": 10.0739, "eval_gen_len": 25.3173, "eval_loss": 3.234609842300415, "eval_meteor": 0.2523, "eval_runtime": 76.5129, "eval_samples_per_second": 6.796, "eval_steps_per_second": 0.85, "step": 77500 }, { "epoch": 0.84, "learning_rate": 7.905643881747241e-06, "loss": 1.3717, "step": 78000 }, { "epoch": 0.84, "eval_bleu": 10.2478, "eval_gen_len": 24.7577, "eval_loss": 3.2297909259796143, "eval_meteor": 0.2543, "eval_runtime": 79.2758, "eval_samples_per_second": 6.559, "eval_steps_per_second": 0.82, "step": 78000 }, { "epoch": 0.85, "learning_rate": 7.635808265604593e-06, "loss": 1.3212, "step": 78500 }, { "epoch": 0.85, "eval_bleu": 9.9788, "eval_gen_len": 24.3962, "eval_loss": 3.2253217697143555, "eval_meteor": 0.2558, "eval_runtime": 74.6105, "eval_samples_per_second": 6.97, "eval_steps_per_second": 0.871, "step": 78500 }, { "epoch": 0.85, "learning_rate": 7.3659726494619485e-06, "loss": 1.3623, "step": 79000 }, { "epoch": 0.85, "eval_bleu": 10.1533, "eval_gen_len": 24.2692, "eval_loss": 3.225541830062866, "eval_meteor": 0.2516, "eval_runtime": 76.3252, "eval_samples_per_second": 6.813, "eval_steps_per_second": 0.852, "step": 79000 }, { "epoch": 0.86, "learning_rate": 7.096137033319303e-06, "loss": 1.2651, "step": 79500 }, { "epoch": 0.86, "eval_bleu": 10.1154, "eval_gen_len": 24.7058, "eval_loss": 3.2230679988861084, "eval_meteor": 0.2535, "eval_runtime": 78.0816, "eval_samples_per_second": 6.66, "eval_steps_per_second": 0.832, "step": 79500 }, { "epoch": 0.86, "learning_rate": 6.826301417176656e-06, "loss": 1.3287, "step": 80000 }, { "epoch": 0.86, "eval_bleu": 10.0863, "eval_gen_len": 24.5962, "eval_loss": 3.2264089584350586, "eval_meteor": 0.2544, "eval_runtime": 100.662, "eval_samples_per_second": 5.166, "eval_steps_per_second": 0.646, "step": 80000 }, { "epoch": 0.87, "learning_rate": 6.556465801034011e-06, "loss": 1.33, "step": 80500 }, { "epoch": 0.87, "eval_bleu": 10.1522, "eval_gen_len": 25.4885, "eval_loss": 3.221473217010498, "eval_meteor": 0.2513, "eval_runtime": 70.9756, "eval_samples_per_second": 7.326, "eval_steps_per_second": 0.916, "step": 80500 }, { "epoch": 0.87, "learning_rate": 6.286630184891365e-06, "loss": 1.2862, "step": 81000 }, { "epoch": 0.87, "eval_bleu": 10.1425, "eval_gen_len": 25.1442, "eval_loss": 3.226158618927002, "eval_meteor": 0.2538, "eval_runtime": 74.1814, "eval_samples_per_second": 7.01, "eval_steps_per_second": 0.876, "step": 81000 }, { "epoch": 0.88, "learning_rate": 6.016794568748719e-06, "loss": 1.3738, "step": 81500 }, { "epoch": 0.88, "eval_bleu": 10.1001, "eval_gen_len": 25.1846, "eval_loss": 3.2149925231933594, "eval_meteor": 0.255, "eval_runtime": 81.4268, "eval_samples_per_second": 6.386, "eval_steps_per_second": 0.798, "step": 81500 }, { "epoch": 0.89, "learning_rate": 5.746958952606073e-06, "loss": 1.3141, "step": 82000 }, { "epoch": 0.89, "eval_bleu": 10.2016, "eval_gen_len": 24.6769, "eval_loss": 3.217388391494751, "eval_meteor": 0.2549, "eval_runtime": 75.3151, "eval_samples_per_second": 6.904, "eval_steps_per_second": 0.863, "step": 82000 }, { "epoch": 0.89, "learning_rate": 5.4771233364634265e-06, "loss": 1.3326, "step": 82500 }, { "epoch": 0.89, "eval_bleu": 10.2847, "eval_gen_len": 24.2038, "eval_loss": 3.2135159969329834, "eval_meteor": 0.2529, "eval_runtime": 74.3155, "eval_samples_per_second": 6.997, "eval_steps_per_second": 0.875, "step": 82500 }, { "epoch": 0.9, "learning_rate": 5.207287720320781e-06, "loss": 1.3112, "step": 83000 }, { "epoch": 0.9, "eval_bleu": 10.1374, "eval_gen_len": 24.7308, "eval_loss": 3.2098002433776855, "eval_meteor": 0.2572, "eval_runtime": 71.2774, "eval_samples_per_second": 7.295, "eval_steps_per_second": 0.912, "step": 83000 }, { "epoch": 0.9, "learning_rate": 4.937452104178135e-06, "loss": 1.3101, "step": 83500 }, { "epoch": 0.9, "eval_bleu": 10.0941, "eval_gen_len": 24.6654, "eval_loss": 3.206242084503174, "eval_meteor": 0.256, "eval_runtime": 68.2494, "eval_samples_per_second": 7.619, "eval_steps_per_second": 0.952, "step": 83500 }, { "epoch": 0.91, "learning_rate": 4.667616488035489e-06, "loss": 1.3403, "step": 84000 }, { "epoch": 0.91, "eval_bleu": 10.1955, "eval_gen_len": 24.8115, "eval_loss": 3.2050940990448, "eval_meteor": 0.2569, "eval_runtime": 67.097, "eval_samples_per_second": 7.75, "eval_steps_per_second": 0.969, "step": 84000 }, { "epoch": 0.91, "learning_rate": 4.397780871892843e-06, "loss": 1.3651, "step": 84500 }, { "epoch": 0.91, "eval_bleu": 10.0488, "eval_gen_len": 25.5019, "eval_loss": 3.209751605987549, "eval_meteor": 0.2581, "eval_runtime": 61.2968, "eval_samples_per_second": 8.483, "eval_steps_per_second": 1.06, "step": 84500 }, { "epoch": 0.92, "learning_rate": 4.127945255750197e-06, "loss": 1.3243, "step": 85000 }, { "epoch": 0.92, "eval_bleu": 10.1076, "eval_gen_len": 24.7385, "eval_loss": 3.202362298965454, "eval_meteor": 0.2567, "eval_runtime": 68.0409, "eval_samples_per_second": 7.642, "eval_steps_per_second": 0.955, "step": 85000 }, { "epoch": 0.92, "learning_rate": 3.858109639607551e-06, "loss": 1.3147, "step": 85500 }, { "epoch": 0.92, "eval_bleu": 10.0392, "eval_gen_len": 25.1269, "eval_loss": 3.2062337398529053, "eval_meteor": 0.2544, "eval_runtime": 71.9112, "eval_samples_per_second": 7.231, "eval_steps_per_second": 0.904, "step": 85500 }, { "epoch": 0.93, "learning_rate": 3.5882740234649054e-06, "loss": 1.2663, "step": 86000 }, { "epoch": 0.93, "eval_bleu": 10.2884, "eval_gen_len": 24.925, "eval_loss": 3.2030699253082275, "eval_meteor": 0.2588, "eval_runtime": 70.7104, "eval_samples_per_second": 7.354, "eval_steps_per_second": 0.919, "step": 86000 }, { "epoch": 0.93, "learning_rate": 3.3184384073222596e-06, "loss": 1.3497, "step": 86500 }, { "epoch": 0.93, "eval_bleu": 10.1993, "eval_gen_len": 25.0269, "eval_loss": 3.208573579788208, "eval_meteor": 0.2542, "eval_runtime": 73.9279, "eval_samples_per_second": 7.034, "eval_steps_per_second": 0.879, "step": 86500 }, { "epoch": 0.94, "learning_rate": 3.0486027911796137e-06, "loss": 1.3252, "step": 87000 }, { "epoch": 0.94, "eval_bleu": 10.1155, "eval_gen_len": 24.7173, "eval_loss": 3.2008321285247803, "eval_meteor": 0.2575, "eval_runtime": 68.983, "eval_samples_per_second": 7.538, "eval_steps_per_second": 0.942, "step": 87000 }, { "epoch": 0.94, "learning_rate": 2.778767175036968e-06, "loss": 1.277, "step": 87500 }, { "epoch": 0.94, "eval_bleu": 10.3075, "eval_gen_len": 24.5346, "eval_loss": 3.2011780738830566, "eval_meteor": 0.2592, "eval_runtime": 67.2436, "eval_samples_per_second": 7.733, "eval_steps_per_second": 0.967, "step": 87500 }, { "epoch": 0.95, "learning_rate": 2.5089315588943217e-06, "loss": 1.3134, "step": 88000 }, { "epoch": 0.95, "eval_bleu": 10.31, "eval_gen_len": 24.4, "eval_loss": 3.2086844444274902, "eval_meteor": 0.2565, "eval_runtime": 90.4691, "eval_samples_per_second": 5.748, "eval_steps_per_second": 0.718, "step": 88000 }, { "epoch": 0.96, "learning_rate": 2.239095942751676e-06, "loss": 1.371, "step": 88500 }, { "epoch": 0.96, "eval_bleu": 10.3715, "eval_gen_len": 24.3385, "eval_loss": 3.206083297729492, "eval_meteor": 0.2597, "eval_runtime": 67.5985, "eval_samples_per_second": 7.692, "eval_steps_per_second": 0.962, "step": 88500 }, { "epoch": 0.96, "learning_rate": 1.96926032660903e-06, "loss": 1.2951, "step": 89000 }, { "epoch": 0.96, "eval_bleu": 10.415, "eval_gen_len": 24.4481, "eval_loss": 3.2051799297332764, "eval_meteor": 0.2597, "eval_runtime": 70.0734, "eval_samples_per_second": 7.421, "eval_steps_per_second": 0.928, "step": 89000 }, { "epoch": 0.97, "learning_rate": 1.6994247104663838e-06, "loss": 1.2891, "step": 89500 }, { "epoch": 0.97, "eval_bleu": 10.3082, "eval_gen_len": 24.3154, "eval_loss": 3.2073869705200195, "eval_meteor": 0.2566, "eval_runtime": 67.5441, "eval_samples_per_second": 7.699, "eval_steps_per_second": 0.962, "step": 89500 }, { "epoch": 0.97, "learning_rate": 1.429589094323738e-06, "loss": 1.3057, "step": 90000 }, { "epoch": 0.97, "eval_bleu": 10.3117, "eval_gen_len": 24.7635, "eval_loss": 3.204622745513916, "eval_meteor": 0.2582, "eval_runtime": 67.7948, "eval_samples_per_second": 7.67, "eval_steps_per_second": 0.959, "step": 90000 }, { "epoch": 0.98, "learning_rate": 1.1597534781810922e-06, "loss": 1.294, "step": 90500 }, { "epoch": 0.98, "eval_bleu": 10.4707, "eval_gen_len": 24.8135, "eval_loss": 3.204620838165283, "eval_meteor": 0.2593, "eval_runtime": 69.6992, "eval_samples_per_second": 7.461, "eval_steps_per_second": 0.933, "step": 90500 }, { "epoch": 0.98, "learning_rate": 8.899178620384462e-07, "loss": 1.2979, "step": 91000 }, { "epoch": 0.98, "eval_bleu": 10.179, "eval_gen_len": 24.8058, "eval_loss": 3.2011451721191406, "eval_meteor": 0.2588, "eval_runtime": 76.3465, "eval_samples_per_second": 6.811, "eval_steps_per_second": 0.851, "step": 91000 }, { "epoch": 0.99, "learning_rate": 6.200822458958003e-07, "loss": 1.3096, "step": 91500 }, { "epoch": 0.99, "eval_bleu": 10.407, "eval_gen_len": 24.5077, "eval_loss": 3.199586868286133, "eval_meteor": 0.2588, "eval_runtime": 68.5257, "eval_samples_per_second": 7.588, "eval_steps_per_second": 0.949, "step": 91500 }, { "epoch": 0.99, "learning_rate": 3.502466297531544e-07, "loss": 1.3779, "step": 92000 }, { "epoch": 0.99, "eval_bleu": 10.2969, "eval_gen_len": 24.6481, "eval_loss": 3.2000153064727783, "eval_meteor": 0.2567, "eval_runtime": 66.7091, "eval_samples_per_second": 7.795, "eval_steps_per_second": 0.974, "step": 92000 }, { "epoch": 1.0, "learning_rate": 8.041101361050848e-08, "loss": 1.3578, "step": 92500 }, { "epoch": 1.0, "eval_bleu": 10.295, "eval_gen_len": 24.6423, "eval_loss": 3.2001755237579346, "eval_meteor": 0.2573, "eval_runtime": 74.0674, "eval_samples_per_second": 7.021, "eval_steps_per_second": 0.878, "step": 92500 } ], "max_steps": 92649, "num_train_epochs": 1, "total_flos": 1.0046721126039552e+16, "trial_name": null, "trial_params": null }