opus-mt-en-hi-finetuned-en-to-hi / trainer_state.json
nid989's picture
Upload trainer_state.json
54174d9
raw
history blame
76.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9983917797277898,
"global_step": 92500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.9730164383857356e-05,
"loss": 2.1298,
"step": 500
},
{
"epoch": 0.01,
"eval_bleu": 6.9284,
"eval_gen_len": 23.9635,
"eval_loss": 4.2180352210998535,
"eval_meteor": 0.2087,
"eval_runtime": 55.8494,
"eval_samples_per_second": 9.311,
"eval_steps_per_second": 1.164,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.946032876771471e-05,
"loss": 2.0441,
"step": 1000
},
{
"epoch": 0.01,
"eval_bleu": 6.89,
"eval_gen_len": 35.1442,
"eval_loss": 4.191596508026123,
"eval_meteor": 0.2133,
"eval_runtime": 113.3213,
"eval_samples_per_second": 4.589,
"eval_steps_per_second": 0.574,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.9190493151572064e-05,
"loss": 2.0547,
"step": 1500
},
{
"epoch": 0.02,
"eval_bleu": 6.5842,
"eval_gen_len": 26.2904,
"eval_loss": 4.191189289093018,
"eval_meteor": 0.214,
"eval_runtime": 68.7752,
"eval_samples_per_second": 7.561,
"eval_steps_per_second": 0.945,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 4.892065753542942e-05,
"loss": 1.9988,
"step": 2000
},
{
"epoch": 0.02,
"eval_bleu": 6.3595,
"eval_gen_len": 43.4173,
"eval_loss": 4.144649982452393,
"eval_meteor": 0.206,
"eval_runtime": 168.802,
"eval_samples_per_second": 3.081,
"eval_steps_per_second": 0.385,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 4.865082191928677e-05,
"loss": 1.9714,
"step": 2500
},
{
"epoch": 0.03,
"eval_bleu": 6.0327,
"eval_gen_len": 35.4712,
"eval_loss": 4.147974491119385,
"eval_meteor": 0.2087,
"eval_runtime": 124.6806,
"eval_samples_per_second": 4.171,
"eval_steps_per_second": 0.521,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 4.8380986303144125e-05,
"loss": 2.0021,
"step": 3000
},
{
"epoch": 0.03,
"eval_bleu": 6.626,
"eval_gen_len": 28.4385,
"eval_loss": 4.138648509979248,
"eval_meteor": 0.2004,
"eval_runtime": 79.9371,
"eval_samples_per_second": 6.505,
"eval_steps_per_second": 0.813,
"step": 3000
},
{
"epoch": 0.04,
"learning_rate": 4.8111150687001485e-05,
"loss": 1.9541,
"step": 3500
},
{
"epoch": 0.04,
"eval_bleu": 7.3794,
"eval_gen_len": 32.2192,
"eval_loss": 4.102533340454102,
"eval_meteor": 0.2092,
"eval_runtime": 104.2846,
"eval_samples_per_second": 4.986,
"eval_steps_per_second": 0.623,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 4.784131507085883e-05,
"loss": 1.9843,
"step": 4000
},
{
"epoch": 0.04,
"eval_bleu": 6.8478,
"eval_gen_len": 28.3692,
"eval_loss": 4.0901265144348145,
"eval_meteor": 0.2026,
"eval_runtime": 84.5232,
"eval_samples_per_second": 6.152,
"eval_steps_per_second": 0.769,
"step": 4000
},
{
"epoch": 0.05,
"learning_rate": 4.7571479454716186e-05,
"loss": 2.0089,
"step": 4500
},
{
"epoch": 0.05,
"eval_bleu": 7.3969,
"eval_gen_len": 32.6115,
"eval_loss": 4.115621089935303,
"eval_meteor": 0.2061,
"eval_runtime": 108.942,
"eval_samples_per_second": 4.773,
"eval_steps_per_second": 0.597,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 4.730164383857355e-05,
"loss": 1.8734,
"step": 5000
},
{
"epoch": 0.05,
"eval_bleu": 6.4957,
"eval_gen_len": 53.5423,
"eval_loss": 4.048145771026611,
"eval_meteor": 0.2047,
"eval_runtime": 248.2193,
"eval_samples_per_second": 2.095,
"eval_steps_per_second": 0.262,
"step": 5000
},
{
"epoch": 0.06,
"learning_rate": 4.7031808222430894e-05,
"loss": 1.959,
"step": 5500
},
{
"epoch": 0.06,
"eval_bleu": 6.7135,
"eval_gen_len": 28.0327,
"eval_loss": 4.0486321449279785,
"eval_meteor": 0.1982,
"eval_runtime": 78.1961,
"eval_samples_per_second": 6.65,
"eval_steps_per_second": 0.831,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 4.6761972606288254e-05,
"loss": 1.9442,
"step": 6000
},
{
"epoch": 0.06,
"eval_bleu": 7.8174,
"eval_gen_len": 30.275,
"eval_loss": 4.008225440979004,
"eval_meteor": 0.2121,
"eval_runtime": 88.4227,
"eval_samples_per_second": 5.881,
"eval_steps_per_second": 0.735,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 4.649213699014561e-05,
"loss": 1.8855,
"step": 6500
},
{
"epoch": 0.07,
"eval_bleu": 7.0941,
"eval_gen_len": 30.8077,
"eval_loss": 4.006565570831299,
"eval_meteor": 0.2117,
"eval_runtime": 90.6701,
"eval_samples_per_second": 5.735,
"eval_steps_per_second": 0.717,
"step": 6500
},
{
"epoch": 0.08,
"learning_rate": 4.6222301374002955e-05,
"loss": 1.873,
"step": 7000
},
{
"epoch": 0.08,
"eval_bleu": 7.0348,
"eval_gen_len": 34.7673,
"eval_loss": 4.008674621582031,
"eval_meteor": 0.2117,
"eval_runtime": 140.0997,
"eval_samples_per_second": 3.712,
"eval_steps_per_second": 0.464,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 4.5952465757860316e-05,
"loss": 1.9126,
"step": 7500
},
{
"epoch": 0.08,
"eval_bleu": 7.2617,
"eval_gen_len": 31.5173,
"eval_loss": 3.955263614654541,
"eval_meteor": 0.2188,
"eval_runtime": 135.15,
"eval_samples_per_second": 3.848,
"eval_steps_per_second": 0.481,
"step": 7500
},
{
"epoch": 0.09,
"learning_rate": 4.568263014171767e-05,
"loss": 1.8181,
"step": 8000
},
{
"epoch": 0.09,
"eval_bleu": 7.0348,
"eval_gen_len": 29.8442,
"eval_loss": 3.9835426807403564,
"eval_meteor": 0.211,
"eval_runtime": 112.2816,
"eval_samples_per_second": 4.631,
"eval_steps_per_second": 0.579,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 4.541279452557502e-05,
"loss": 1.8553,
"step": 8500
},
{
"epoch": 0.09,
"eval_bleu": 6.9695,
"eval_gen_len": 31.0673,
"eval_loss": 3.934365749359131,
"eval_meteor": 0.2167,
"eval_runtime": 126.5544,
"eval_samples_per_second": 4.109,
"eval_steps_per_second": 0.514,
"step": 8500
},
{
"epoch": 0.1,
"learning_rate": 4.514295890943238e-05,
"loss": 1.8856,
"step": 9000
},
{
"epoch": 0.1,
"eval_bleu": 7.8161,
"eval_gen_len": 25.7885,
"eval_loss": 3.9472157955169678,
"eval_meteor": 0.2202,
"eval_runtime": 90.2192,
"eval_samples_per_second": 5.764,
"eval_steps_per_second": 0.72,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 4.487312329328973e-05,
"loss": 1.8375,
"step": 9500
},
{
"epoch": 0.1,
"eval_bleu": 7.3226,
"eval_gen_len": 31.6615,
"eval_loss": 3.9212045669555664,
"eval_meteor": 0.2131,
"eval_runtime": 122.1859,
"eval_samples_per_second": 4.256,
"eval_steps_per_second": 0.532,
"step": 9500
},
{
"epoch": 0.11,
"learning_rate": 4.4603287677147084e-05,
"loss": 1.8245,
"step": 10000
},
{
"epoch": 0.11,
"eval_bleu": 7.5927,
"eval_gen_len": 25.6827,
"eval_loss": 3.9012672901153564,
"eval_meteor": 0.2162,
"eval_runtime": 87.2113,
"eval_samples_per_second": 5.963,
"eval_steps_per_second": 0.745,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 4.433345206100444e-05,
"loss": 1.7563,
"step": 10500
},
{
"epoch": 0.11,
"eval_bleu": 7.6493,
"eval_gen_len": 24.7904,
"eval_loss": 3.9165987968444824,
"eval_meteor": 0.2225,
"eval_runtime": 71.5438,
"eval_samples_per_second": 7.268,
"eval_steps_per_second": 0.909,
"step": 10500
},
{
"epoch": 0.12,
"learning_rate": 4.406361644486179e-05,
"loss": 1.7739,
"step": 11000
},
{
"epoch": 0.12,
"eval_bleu": 7.0986,
"eval_gen_len": 32.6269,
"eval_loss": 3.892530679702759,
"eval_meteor": 0.2162,
"eval_runtime": 135.1518,
"eval_samples_per_second": 3.848,
"eval_steps_per_second": 0.481,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 4.3793780828719146e-05,
"loss": 1.8389,
"step": 11500
},
{
"epoch": 0.12,
"eval_bleu": 7.355,
"eval_gen_len": 29.1173,
"eval_loss": 3.891714572906494,
"eval_meteor": 0.222,
"eval_runtime": 103.9412,
"eval_samples_per_second": 5.003,
"eval_steps_per_second": 0.625,
"step": 11500
},
{
"epoch": 0.13,
"learning_rate": 4.35239452125765e-05,
"loss": 1.8359,
"step": 12000
},
{
"epoch": 0.13,
"eval_bleu": 7.7276,
"eval_gen_len": 26.5288,
"eval_loss": 3.8721096515655518,
"eval_meteor": 0.2224,
"eval_runtime": 92.6122,
"eval_samples_per_second": 5.615,
"eval_steps_per_second": 0.702,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 4.325410959643385e-05,
"loss": 1.774,
"step": 12500
},
{
"epoch": 0.13,
"eval_bleu": 7.3227,
"eval_gen_len": 25.8019,
"eval_loss": 3.852199077606201,
"eval_meteor": 0.2256,
"eval_runtime": 80.7019,
"eval_samples_per_second": 6.443,
"eval_steps_per_second": 0.805,
"step": 12500
},
{
"epoch": 0.14,
"learning_rate": 4.298427398029121e-05,
"loss": 1.8502,
"step": 13000
},
{
"epoch": 0.14,
"eval_bleu": 8.0288,
"eval_gen_len": 25.7327,
"eval_loss": 3.838671922683716,
"eval_meteor": 0.2306,
"eval_runtime": 81.2754,
"eval_samples_per_second": 6.398,
"eval_steps_per_second": 0.8,
"step": 13000
},
{
"epoch": 0.15,
"learning_rate": 4.271443836414856e-05,
"loss": 1.7748,
"step": 13500
},
{
"epoch": 0.15,
"eval_bleu": 7.8972,
"eval_gen_len": 25.8135,
"eval_loss": 3.847898006439209,
"eval_meteor": 0.2289,
"eval_runtime": 77.3621,
"eval_samples_per_second": 6.722,
"eval_steps_per_second": 0.84,
"step": 13500
},
{
"epoch": 0.15,
"learning_rate": 4.244460274800592e-05,
"loss": 1.8212,
"step": 14000
},
{
"epoch": 0.15,
"eval_bleu": 7.617,
"eval_gen_len": 31.2462,
"eval_loss": 3.831773281097412,
"eval_meteor": 0.2249,
"eval_runtime": 133.705,
"eval_samples_per_second": 3.889,
"eval_steps_per_second": 0.486,
"step": 14000
},
{
"epoch": 0.16,
"learning_rate": 4.217476713186327e-05,
"loss": 1.7943,
"step": 14500
},
{
"epoch": 0.16,
"eval_bleu": 6.6778,
"eval_gen_len": 27.7673,
"eval_loss": 3.8318052291870117,
"eval_meteor": 0.2197,
"eval_runtime": 95.8818,
"eval_samples_per_second": 5.423,
"eval_steps_per_second": 0.678,
"step": 14500
},
{
"epoch": 0.16,
"learning_rate": 4.190493151572062e-05,
"loss": 1.7869,
"step": 15000
},
{
"epoch": 0.16,
"eval_bleu": 7.5995,
"eval_gen_len": 25.6712,
"eval_loss": 3.793452262878418,
"eval_meteor": 0.2261,
"eval_runtime": 84.1529,
"eval_samples_per_second": 6.179,
"eval_steps_per_second": 0.772,
"step": 15000
},
{
"epoch": 0.17,
"learning_rate": 4.163509589957798e-05,
"loss": 1.7802,
"step": 15500
},
{
"epoch": 0.17,
"eval_bleu": 6.9484,
"eval_gen_len": 34.8865,
"eval_loss": 3.788137674331665,
"eval_meteor": 0.2214,
"eval_runtime": 159.4169,
"eval_samples_per_second": 3.262,
"eval_steps_per_second": 0.408,
"step": 15500
},
{
"epoch": 0.17,
"learning_rate": 4.136526028343533e-05,
"loss": 1.6783,
"step": 16000
},
{
"epoch": 0.17,
"eval_bleu": 7.1202,
"eval_gen_len": 26.2692,
"eval_loss": 3.7846884727478027,
"eval_meteor": 0.2171,
"eval_runtime": 88.5814,
"eval_samples_per_second": 5.87,
"eval_steps_per_second": 0.734,
"step": 16000
},
{
"epoch": 0.18,
"learning_rate": 4.109542466729269e-05,
"loss": 1.6773,
"step": 16500
},
{
"epoch": 0.18,
"eval_bleu": 8.093,
"eval_gen_len": 26.9346,
"eval_loss": 3.772686243057251,
"eval_meteor": 0.222,
"eval_runtime": 96.6464,
"eval_samples_per_second": 5.38,
"eval_steps_per_second": 0.673,
"step": 16500
},
{
"epoch": 0.18,
"learning_rate": 4.0825589051150044e-05,
"loss": 1.6435,
"step": 17000
},
{
"epoch": 0.18,
"eval_bleu": 7.5486,
"eval_gen_len": 26.6231,
"eval_loss": 3.785611152648926,
"eval_meteor": 0.2136,
"eval_runtime": 89.9499,
"eval_samples_per_second": 5.781,
"eval_steps_per_second": 0.723,
"step": 17000
},
{
"epoch": 0.19,
"learning_rate": 4.055575343500739e-05,
"loss": 1.6892,
"step": 17500
},
{
"epoch": 0.19,
"eval_bleu": 7.2576,
"eval_gen_len": 31.4712,
"eval_loss": 3.778566837310791,
"eval_meteor": 0.2088,
"eval_runtime": 124.5211,
"eval_samples_per_second": 4.176,
"eval_steps_per_second": 0.522,
"step": 17500
},
{
"epoch": 0.19,
"learning_rate": 4.028591781886475e-05,
"loss": 1.7355,
"step": 18000
},
{
"epoch": 0.19,
"eval_bleu": 7.8495,
"eval_gen_len": 27.7846,
"eval_loss": 3.739633798599243,
"eval_meteor": 0.2249,
"eval_runtime": 103.8056,
"eval_samples_per_second": 5.009,
"eval_steps_per_second": 0.626,
"step": 18000
},
{
"epoch": 0.2,
"learning_rate": 4.0016082202722105e-05,
"loss": 1.708,
"step": 18500
},
{
"epoch": 0.2,
"eval_bleu": 8.0278,
"eval_gen_len": 23.0538,
"eval_loss": 3.7668354511260986,
"eval_meteor": 0.2207,
"eval_runtime": 70.0581,
"eval_samples_per_second": 7.422,
"eval_steps_per_second": 0.928,
"step": 18500
},
{
"epoch": 0.21,
"learning_rate": 3.974624658657945e-05,
"loss": 1.6596,
"step": 19000
},
{
"epoch": 0.21,
"eval_bleu": 7.5685,
"eval_gen_len": 28.1115,
"eval_loss": 3.7208786010742188,
"eval_meteor": 0.2287,
"eval_runtime": 102.7576,
"eval_samples_per_second": 5.06,
"eval_steps_per_second": 0.633,
"step": 19000
},
{
"epoch": 0.21,
"learning_rate": 3.947641097043681e-05,
"loss": 1.7048,
"step": 19500
},
{
"epoch": 0.21,
"eval_bleu": 8.1065,
"eval_gen_len": 23.6308,
"eval_loss": 3.7291922569274902,
"eval_meteor": 0.2308,
"eval_runtime": 60.5983,
"eval_samples_per_second": 8.581,
"eval_steps_per_second": 1.073,
"step": 19500
},
{
"epoch": 0.22,
"learning_rate": 3.9206575354294166e-05,
"loss": 1.671,
"step": 20000
},
{
"epoch": 0.22,
"eval_bleu": 8.2527,
"eval_gen_len": 28.6962,
"eval_loss": 3.6946775913238525,
"eval_meteor": 0.2352,
"eval_runtime": 109.8594,
"eval_samples_per_second": 4.733,
"eval_steps_per_second": 0.592,
"step": 20000
},
{
"epoch": 0.22,
"learning_rate": 3.893673973815152e-05,
"loss": 1.7021,
"step": 20500
},
{
"epoch": 0.22,
"eval_bleu": 7.9211,
"eval_gen_len": 25.9538,
"eval_loss": 3.711056709289551,
"eval_meteor": 0.2288,
"eval_runtime": 99.5819,
"eval_samples_per_second": 5.222,
"eval_steps_per_second": 0.653,
"step": 20500
},
{
"epoch": 0.23,
"learning_rate": 3.8666904122008874e-05,
"loss": 1.6431,
"step": 21000
},
{
"epoch": 0.23,
"eval_bleu": 8.1937,
"eval_gen_len": 27.2942,
"eval_loss": 3.7131733894348145,
"eval_meteor": 0.2303,
"eval_runtime": 84.3649,
"eval_samples_per_second": 6.164,
"eval_steps_per_second": 0.77,
"step": 21000
},
{
"epoch": 0.23,
"learning_rate": 3.839706850586623e-05,
"loss": 1.7551,
"step": 21500
},
{
"epoch": 0.23,
"eval_bleu": 8.0326,
"eval_gen_len": 32.4615,
"eval_loss": 3.7119548320770264,
"eval_meteor": 0.2299,
"eval_runtime": 130.1926,
"eval_samples_per_second": 3.994,
"eval_steps_per_second": 0.499,
"step": 21500
},
{
"epoch": 0.24,
"learning_rate": 3.812723288972358e-05,
"loss": 1.672,
"step": 22000
},
{
"epoch": 0.24,
"eval_bleu": 8.1471,
"eval_gen_len": 26.1,
"eval_loss": 3.708371639251709,
"eval_meteor": 0.2274,
"eval_runtime": 101.2958,
"eval_samples_per_second": 5.133,
"eval_steps_per_second": 0.642,
"step": 22000
},
{
"epoch": 0.24,
"learning_rate": 3.7857397273580935e-05,
"loss": 1.6464,
"step": 22500
},
{
"epoch": 0.24,
"eval_bleu": 7.974,
"eval_gen_len": 26.8558,
"eval_loss": 3.677321195602417,
"eval_meteor": 0.2287,
"eval_runtime": 101.1748,
"eval_samples_per_second": 5.14,
"eval_steps_per_second": 0.642,
"step": 22500
},
{
"epoch": 0.25,
"learning_rate": 3.758756165743829e-05,
"loss": 1.6484,
"step": 23000
},
{
"epoch": 0.25,
"eval_bleu": 7.609,
"eval_gen_len": 28.8173,
"eval_loss": 3.6681010723114014,
"eval_meteor": 0.2303,
"eval_runtime": 110.9868,
"eval_samples_per_second": 4.685,
"eval_steps_per_second": 0.586,
"step": 23000
},
{
"epoch": 0.25,
"learning_rate": 3.731772604129564e-05,
"loss": 1.6568,
"step": 23500
},
{
"epoch": 0.25,
"eval_bleu": 7.1469,
"eval_gen_len": 27.2288,
"eval_loss": 3.692307949066162,
"eval_meteor": 0.2255,
"eval_runtime": 94.7667,
"eval_samples_per_second": 5.487,
"eval_steps_per_second": 0.686,
"step": 23500
},
{
"epoch": 0.26,
"learning_rate": 3.7047890425152996e-05,
"loss": 1.6389,
"step": 24000
},
{
"epoch": 0.26,
"eval_bleu": 7.7818,
"eval_gen_len": 24.6692,
"eval_loss": 3.660385847091675,
"eval_meteor": 0.2249,
"eval_runtime": 81.5305,
"eval_samples_per_second": 6.378,
"eval_steps_per_second": 0.797,
"step": 24000
},
{
"epoch": 0.26,
"learning_rate": 3.677805480901036e-05,
"loss": 1.6639,
"step": 24500
},
{
"epoch": 0.26,
"eval_bleu": 8.0474,
"eval_gen_len": 23.9308,
"eval_loss": 3.659407377243042,
"eval_meteor": 0.2284,
"eval_runtime": 73.4805,
"eval_samples_per_second": 7.077,
"eval_steps_per_second": 0.885,
"step": 24500
},
{
"epoch": 0.27,
"learning_rate": 3.6508219192867704e-05,
"loss": 1.6965,
"step": 25000
},
{
"epoch": 0.27,
"eval_bleu": 7.8415,
"eval_gen_len": 29.1077,
"eval_loss": 3.663590431213379,
"eval_meteor": 0.2242,
"eval_runtime": 99.3013,
"eval_samples_per_second": 5.237,
"eval_steps_per_second": 0.655,
"step": 25000
},
{
"epoch": 0.28,
"learning_rate": 3.623838357672506e-05,
"loss": 1.6891,
"step": 25500
},
{
"epoch": 0.28,
"eval_bleu": 7.6988,
"eval_gen_len": 26.9077,
"eval_loss": 3.633793354034424,
"eval_meteor": 0.2278,
"eval_runtime": 87.8219,
"eval_samples_per_second": 5.921,
"eval_steps_per_second": 0.74,
"step": 25500
},
{
"epoch": 0.28,
"learning_rate": 3.596854796058242e-05,
"loss": 1.6518,
"step": 26000
},
{
"epoch": 0.28,
"eval_bleu": 8.0628,
"eval_gen_len": 24.2231,
"eval_loss": 3.6404640674591064,
"eval_meteor": 0.2272,
"eval_runtime": 79.4014,
"eval_samples_per_second": 6.549,
"eval_steps_per_second": 0.819,
"step": 26000
},
{
"epoch": 0.29,
"learning_rate": 3.5698712344439765e-05,
"loss": 1.5915,
"step": 26500
},
{
"epoch": 0.29,
"eval_bleu": 8.4956,
"eval_gen_len": 25.925,
"eval_loss": 3.6168606281280518,
"eval_meteor": 0.2327,
"eval_runtime": 84.1477,
"eval_samples_per_second": 6.18,
"eval_steps_per_second": 0.772,
"step": 26500
},
{
"epoch": 0.29,
"learning_rate": 3.5428876728297126e-05,
"loss": 1.6756,
"step": 27000
},
{
"epoch": 0.29,
"eval_bleu": 7.8645,
"eval_gen_len": 25.7115,
"eval_loss": 3.6374764442443848,
"eval_meteor": 0.2293,
"eval_runtime": 68.4092,
"eval_samples_per_second": 7.601,
"eval_steps_per_second": 0.95,
"step": 27000
},
{
"epoch": 0.3,
"learning_rate": 3.515904111215448e-05,
"loss": 1.6085,
"step": 27500
},
{
"epoch": 0.3,
"eval_bleu": 7.5502,
"eval_gen_len": 27.7,
"eval_loss": 3.6482863426208496,
"eval_meteor": 0.2193,
"eval_runtime": 107.6058,
"eval_samples_per_second": 4.832,
"eval_steps_per_second": 0.604,
"step": 27500
},
{
"epoch": 0.3,
"learning_rate": 3.4889205496011826e-05,
"loss": 1.5557,
"step": 28000
},
{
"epoch": 0.3,
"eval_bleu": 7.4827,
"eval_gen_len": 37.5385,
"eval_loss": 3.6158738136291504,
"eval_meteor": 0.2182,
"eval_runtime": 151.1373,
"eval_samples_per_second": 3.441,
"eval_steps_per_second": 0.43,
"step": 28000
},
{
"epoch": 0.31,
"learning_rate": 3.461936987986919e-05,
"loss": 1.5766,
"step": 28500
},
{
"epoch": 0.31,
"eval_bleu": 7.9417,
"eval_gen_len": 28.1673,
"eval_loss": 3.6332404613494873,
"eval_meteor": 0.2271,
"eval_runtime": 96.6869,
"eval_samples_per_second": 5.378,
"eval_steps_per_second": 0.672,
"step": 28500
},
{
"epoch": 0.31,
"learning_rate": 3.434953426372654e-05,
"loss": 1.6282,
"step": 29000
},
{
"epoch": 0.31,
"eval_bleu": 7.6697,
"eval_gen_len": 27.3019,
"eval_loss": 3.6174378395080566,
"eval_meteor": 0.2208,
"eval_runtime": 89.3468,
"eval_samples_per_second": 5.82,
"eval_steps_per_second": 0.728,
"step": 29000
},
{
"epoch": 0.32,
"learning_rate": 3.407969864758389e-05,
"loss": 1.5901,
"step": 29500
},
{
"epoch": 0.32,
"eval_bleu": 8.211,
"eval_gen_len": 27.4885,
"eval_loss": 3.6009418964385986,
"eval_meteor": 0.2295,
"eval_runtime": 86.4935,
"eval_samples_per_second": 6.012,
"eval_steps_per_second": 0.752,
"step": 29500
},
{
"epoch": 0.32,
"learning_rate": 3.380986303144125e-05,
"loss": 1.618,
"step": 30000
},
{
"epoch": 0.32,
"eval_bleu": 7.8473,
"eval_gen_len": 26.3385,
"eval_loss": 3.597890853881836,
"eval_meteor": 0.2276,
"eval_runtime": 67.1228,
"eval_samples_per_second": 7.747,
"eval_steps_per_second": 0.968,
"step": 30000
},
{
"epoch": 0.33,
"learning_rate": 3.35400274152986e-05,
"loss": 1.5292,
"step": 30500
},
{
"epoch": 0.33,
"eval_bleu": 7.6579,
"eval_gen_len": 28.0481,
"eval_loss": 3.578139543533325,
"eval_meteor": 0.2348,
"eval_runtime": 88.2928,
"eval_samples_per_second": 5.889,
"eval_steps_per_second": 0.736,
"step": 30500
},
{
"epoch": 0.33,
"learning_rate": 3.3270191799155956e-05,
"loss": 1.539,
"step": 31000
},
{
"epoch": 0.33,
"eval_bleu": 7.9101,
"eval_gen_len": 25.1115,
"eval_loss": 3.56876540184021,
"eval_meteor": 0.2294,
"eval_runtime": 78.3276,
"eval_samples_per_second": 6.639,
"eval_steps_per_second": 0.83,
"step": 31000
},
{
"epoch": 0.34,
"learning_rate": 3.300035618301331e-05,
"loss": 1.6394,
"step": 31500
},
{
"epoch": 0.34,
"eval_bleu": 8.1847,
"eval_gen_len": 24.5731,
"eval_loss": 3.5614802837371826,
"eval_meteor": 0.2322,
"eval_runtime": 70.0131,
"eval_samples_per_second": 7.427,
"eval_steps_per_second": 0.928,
"step": 31500
},
{
"epoch": 0.35,
"learning_rate": 3.273052056687066e-05,
"loss": 1.5546,
"step": 32000
},
{
"epoch": 0.35,
"eval_bleu": 7.9997,
"eval_gen_len": 25.1596,
"eval_loss": 3.56968355178833,
"eval_meteor": 0.2339,
"eval_runtime": 68.1359,
"eval_samples_per_second": 7.632,
"eval_steps_per_second": 0.954,
"step": 32000
},
{
"epoch": 0.35,
"learning_rate": 3.246068495072802e-05,
"loss": 1.5173,
"step": 32500
},
{
"epoch": 0.35,
"eval_bleu": 8.1149,
"eval_gen_len": 27.0596,
"eval_loss": 3.5782711505889893,
"eval_meteor": 0.229,
"eval_runtime": 92.9196,
"eval_samples_per_second": 5.596,
"eval_steps_per_second": 0.7,
"step": 32500
},
{
"epoch": 0.36,
"learning_rate": 3.219084933458537e-05,
"loss": 1.6158,
"step": 33000
},
{
"epoch": 0.36,
"eval_bleu": 8.6261,
"eval_gen_len": 25.5346,
"eval_loss": 3.5298867225646973,
"eval_meteor": 0.2419,
"eval_runtime": 64.5727,
"eval_samples_per_second": 8.053,
"eval_steps_per_second": 1.007,
"step": 33000
},
{
"epoch": 0.36,
"learning_rate": 3.1921013718442724e-05,
"loss": 1.5266,
"step": 33500
},
{
"epoch": 0.36,
"eval_bleu": 8.7619,
"eval_gen_len": 31.4077,
"eval_loss": 3.522501230239868,
"eval_meteor": 0.2319,
"eval_runtime": 122.5254,
"eval_samples_per_second": 4.244,
"eval_steps_per_second": 0.531,
"step": 33500
},
{
"epoch": 0.37,
"learning_rate": 3.1651178102300085e-05,
"loss": 1.577,
"step": 34000
},
{
"epoch": 0.37,
"eval_bleu": 8.611,
"eval_gen_len": 26.15,
"eval_loss": 3.542168140411377,
"eval_meteor": 0.2346,
"eval_runtime": 72.838,
"eval_samples_per_second": 7.139,
"eval_steps_per_second": 0.892,
"step": 34000
},
{
"epoch": 0.37,
"learning_rate": 3.138134248615743e-05,
"loss": 1.5787,
"step": 34500
},
{
"epoch": 0.37,
"eval_bleu": 8.0924,
"eval_gen_len": 29.2692,
"eval_loss": 3.5313644409179688,
"eval_meteor": 0.2301,
"eval_runtime": 104.0102,
"eval_samples_per_second": 5.0,
"eval_steps_per_second": 0.625,
"step": 34500
},
{
"epoch": 0.38,
"learning_rate": 3.111150687001479e-05,
"loss": 1.5635,
"step": 35000
},
{
"epoch": 0.38,
"eval_bleu": 8.2094,
"eval_gen_len": 25.4942,
"eval_loss": 3.5328898429870605,
"eval_meteor": 0.2314,
"eval_runtime": 62.8928,
"eval_samples_per_second": 8.268,
"eval_steps_per_second": 1.034,
"step": 35000
},
{
"epoch": 0.38,
"learning_rate": 3.0841671253872146e-05,
"loss": 1.4967,
"step": 35500
},
{
"epoch": 0.38,
"eval_bleu": 7.9224,
"eval_gen_len": 28.1365,
"eval_loss": 3.5347652435302734,
"eval_meteor": 0.2298,
"eval_runtime": 95.4251,
"eval_samples_per_second": 5.449,
"eval_steps_per_second": 0.681,
"step": 35500
},
{
"epoch": 0.39,
"learning_rate": 3.057183563772949e-05,
"loss": 1.5375,
"step": 36000
},
{
"epoch": 0.39,
"eval_bleu": 8.062,
"eval_gen_len": 25.7346,
"eval_loss": 3.5164051055908203,
"eval_meteor": 0.2375,
"eval_runtime": 72.4588,
"eval_samples_per_second": 7.176,
"eval_steps_per_second": 0.897,
"step": 36000
},
{
"epoch": 0.39,
"learning_rate": 3.030200002158685e-05,
"loss": 1.5928,
"step": 36500
},
{
"epoch": 0.39,
"eval_bleu": 8.4845,
"eval_gen_len": 25.8462,
"eval_loss": 3.5118257999420166,
"eval_meteor": 0.2439,
"eval_runtime": 69.4765,
"eval_samples_per_second": 7.485,
"eval_steps_per_second": 0.936,
"step": 36500
},
{
"epoch": 0.4,
"learning_rate": 3.0032164405444208e-05,
"loss": 1.582,
"step": 37000
},
{
"epoch": 0.4,
"eval_bleu": 8.2526,
"eval_gen_len": 25.9327,
"eval_loss": 3.505610227584839,
"eval_meteor": 0.2395,
"eval_runtime": 60.8773,
"eval_samples_per_second": 8.542,
"eval_steps_per_second": 1.068,
"step": 37000
},
{
"epoch": 0.4,
"learning_rate": 2.9762328789301558e-05,
"loss": 1.4958,
"step": 37500
},
{
"epoch": 0.4,
"eval_bleu": 8.3315,
"eval_gen_len": 28.5365,
"eval_loss": 3.517829418182373,
"eval_meteor": 0.2271,
"eval_runtime": 94.0897,
"eval_samples_per_second": 5.527,
"eval_steps_per_second": 0.691,
"step": 37500
},
{
"epoch": 0.41,
"learning_rate": 2.9492493173158915e-05,
"loss": 1.5524,
"step": 38000
},
{
"epoch": 0.41,
"eval_bleu": 8.765,
"eval_gen_len": 26.2865,
"eval_loss": 3.506021738052368,
"eval_meteor": 0.2364,
"eval_runtime": 79.4441,
"eval_samples_per_second": 6.545,
"eval_steps_per_second": 0.818,
"step": 38000
},
{
"epoch": 0.42,
"learning_rate": 2.922265755701627e-05,
"loss": 1.4689,
"step": 38500
},
{
"epoch": 0.42,
"eval_bleu": 8.5295,
"eval_gen_len": 29.5769,
"eval_loss": 3.5012190341949463,
"eval_meteor": 0.2327,
"eval_runtime": 103.4617,
"eval_samples_per_second": 5.026,
"eval_steps_per_second": 0.628,
"step": 38500
},
{
"epoch": 0.42,
"learning_rate": 2.895282194087362e-05,
"loss": 1.5345,
"step": 39000
},
{
"epoch": 0.42,
"eval_bleu": 8.7423,
"eval_gen_len": 25.3269,
"eval_loss": 3.498300313949585,
"eval_meteor": 0.2342,
"eval_runtime": 73.6072,
"eval_samples_per_second": 7.065,
"eval_steps_per_second": 0.883,
"step": 39000
},
{
"epoch": 0.43,
"learning_rate": 2.8682986324730976e-05,
"loss": 1.5282,
"step": 39500
},
{
"epoch": 0.43,
"eval_bleu": 8.9296,
"eval_gen_len": 25.1173,
"eval_loss": 3.4799299240112305,
"eval_meteor": 0.2425,
"eval_runtime": 71.1898,
"eval_samples_per_second": 7.304,
"eval_steps_per_second": 0.913,
"step": 39500
},
{
"epoch": 0.43,
"learning_rate": 2.8413150708588333e-05,
"loss": 1.5021,
"step": 40000
},
{
"epoch": 0.43,
"eval_bleu": 7.9284,
"eval_gen_len": 26.2962,
"eval_loss": 3.4839093685150146,
"eval_meteor": 0.236,
"eval_runtime": 77.544,
"eval_samples_per_second": 6.706,
"eval_steps_per_second": 0.838,
"step": 40000
},
{
"epoch": 0.44,
"learning_rate": 2.8143315092445684e-05,
"loss": 1.5072,
"step": 40500
},
{
"epoch": 0.44,
"eval_bleu": 8.7855,
"eval_gen_len": 25.425,
"eval_loss": 3.4775073528289795,
"eval_meteor": 0.238,
"eval_runtime": 84.0571,
"eval_samples_per_second": 6.186,
"eval_steps_per_second": 0.773,
"step": 40500
},
{
"epoch": 0.44,
"learning_rate": 2.7873479476303038e-05,
"loss": 1.5348,
"step": 41000
},
{
"epoch": 0.44,
"eval_bleu": 8.3592,
"eval_gen_len": 25.4673,
"eval_loss": 3.4518544673919678,
"eval_meteor": 0.24,
"eval_runtime": 81.9583,
"eval_samples_per_second": 6.345,
"eval_steps_per_second": 0.793,
"step": 41000
},
{
"epoch": 0.45,
"learning_rate": 2.7603643860160395e-05,
"loss": 1.5236,
"step": 41500
},
{
"epoch": 0.45,
"eval_bleu": 8.589,
"eval_gen_len": 28.1846,
"eval_loss": 3.466686248779297,
"eval_meteor": 0.2352,
"eval_runtime": 91.2864,
"eval_samples_per_second": 5.696,
"eval_steps_per_second": 0.712,
"step": 41500
},
{
"epoch": 0.45,
"learning_rate": 2.7333808244017745e-05,
"loss": 1.4695,
"step": 42000
},
{
"epoch": 0.45,
"eval_bleu": 8.5467,
"eval_gen_len": 26.9385,
"eval_loss": 3.4435806274414062,
"eval_meteor": 0.2396,
"eval_runtime": 89.9168,
"eval_samples_per_second": 5.783,
"eval_steps_per_second": 0.723,
"step": 42000
},
{
"epoch": 0.46,
"learning_rate": 2.70639726278751e-05,
"loss": 1.4733,
"step": 42500
},
{
"epoch": 0.46,
"eval_bleu": 8.3771,
"eval_gen_len": 26.6096,
"eval_loss": 3.455449342727661,
"eval_meteor": 0.2347,
"eval_runtime": 85.0784,
"eval_samples_per_second": 6.112,
"eval_steps_per_second": 0.764,
"step": 42500
},
{
"epoch": 0.46,
"learning_rate": 2.6794137011732456e-05,
"loss": 1.5398,
"step": 43000
},
{
"epoch": 0.46,
"eval_bleu": 9.2126,
"eval_gen_len": 24.6538,
"eval_loss": 3.436018943786621,
"eval_meteor": 0.2434,
"eval_runtime": 81.8122,
"eval_samples_per_second": 6.356,
"eval_steps_per_second": 0.795,
"step": 43000
},
{
"epoch": 0.47,
"learning_rate": 2.6524301395589806e-05,
"loss": 1.5596,
"step": 43500
},
{
"epoch": 0.47,
"eval_bleu": 9.2745,
"eval_gen_len": 26.1,
"eval_loss": 3.425743579864502,
"eval_meteor": 0.2521,
"eval_runtime": 70.71,
"eval_samples_per_second": 7.354,
"eval_steps_per_second": 0.919,
"step": 43500
},
{
"epoch": 0.47,
"learning_rate": 2.6254465779447164e-05,
"loss": 1.505,
"step": 44000
},
{
"epoch": 0.47,
"eval_bleu": 8.8478,
"eval_gen_len": 24.9731,
"eval_loss": 3.428138017654419,
"eval_meteor": 0.2421,
"eval_runtime": 81.3515,
"eval_samples_per_second": 6.392,
"eval_steps_per_second": 0.799,
"step": 44000
},
{
"epoch": 0.48,
"learning_rate": 2.5984630163304517e-05,
"loss": 1.48,
"step": 44500
},
{
"epoch": 0.48,
"eval_bleu": 9.237,
"eval_gen_len": 24.2058,
"eval_loss": 3.465346336364746,
"eval_meteor": 0.2392,
"eval_runtime": 78.2893,
"eval_samples_per_second": 6.642,
"eval_steps_per_second": 0.83,
"step": 44500
},
{
"epoch": 0.49,
"learning_rate": 2.5714794547161868e-05,
"loss": 1.5167,
"step": 45000
},
{
"epoch": 0.49,
"eval_bleu": 8.8344,
"eval_gen_len": 25.8192,
"eval_loss": 3.440796375274658,
"eval_meteor": 0.2355,
"eval_runtime": 73.3566,
"eval_samples_per_second": 7.089,
"eval_steps_per_second": 0.886,
"step": 45000
},
{
"epoch": 0.49,
"learning_rate": 2.5444958931019225e-05,
"loss": 1.4691,
"step": 45500
},
{
"epoch": 0.49,
"eval_bleu": 9.0831,
"eval_gen_len": 25.8577,
"eval_loss": 3.424842596054077,
"eval_meteor": 0.2449,
"eval_runtime": 83.8868,
"eval_samples_per_second": 6.199,
"eval_steps_per_second": 0.775,
"step": 45500
},
{
"epoch": 0.5,
"learning_rate": 2.5175123314876582e-05,
"loss": 1.4734,
"step": 46000
},
{
"epoch": 0.5,
"eval_bleu": 8.6622,
"eval_gen_len": 26.0385,
"eval_loss": 3.406123399734497,
"eval_meteor": 0.2472,
"eval_runtime": 87.066,
"eval_samples_per_second": 5.972,
"eval_steps_per_second": 0.747,
"step": 46000
},
{
"epoch": 0.5,
"learning_rate": 2.4905287698733932e-05,
"loss": 1.4354,
"step": 46500
},
{
"epoch": 0.5,
"eval_bleu": 9.3409,
"eval_gen_len": 26.0077,
"eval_loss": 3.4149692058563232,
"eval_meteor": 0.2462,
"eval_runtime": 75.3366,
"eval_samples_per_second": 6.902,
"eval_steps_per_second": 0.863,
"step": 46500
},
{
"epoch": 0.51,
"learning_rate": 2.4635452082591286e-05,
"loss": 1.4841,
"step": 47000
},
{
"epoch": 0.51,
"eval_bleu": 8.3645,
"eval_gen_len": 27.2692,
"eval_loss": 3.410163640975952,
"eval_meteor": 0.2377,
"eval_runtime": 88.8645,
"eval_samples_per_second": 5.852,
"eval_steps_per_second": 0.731,
"step": 47000
},
{
"epoch": 0.51,
"learning_rate": 2.436561646644864e-05,
"loss": 1.4163,
"step": 47500
},
{
"epoch": 0.51,
"eval_bleu": 8.7482,
"eval_gen_len": 25.1442,
"eval_loss": 3.4322900772094727,
"eval_meteor": 0.2329,
"eval_runtime": 80.4665,
"eval_samples_per_second": 6.462,
"eval_steps_per_second": 0.808,
"step": 47500
},
{
"epoch": 0.52,
"learning_rate": 2.4095780850305994e-05,
"loss": 1.4859,
"step": 48000
},
{
"epoch": 0.52,
"eval_bleu": 9.0356,
"eval_gen_len": 27.9096,
"eval_loss": 3.409453868865967,
"eval_meteor": 0.238,
"eval_runtime": 97.3658,
"eval_samples_per_second": 5.341,
"eval_steps_per_second": 0.668,
"step": 48000
},
{
"epoch": 0.52,
"learning_rate": 2.382594523416335e-05,
"loss": 1.5179,
"step": 48500
},
{
"epoch": 0.52,
"eval_bleu": 8.9689,
"eval_gen_len": 25.9173,
"eval_loss": 3.4041757583618164,
"eval_meteor": 0.2426,
"eval_runtime": 66.1422,
"eval_samples_per_second": 7.862,
"eval_steps_per_second": 0.983,
"step": 48500
},
{
"epoch": 0.53,
"learning_rate": 2.35561096180207e-05,
"loss": 1.4451,
"step": 49000
},
{
"epoch": 0.53,
"eval_bleu": 9.0897,
"eval_gen_len": 24.6654,
"eval_loss": 3.383331537246704,
"eval_meteor": 0.244,
"eval_runtime": 60.7417,
"eval_samples_per_second": 8.561,
"eval_steps_per_second": 1.07,
"step": 49000
},
{
"epoch": 0.53,
"learning_rate": 2.3286274001878055e-05,
"loss": 1.4377,
"step": 49500
},
{
"epoch": 0.53,
"eval_bleu": 9.0223,
"eval_gen_len": 25.3788,
"eval_loss": 3.3942527770996094,
"eval_meteor": 0.2481,
"eval_runtime": 71.1329,
"eval_samples_per_second": 7.31,
"eval_steps_per_second": 0.914,
"step": 49500
},
{
"epoch": 0.54,
"learning_rate": 2.3016438385735412e-05,
"loss": 1.4162,
"step": 50000
},
{
"epoch": 0.54,
"eval_bleu": 9.5501,
"eval_gen_len": 25.05,
"eval_loss": 3.3857383728027344,
"eval_meteor": 0.2468,
"eval_runtime": 77.3856,
"eval_samples_per_second": 6.72,
"eval_steps_per_second": 0.84,
"step": 50000
},
{
"epoch": 0.55,
"learning_rate": 2.2746602769592766e-05,
"loss": 1.5209,
"step": 50500
},
{
"epoch": 0.55,
"eval_bleu": 8.9994,
"eval_gen_len": 25.1058,
"eval_loss": 3.3768198490142822,
"eval_meteor": 0.2424,
"eval_runtime": 72.911,
"eval_samples_per_second": 7.132,
"eval_steps_per_second": 0.891,
"step": 50500
},
{
"epoch": 0.55,
"learning_rate": 2.247676715345012e-05,
"loss": 1.426,
"step": 51000
},
{
"epoch": 0.55,
"eval_bleu": 9.2556,
"eval_gen_len": 25.3865,
"eval_loss": 3.3841092586517334,
"eval_meteor": 0.2443,
"eval_runtime": 62.6291,
"eval_samples_per_second": 8.303,
"eval_steps_per_second": 1.038,
"step": 51000
},
{
"epoch": 0.56,
"learning_rate": 2.2206931537307473e-05,
"loss": 1.4453,
"step": 51500
},
{
"epoch": 0.56,
"eval_bleu": 9.583,
"eval_gen_len": 26.3269,
"eval_loss": 3.3895211219787598,
"eval_meteor": 0.2448,
"eval_runtime": 85.3314,
"eval_samples_per_second": 6.094,
"eval_steps_per_second": 0.762,
"step": 51500
},
{
"epoch": 0.56,
"learning_rate": 2.1937095921164827e-05,
"loss": 1.4162,
"step": 52000
},
{
"epoch": 0.56,
"eval_bleu": 9.0803,
"eval_gen_len": 25.5231,
"eval_loss": 3.385866165161133,
"eval_meteor": 0.2413,
"eval_runtime": 75.8979,
"eval_samples_per_second": 6.851,
"eval_steps_per_second": 0.856,
"step": 52000
},
{
"epoch": 0.57,
"learning_rate": 2.1667260305022184e-05,
"loss": 1.4107,
"step": 52500
},
{
"epoch": 0.57,
"eval_bleu": 8.9249,
"eval_gen_len": 24.8615,
"eval_loss": 3.3849904537200928,
"eval_meteor": 0.241,
"eval_runtime": 72.4747,
"eval_samples_per_second": 7.175,
"eval_steps_per_second": 0.897,
"step": 52500
},
{
"epoch": 0.57,
"learning_rate": 2.1397424688879535e-05,
"loss": 1.4474,
"step": 53000
},
{
"epoch": 0.57,
"eval_bleu": 9.5048,
"eval_gen_len": 23.975,
"eval_loss": 3.3705201148986816,
"eval_meteor": 0.2474,
"eval_runtime": 76.3998,
"eval_samples_per_second": 6.806,
"eval_steps_per_second": 0.851,
"step": 53000
},
{
"epoch": 0.58,
"learning_rate": 2.1127589072736888e-05,
"loss": 1.4336,
"step": 53500
},
{
"epoch": 0.58,
"eval_bleu": 9.7945,
"eval_gen_len": 24.3885,
"eval_loss": 3.349461078643799,
"eval_meteor": 0.2549,
"eval_runtime": 69.7201,
"eval_samples_per_second": 7.458,
"eval_steps_per_second": 0.932,
"step": 53500
},
{
"epoch": 0.58,
"learning_rate": 2.0857753456594245e-05,
"loss": 1.4261,
"step": 54000
},
{
"epoch": 0.58,
"eval_bleu": 9.5374,
"eval_gen_len": 24.2692,
"eval_loss": 3.369290351867676,
"eval_meteor": 0.2444,
"eval_runtime": 67.4768,
"eval_samples_per_second": 7.706,
"eval_steps_per_second": 0.963,
"step": 54000
},
{
"epoch": 0.59,
"learning_rate": 2.05879178404516e-05,
"loss": 1.409,
"step": 54500
},
{
"epoch": 0.59,
"eval_bleu": 9.3034,
"eval_gen_len": 24.625,
"eval_loss": 3.3803343772888184,
"eval_meteor": 0.2436,
"eval_runtime": 68.3273,
"eval_samples_per_second": 7.61,
"eval_steps_per_second": 0.951,
"step": 54500
},
{
"epoch": 0.59,
"learning_rate": 2.0318082224308953e-05,
"loss": 1.4364,
"step": 55000
},
{
"epoch": 0.59,
"eval_bleu": 9.6554,
"eval_gen_len": 25.5654,
"eval_loss": 3.371992349624634,
"eval_meteor": 0.2457,
"eval_runtime": 82.9163,
"eval_samples_per_second": 6.271,
"eval_steps_per_second": 0.784,
"step": 55000
},
{
"epoch": 0.6,
"learning_rate": 2.0048246608166307e-05,
"loss": 1.4184,
"step": 55500
},
{
"epoch": 0.6,
"eval_bleu": 9.4698,
"eval_gen_len": 27.6,
"eval_loss": 3.3737027645111084,
"eval_meteor": 0.2414,
"eval_runtime": 99.5873,
"eval_samples_per_second": 5.222,
"eval_steps_per_second": 0.653,
"step": 55500
},
{
"epoch": 0.6,
"learning_rate": 1.977841099202366e-05,
"loss": 1.4417,
"step": 56000
},
{
"epoch": 0.6,
"eval_bleu": 9.3662,
"eval_gen_len": 25.1481,
"eval_loss": 3.3708438873291016,
"eval_meteor": 0.2446,
"eval_runtime": 87.8071,
"eval_samples_per_second": 5.922,
"eval_steps_per_second": 0.74,
"step": 56000
},
{
"epoch": 0.61,
"learning_rate": 1.9508575375881014e-05,
"loss": 1.3421,
"step": 56500
},
{
"epoch": 0.61,
"eval_bleu": 9.3448,
"eval_gen_len": 24.8404,
"eval_loss": 3.3396267890930176,
"eval_meteor": 0.2478,
"eval_runtime": 73.0359,
"eval_samples_per_second": 7.12,
"eval_steps_per_second": 0.89,
"step": 56500
},
{
"epoch": 0.62,
"learning_rate": 1.9238739759738368e-05,
"loss": 1.396,
"step": 57000
},
{
"epoch": 0.62,
"eval_bleu": 9.0765,
"eval_gen_len": 24.7731,
"eval_loss": 3.3500242233276367,
"eval_meteor": 0.2413,
"eval_runtime": 71.4703,
"eval_samples_per_second": 7.276,
"eval_steps_per_second": 0.909,
"step": 57000
},
{
"epoch": 0.62,
"learning_rate": 1.8968904143595722e-05,
"loss": 1.4152,
"step": 57500
},
{
"epoch": 0.62,
"eval_bleu": 9.4934,
"eval_gen_len": 24.7885,
"eval_loss": 3.335568904876709,
"eval_meteor": 0.2496,
"eval_runtime": 67.659,
"eval_samples_per_second": 7.686,
"eval_steps_per_second": 0.961,
"step": 57500
},
{
"epoch": 0.63,
"learning_rate": 1.8699068527453075e-05,
"loss": 1.4245,
"step": 58000
},
{
"epoch": 0.63,
"eval_bleu": 9.7477,
"eval_gen_len": 24.0096,
"eval_loss": 3.344538688659668,
"eval_meteor": 0.2494,
"eval_runtime": 69.4763,
"eval_samples_per_second": 7.485,
"eval_steps_per_second": 0.936,
"step": 58000
},
{
"epoch": 0.63,
"learning_rate": 1.8429232911310433e-05,
"loss": 1.4661,
"step": 58500
},
{
"epoch": 0.63,
"eval_bleu": 9.2664,
"eval_gen_len": 24.1404,
"eval_loss": 3.3510961532592773,
"eval_meteor": 0.2416,
"eval_runtime": 71.3645,
"eval_samples_per_second": 7.287,
"eval_steps_per_second": 0.911,
"step": 58500
},
{
"epoch": 0.64,
"learning_rate": 1.8159397295167786e-05,
"loss": 1.4219,
"step": 59000
},
{
"epoch": 0.64,
"eval_bleu": 9.5738,
"eval_gen_len": 24.6769,
"eval_loss": 3.3354907035827637,
"eval_meteor": 0.2436,
"eval_runtime": 76.9335,
"eval_samples_per_second": 6.759,
"eval_steps_per_second": 0.845,
"step": 59000
},
{
"epoch": 0.64,
"learning_rate": 1.7889561679025137e-05,
"loss": 1.4051,
"step": 59500
},
{
"epoch": 0.64,
"eval_bleu": 9.4506,
"eval_gen_len": 24.2615,
"eval_loss": 3.3152685165405273,
"eval_meteor": 0.2495,
"eval_runtime": 68.6589,
"eval_samples_per_second": 7.574,
"eval_steps_per_second": 0.947,
"step": 59500
},
{
"epoch": 0.65,
"learning_rate": 1.7619726062882494e-05,
"loss": 1.4743,
"step": 60000
},
{
"epoch": 0.65,
"eval_bleu": 9.6423,
"eval_gen_len": 24.4865,
"eval_loss": 3.3143715858459473,
"eval_meteor": 0.2463,
"eval_runtime": 71.0116,
"eval_samples_per_second": 7.323,
"eval_steps_per_second": 0.915,
"step": 60000
},
{
"epoch": 0.65,
"learning_rate": 1.7349890446739848e-05,
"loss": 1.3961,
"step": 60500
},
{
"epoch": 0.65,
"eval_bleu": 9.7681,
"eval_gen_len": 24.4615,
"eval_loss": 3.3041951656341553,
"eval_meteor": 0.2526,
"eval_runtime": 66.5982,
"eval_samples_per_second": 7.808,
"eval_steps_per_second": 0.976,
"step": 60500
},
{
"epoch": 0.66,
"learning_rate": 1.70800548305972e-05,
"loss": 1.447,
"step": 61000
},
{
"epoch": 0.66,
"eval_bleu": 9.6435,
"eval_gen_len": 24.6365,
"eval_loss": 3.3025214672088623,
"eval_meteor": 0.2507,
"eval_runtime": 73.4548,
"eval_samples_per_second": 7.079,
"eval_steps_per_second": 0.885,
"step": 61000
},
{
"epoch": 0.66,
"learning_rate": 1.6810219214454555e-05,
"loss": 1.4102,
"step": 61500
},
{
"epoch": 0.66,
"eval_bleu": 9.6897,
"eval_gen_len": 24.8731,
"eval_loss": 3.3014609813690186,
"eval_meteor": 0.25,
"eval_runtime": 77.7893,
"eval_samples_per_second": 6.685,
"eval_steps_per_second": 0.836,
"step": 61500
},
{
"epoch": 0.67,
"learning_rate": 1.654038359831191e-05,
"loss": 1.3739,
"step": 62000
},
{
"epoch": 0.67,
"eval_bleu": 9.2607,
"eval_gen_len": 24.7885,
"eval_loss": 3.313209056854248,
"eval_meteor": 0.2447,
"eval_runtime": 70.1449,
"eval_samples_per_second": 7.413,
"eval_steps_per_second": 0.927,
"step": 62000
},
{
"epoch": 0.67,
"learning_rate": 1.6270547982169263e-05,
"loss": 1.3794,
"step": 62500
},
{
"epoch": 0.67,
"eval_bleu": 9.6355,
"eval_gen_len": 24.4365,
"eval_loss": 3.3016440868377686,
"eval_meteor": 0.2488,
"eval_runtime": 72.723,
"eval_samples_per_second": 7.15,
"eval_steps_per_second": 0.894,
"step": 62500
},
{
"epoch": 0.68,
"learning_rate": 1.600071236602662e-05,
"loss": 1.3819,
"step": 63000
},
{
"epoch": 0.68,
"eval_bleu": 9.9125,
"eval_gen_len": 24.5038,
"eval_loss": 3.298640489578247,
"eval_meteor": 0.248,
"eval_runtime": 71.6443,
"eval_samples_per_second": 7.258,
"eval_steps_per_second": 0.907,
"step": 63000
},
{
"epoch": 0.69,
"learning_rate": 1.573087674988397e-05,
"loss": 1.3598,
"step": 63500
},
{
"epoch": 0.69,
"eval_bleu": 10.2032,
"eval_gen_len": 24.6558,
"eval_loss": 3.2921295166015625,
"eval_meteor": 0.2534,
"eval_runtime": 74.7575,
"eval_samples_per_second": 6.956,
"eval_steps_per_second": 0.869,
"step": 63500
},
{
"epoch": 0.69,
"learning_rate": 1.5461041133741324e-05,
"loss": 1.3846,
"step": 64000
},
{
"epoch": 0.69,
"eval_bleu": 9.9342,
"eval_gen_len": 23.9923,
"eval_loss": 3.2913033962249756,
"eval_meteor": 0.2468,
"eval_runtime": 68.6789,
"eval_samples_per_second": 7.571,
"eval_steps_per_second": 0.946,
"step": 64000
},
{
"epoch": 0.7,
"learning_rate": 1.5191205517598681e-05,
"loss": 1.4024,
"step": 64500
},
{
"epoch": 0.7,
"eval_bleu": 9.445,
"eval_gen_len": 25.3865,
"eval_loss": 3.2889387607574463,
"eval_meteor": 0.2426,
"eval_runtime": 78.1546,
"eval_samples_per_second": 6.653,
"eval_steps_per_second": 0.832,
"step": 64500
},
{
"epoch": 0.7,
"learning_rate": 1.4921369901456033e-05,
"loss": 1.3775,
"step": 65000
},
{
"epoch": 0.7,
"eval_bleu": 9.711,
"eval_gen_len": 25.5769,
"eval_loss": 3.2912492752075195,
"eval_meteor": 0.2459,
"eval_runtime": 102.461,
"eval_samples_per_second": 5.075,
"eval_steps_per_second": 0.634,
"step": 65000
},
{
"epoch": 0.71,
"learning_rate": 1.4651534285313387e-05,
"loss": 1.3192,
"step": 65500
},
{
"epoch": 0.71,
"eval_bleu": 9.5794,
"eval_gen_len": 25.2038,
"eval_loss": 3.288285970687866,
"eval_meteor": 0.2438,
"eval_runtime": 81.3849,
"eval_samples_per_second": 6.389,
"eval_steps_per_second": 0.799,
"step": 65500
},
{
"epoch": 0.71,
"learning_rate": 1.4381698669170742e-05,
"loss": 1.3426,
"step": 66000
},
{
"epoch": 0.71,
"eval_bleu": 9.9763,
"eval_gen_len": 25.1942,
"eval_loss": 3.282227039337158,
"eval_meteor": 0.2473,
"eval_runtime": 81.2601,
"eval_samples_per_second": 6.399,
"eval_steps_per_second": 0.8,
"step": 66000
},
{
"epoch": 0.72,
"learning_rate": 1.4111863053028096e-05,
"loss": 1.3669,
"step": 66500
},
{
"epoch": 0.72,
"eval_bleu": 9.4662,
"eval_gen_len": 23.9865,
"eval_loss": 3.2904016971588135,
"eval_meteor": 0.2483,
"eval_runtime": 66.1145,
"eval_samples_per_second": 7.865,
"eval_steps_per_second": 0.983,
"step": 66500
},
{
"epoch": 0.72,
"learning_rate": 1.384202743688545e-05,
"loss": 1.3624,
"step": 67000
},
{
"epoch": 0.72,
"eval_bleu": 9.5602,
"eval_gen_len": 24.1096,
"eval_loss": 3.299604654312134,
"eval_meteor": 0.2452,
"eval_runtime": 72.3193,
"eval_samples_per_second": 7.19,
"eval_steps_per_second": 0.899,
"step": 67000
},
{
"epoch": 0.73,
"learning_rate": 1.3572191820742805e-05,
"loss": 1.3435,
"step": 67500
},
{
"epoch": 0.73,
"eval_bleu": 9.756,
"eval_gen_len": 25.4038,
"eval_loss": 3.2783830165863037,
"eval_meteor": 0.2482,
"eval_runtime": 64.4382,
"eval_samples_per_second": 8.07,
"eval_steps_per_second": 1.009,
"step": 67500
},
{
"epoch": 0.73,
"learning_rate": 1.3302356204600157e-05,
"loss": 1.3668,
"step": 68000
},
{
"epoch": 0.73,
"eval_bleu": 9.9278,
"eval_gen_len": 24.3115,
"eval_loss": 3.2847084999084473,
"eval_meteor": 0.246,
"eval_runtime": 74.6646,
"eval_samples_per_second": 6.964,
"eval_steps_per_second": 0.871,
"step": 68000
},
{
"epoch": 0.74,
"learning_rate": 1.3032520588457511e-05,
"loss": 1.4025,
"step": 68500
},
{
"epoch": 0.74,
"eval_bleu": 9.8915,
"eval_gen_len": 24.4904,
"eval_loss": 3.2775745391845703,
"eval_meteor": 0.2502,
"eval_runtime": 76.4639,
"eval_samples_per_second": 6.801,
"eval_steps_per_second": 0.85,
"step": 68500
},
{
"epoch": 0.74,
"learning_rate": 1.2762684972314867e-05,
"loss": 1.334,
"step": 69000
},
{
"epoch": 0.74,
"eval_bleu": 10.1572,
"eval_gen_len": 25.5462,
"eval_loss": 3.2732129096984863,
"eval_meteor": 0.2489,
"eval_runtime": 88.0016,
"eval_samples_per_second": 5.909,
"eval_steps_per_second": 0.739,
"step": 69000
},
{
"epoch": 0.75,
"learning_rate": 1.249284935617222e-05,
"loss": 1.407,
"step": 69500
},
{
"epoch": 0.75,
"eval_bleu": 9.9716,
"eval_gen_len": 24.8692,
"eval_loss": 3.2843334674835205,
"eval_meteor": 0.2503,
"eval_runtime": 76.3651,
"eval_samples_per_second": 6.809,
"eval_steps_per_second": 0.851,
"step": 69500
},
{
"epoch": 0.76,
"learning_rate": 1.2223013740029574e-05,
"loss": 1.449,
"step": 70000
},
{
"epoch": 0.76,
"eval_bleu": 9.7516,
"eval_gen_len": 24.6115,
"eval_loss": 3.26999831199646,
"eval_meteor": 0.2446,
"eval_runtime": 57.4151,
"eval_samples_per_second": 9.057,
"eval_steps_per_second": 1.132,
"step": 70000
},
{
"epoch": 0.76,
"learning_rate": 1.1953178123886928e-05,
"loss": 1.3357,
"step": 70500
},
{
"epoch": 0.76,
"eval_bleu": 9.7942,
"eval_gen_len": 25.0538,
"eval_loss": 3.2684452533721924,
"eval_meteor": 0.2478,
"eval_runtime": 69.1367,
"eval_samples_per_second": 7.521,
"eval_steps_per_second": 0.94,
"step": 70500
},
{
"epoch": 0.77,
"learning_rate": 1.1683342507744283e-05,
"loss": 1.3437,
"step": 71000
},
{
"epoch": 0.77,
"eval_bleu": 9.9427,
"eval_gen_len": 24.1538,
"eval_loss": 3.2602343559265137,
"eval_meteor": 0.2486,
"eval_runtime": 72.8541,
"eval_samples_per_second": 7.138,
"eval_steps_per_second": 0.892,
"step": 71000
},
{
"epoch": 0.77,
"learning_rate": 1.1413506891601637e-05,
"loss": 1.3518,
"step": 71500
},
{
"epoch": 0.77,
"eval_bleu": 9.8195,
"eval_gen_len": 25.35,
"eval_loss": 3.2659668922424316,
"eval_meteor": 0.2466,
"eval_runtime": 68.8561,
"eval_samples_per_second": 7.552,
"eval_steps_per_second": 0.944,
"step": 71500
},
{
"epoch": 0.78,
"learning_rate": 1.1143671275458991e-05,
"loss": 1.3762,
"step": 72000
},
{
"epoch": 0.78,
"eval_bleu": 10.0553,
"eval_gen_len": 24.8635,
"eval_loss": 3.257364511489868,
"eval_meteor": 0.2516,
"eval_runtime": 81.5604,
"eval_samples_per_second": 6.376,
"eval_steps_per_second": 0.797,
"step": 72000
},
{
"epoch": 0.78,
"learning_rate": 1.0873835659316345e-05,
"loss": 1.3404,
"step": 72500
},
{
"epoch": 0.78,
"eval_bleu": 9.9627,
"eval_gen_len": 23.9154,
"eval_loss": 3.2508692741394043,
"eval_meteor": 0.2509,
"eval_runtime": 55.132,
"eval_samples_per_second": 9.432,
"eval_steps_per_second": 1.179,
"step": 72500
},
{
"epoch": 0.79,
"learning_rate": 1.06040000431737e-05,
"loss": 1.3548,
"step": 73000
},
{
"epoch": 0.79,
"eval_bleu": 9.8402,
"eval_gen_len": 24.7827,
"eval_loss": 3.260327100753784,
"eval_meteor": 0.2499,
"eval_runtime": 75.4748,
"eval_samples_per_second": 6.89,
"eval_steps_per_second": 0.861,
"step": 73000
},
{
"epoch": 0.79,
"learning_rate": 1.0334164427031052e-05,
"loss": 1.3642,
"step": 73500
},
{
"epoch": 0.79,
"eval_bleu": 9.952,
"eval_gen_len": 24.5327,
"eval_loss": 3.2416298389434814,
"eval_meteor": 0.2534,
"eval_runtime": 74.6757,
"eval_samples_per_second": 6.963,
"eval_steps_per_second": 0.87,
"step": 73500
},
{
"epoch": 0.8,
"learning_rate": 1.0064328810888408e-05,
"loss": 1.3284,
"step": 74000
},
{
"epoch": 0.8,
"eval_bleu": 9.8073,
"eval_gen_len": 25.0212,
"eval_loss": 3.2586073875427246,
"eval_meteor": 0.2499,
"eval_runtime": 76.8596,
"eval_samples_per_second": 6.766,
"eval_steps_per_second": 0.846,
"step": 74000
},
{
"epoch": 0.8,
"learning_rate": 9.794493194745761e-06,
"loss": 1.3697,
"step": 74500
},
{
"epoch": 0.8,
"eval_bleu": 10.0465,
"eval_gen_len": 24.95,
"eval_loss": 3.2522428035736084,
"eval_meteor": 0.249,
"eval_runtime": 86.1073,
"eval_samples_per_second": 6.039,
"eval_steps_per_second": 0.755,
"step": 74500
},
{
"epoch": 0.81,
"learning_rate": 9.524657578603115e-06,
"loss": 1.3718,
"step": 75000
},
{
"epoch": 0.81,
"eval_bleu": 9.8922,
"eval_gen_len": 25.0846,
"eval_loss": 3.246650457382202,
"eval_meteor": 0.2499,
"eval_runtime": 78.8353,
"eval_samples_per_second": 6.596,
"eval_steps_per_second": 0.825,
"step": 75000
},
{
"epoch": 0.81,
"learning_rate": 9.254821962460469e-06,
"loss": 1.3333,
"step": 75500
},
{
"epoch": 0.81,
"eval_bleu": 9.5919,
"eval_gen_len": 24.5365,
"eval_loss": 3.244452953338623,
"eval_meteor": 0.2509,
"eval_runtime": 69.8377,
"eval_samples_per_second": 7.446,
"eval_steps_per_second": 0.931,
"step": 75500
},
{
"epoch": 0.82,
"learning_rate": 8.984986346317824e-06,
"loss": 1.4192,
"step": 76000
},
{
"epoch": 0.82,
"eval_bleu": 10.1256,
"eval_gen_len": 24.8904,
"eval_loss": 3.230175733566284,
"eval_meteor": 0.2524,
"eval_runtime": 66.1729,
"eval_samples_per_second": 7.858,
"eval_steps_per_second": 0.982,
"step": 76000
},
{
"epoch": 0.83,
"learning_rate": 8.715150730175178e-06,
"loss": 1.3068,
"step": 76500
},
{
"epoch": 0.83,
"eval_bleu": 10.1132,
"eval_gen_len": 24.5346,
"eval_loss": 3.241743326187134,
"eval_meteor": 0.2533,
"eval_runtime": 80.1867,
"eval_samples_per_second": 6.485,
"eval_steps_per_second": 0.811,
"step": 76500
},
{
"epoch": 0.83,
"learning_rate": 8.445315114032532e-06,
"loss": 1.4282,
"step": 77000
},
{
"epoch": 0.83,
"eval_bleu": 9.95,
"eval_gen_len": 24.3462,
"eval_loss": 3.242541551589966,
"eval_meteor": 0.2509,
"eval_runtime": 61.0641,
"eval_samples_per_second": 8.516,
"eval_steps_per_second": 1.064,
"step": 77000
},
{
"epoch": 0.84,
"learning_rate": 8.175479497889886e-06,
"loss": 1.4073,
"step": 77500
},
{
"epoch": 0.84,
"eval_bleu": 10.0739,
"eval_gen_len": 25.3173,
"eval_loss": 3.234609842300415,
"eval_meteor": 0.2523,
"eval_runtime": 76.5129,
"eval_samples_per_second": 6.796,
"eval_steps_per_second": 0.85,
"step": 77500
},
{
"epoch": 0.84,
"learning_rate": 7.905643881747241e-06,
"loss": 1.3717,
"step": 78000
},
{
"epoch": 0.84,
"eval_bleu": 10.2478,
"eval_gen_len": 24.7577,
"eval_loss": 3.2297909259796143,
"eval_meteor": 0.2543,
"eval_runtime": 79.2758,
"eval_samples_per_second": 6.559,
"eval_steps_per_second": 0.82,
"step": 78000
},
{
"epoch": 0.85,
"learning_rate": 7.635808265604593e-06,
"loss": 1.3212,
"step": 78500
},
{
"epoch": 0.85,
"eval_bleu": 9.9788,
"eval_gen_len": 24.3962,
"eval_loss": 3.2253217697143555,
"eval_meteor": 0.2558,
"eval_runtime": 74.6105,
"eval_samples_per_second": 6.97,
"eval_steps_per_second": 0.871,
"step": 78500
},
{
"epoch": 0.85,
"learning_rate": 7.3659726494619485e-06,
"loss": 1.3623,
"step": 79000
},
{
"epoch": 0.85,
"eval_bleu": 10.1533,
"eval_gen_len": 24.2692,
"eval_loss": 3.225541830062866,
"eval_meteor": 0.2516,
"eval_runtime": 76.3252,
"eval_samples_per_second": 6.813,
"eval_steps_per_second": 0.852,
"step": 79000
},
{
"epoch": 0.86,
"learning_rate": 7.096137033319303e-06,
"loss": 1.2651,
"step": 79500
},
{
"epoch": 0.86,
"eval_bleu": 10.1154,
"eval_gen_len": 24.7058,
"eval_loss": 3.2230679988861084,
"eval_meteor": 0.2535,
"eval_runtime": 78.0816,
"eval_samples_per_second": 6.66,
"eval_steps_per_second": 0.832,
"step": 79500
},
{
"epoch": 0.86,
"learning_rate": 6.826301417176656e-06,
"loss": 1.3287,
"step": 80000
},
{
"epoch": 0.86,
"eval_bleu": 10.0863,
"eval_gen_len": 24.5962,
"eval_loss": 3.2264089584350586,
"eval_meteor": 0.2544,
"eval_runtime": 100.662,
"eval_samples_per_second": 5.166,
"eval_steps_per_second": 0.646,
"step": 80000
},
{
"epoch": 0.87,
"learning_rate": 6.556465801034011e-06,
"loss": 1.33,
"step": 80500
},
{
"epoch": 0.87,
"eval_bleu": 10.1522,
"eval_gen_len": 25.4885,
"eval_loss": 3.221473217010498,
"eval_meteor": 0.2513,
"eval_runtime": 70.9756,
"eval_samples_per_second": 7.326,
"eval_steps_per_second": 0.916,
"step": 80500
},
{
"epoch": 0.87,
"learning_rate": 6.286630184891365e-06,
"loss": 1.2862,
"step": 81000
},
{
"epoch": 0.87,
"eval_bleu": 10.1425,
"eval_gen_len": 25.1442,
"eval_loss": 3.226158618927002,
"eval_meteor": 0.2538,
"eval_runtime": 74.1814,
"eval_samples_per_second": 7.01,
"eval_steps_per_second": 0.876,
"step": 81000
},
{
"epoch": 0.88,
"learning_rate": 6.016794568748719e-06,
"loss": 1.3738,
"step": 81500
},
{
"epoch": 0.88,
"eval_bleu": 10.1001,
"eval_gen_len": 25.1846,
"eval_loss": 3.2149925231933594,
"eval_meteor": 0.255,
"eval_runtime": 81.4268,
"eval_samples_per_second": 6.386,
"eval_steps_per_second": 0.798,
"step": 81500
},
{
"epoch": 0.89,
"learning_rate": 5.746958952606073e-06,
"loss": 1.3141,
"step": 82000
},
{
"epoch": 0.89,
"eval_bleu": 10.2016,
"eval_gen_len": 24.6769,
"eval_loss": 3.217388391494751,
"eval_meteor": 0.2549,
"eval_runtime": 75.3151,
"eval_samples_per_second": 6.904,
"eval_steps_per_second": 0.863,
"step": 82000
},
{
"epoch": 0.89,
"learning_rate": 5.4771233364634265e-06,
"loss": 1.3326,
"step": 82500
},
{
"epoch": 0.89,
"eval_bleu": 10.2847,
"eval_gen_len": 24.2038,
"eval_loss": 3.2135159969329834,
"eval_meteor": 0.2529,
"eval_runtime": 74.3155,
"eval_samples_per_second": 6.997,
"eval_steps_per_second": 0.875,
"step": 82500
},
{
"epoch": 0.9,
"learning_rate": 5.207287720320781e-06,
"loss": 1.3112,
"step": 83000
},
{
"epoch": 0.9,
"eval_bleu": 10.1374,
"eval_gen_len": 24.7308,
"eval_loss": 3.2098002433776855,
"eval_meteor": 0.2572,
"eval_runtime": 71.2774,
"eval_samples_per_second": 7.295,
"eval_steps_per_second": 0.912,
"step": 83000
},
{
"epoch": 0.9,
"learning_rate": 4.937452104178135e-06,
"loss": 1.3101,
"step": 83500
},
{
"epoch": 0.9,
"eval_bleu": 10.0941,
"eval_gen_len": 24.6654,
"eval_loss": 3.206242084503174,
"eval_meteor": 0.256,
"eval_runtime": 68.2494,
"eval_samples_per_second": 7.619,
"eval_steps_per_second": 0.952,
"step": 83500
},
{
"epoch": 0.91,
"learning_rate": 4.667616488035489e-06,
"loss": 1.3403,
"step": 84000
},
{
"epoch": 0.91,
"eval_bleu": 10.1955,
"eval_gen_len": 24.8115,
"eval_loss": 3.2050940990448,
"eval_meteor": 0.2569,
"eval_runtime": 67.097,
"eval_samples_per_second": 7.75,
"eval_steps_per_second": 0.969,
"step": 84000
},
{
"epoch": 0.91,
"learning_rate": 4.397780871892843e-06,
"loss": 1.3651,
"step": 84500
},
{
"epoch": 0.91,
"eval_bleu": 10.0488,
"eval_gen_len": 25.5019,
"eval_loss": 3.209751605987549,
"eval_meteor": 0.2581,
"eval_runtime": 61.2968,
"eval_samples_per_second": 8.483,
"eval_steps_per_second": 1.06,
"step": 84500
},
{
"epoch": 0.92,
"learning_rate": 4.127945255750197e-06,
"loss": 1.3243,
"step": 85000
},
{
"epoch": 0.92,
"eval_bleu": 10.1076,
"eval_gen_len": 24.7385,
"eval_loss": 3.202362298965454,
"eval_meteor": 0.2567,
"eval_runtime": 68.0409,
"eval_samples_per_second": 7.642,
"eval_steps_per_second": 0.955,
"step": 85000
},
{
"epoch": 0.92,
"learning_rate": 3.858109639607551e-06,
"loss": 1.3147,
"step": 85500
},
{
"epoch": 0.92,
"eval_bleu": 10.0392,
"eval_gen_len": 25.1269,
"eval_loss": 3.2062337398529053,
"eval_meteor": 0.2544,
"eval_runtime": 71.9112,
"eval_samples_per_second": 7.231,
"eval_steps_per_second": 0.904,
"step": 85500
},
{
"epoch": 0.93,
"learning_rate": 3.5882740234649054e-06,
"loss": 1.2663,
"step": 86000
},
{
"epoch": 0.93,
"eval_bleu": 10.2884,
"eval_gen_len": 24.925,
"eval_loss": 3.2030699253082275,
"eval_meteor": 0.2588,
"eval_runtime": 70.7104,
"eval_samples_per_second": 7.354,
"eval_steps_per_second": 0.919,
"step": 86000
},
{
"epoch": 0.93,
"learning_rate": 3.3184384073222596e-06,
"loss": 1.3497,
"step": 86500
},
{
"epoch": 0.93,
"eval_bleu": 10.1993,
"eval_gen_len": 25.0269,
"eval_loss": 3.208573579788208,
"eval_meteor": 0.2542,
"eval_runtime": 73.9279,
"eval_samples_per_second": 7.034,
"eval_steps_per_second": 0.879,
"step": 86500
},
{
"epoch": 0.94,
"learning_rate": 3.0486027911796137e-06,
"loss": 1.3252,
"step": 87000
},
{
"epoch": 0.94,
"eval_bleu": 10.1155,
"eval_gen_len": 24.7173,
"eval_loss": 3.2008321285247803,
"eval_meteor": 0.2575,
"eval_runtime": 68.983,
"eval_samples_per_second": 7.538,
"eval_steps_per_second": 0.942,
"step": 87000
},
{
"epoch": 0.94,
"learning_rate": 2.778767175036968e-06,
"loss": 1.277,
"step": 87500
},
{
"epoch": 0.94,
"eval_bleu": 10.3075,
"eval_gen_len": 24.5346,
"eval_loss": 3.2011780738830566,
"eval_meteor": 0.2592,
"eval_runtime": 67.2436,
"eval_samples_per_second": 7.733,
"eval_steps_per_second": 0.967,
"step": 87500
},
{
"epoch": 0.95,
"learning_rate": 2.5089315588943217e-06,
"loss": 1.3134,
"step": 88000
},
{
"epoch": 0.95,
"eval_bleu": 10.31,
"eval_gen_len": 24.4,
"eval_loss": 3.2086844444274902,
"eval_meteor": 0.2565,
"eval_runtime": 90.4691,
"eval_samples_per_second": 5.748,
"eval_steps_per_second": 0.718,
"step": 88000
},
{
"epoch": 0.96,
"learning_rate": 2.239095942751676e-06,
"loss": 1.371,
"step": 88500
},
{
"epoch": 0.96,
"eval_bleu": 10.3715,
"eval_gen_len": 24.3385,
"eval_loss": 3.206083297729492,
"eval_meteor": 0.2597,
"eval_runtime": 67.5985,
"eval_samples_per_second": 7.692,
"eval_steps_per_second": 0.962,
"step": 88500
},
{
"epoch": 0.96,
"learning_rate": 1.96926032660903e-06,
"loss": 1.2951,
"step": 89000
},
{
"epoch": 0.96,
"eval_bleu": 10.415,
"eval_gen_len": 24.4481,
"eval_loss": 3.2051799297332764,
"eval_meteor": 0.2597,
"eval_runtime": 70.0734,
"eval_samples_per_second": 7.421,
"eval_steps_per_second": 0.928,
"step": 89000
},
{
"epoch": 0.97,
"learning_rate": 1.6994247104663838e-06,
"loss": 1.2891,
"step": 89500
},
{
"epoch": 0.97,
"eval_bleu": 10.3082,
"eval_gen_len": 24.3154,
"eval_loss": 3.2073869705200195,
"eval_meteor": 0.2566,
"eval_runtime": 67.5441,
"eval_samples_per_second": 7.699,
"eval_steps_per_second": 0.962,
"step": 89500
},
{
"epoch": 0.97,
"learning_rate": 1.429589094323738e-06,
"loss": 1.3057,
"step": 90000
},
{
"epoch": 0.97,
"eval_bleu": 10.3117,
"eval_gen_len": 24.7635,
"eval_loss": 3.204622745513916,
"eval_meteor": 0.2582,
"eval_runtime": 67.7948,
"eval_samples_per_second": 7.67,
"eval_steps_per_second": 0.959,
"step": 90000
},
{
"epoch": 0.98,
"learning_rate": 1.1597534781810922e-06,
"loss": 1.294,
"step": 90500
},
{
"epoch": 0.98,
"eval_bleu": 10.4707,
"eval_gen_len": 24.8135,
"eval_loss": 3.204620838165283,
"eval_meteor": 0.2593,
"eval_runtime": 69.6992,
"eval_samples_per_second": 7.461,
"eval_steps_per_second": 0.933,
"step": 90500
},
{
"epoch": 0.98,
"learning_rate": 8.899178620384462e-07,
"loss": 1.2979,
"step": 91000
},
{
"epoch": 0.98,
"eval_bleu": 10.179,
"eval_gen_len": 24.8058,
"eval_loss": 3.2011451721191406,
"eval_meteor": 0.2588,
"eval_runtime": 76.3465,
"eval_samples_per_second": 6.811,
"eval_steps_per_second": 0.851,
"step": 91000
},
{
"epoch": 0.99,
"learning_rate": 6.200822458958003e-07,
"loss": 1.3096,
"step": 91500
},
{
"epoch": 0.99,
"eval_bleu": 10.407,
"eval_gen_len": 24.5077,
"eval_loss": 3.199586868286133,
"eval_meteor": 0.2588,
"eval_runtime": 68.5257,
"eval_samples_per_second": 7.588,
"eval_steps_per_second": 0.949,
"step": 91500
},
{
"epoch": 0.99,
"learning_rate": 3.502466297531544e-07,
"loss": 1.3779,
"step": 92000
},
{
"epoch": 0.99,
"eval_bleu": 10.2969,
"eval_gen_len": 24.6481,
"eval_loss": 3.2000153064727783,
"eval_meteor": 0.2567,
"eval_runtime": 66.7091,
"eval_samples_per_second": 7.795,
"eval_steps_per_second": 0.974,
"step": 92000
},
{
"epoch": 1.0,
"learning_rate": 8.041101361050848e-08,
"loss": 1.3578,
"step": 92500
},
{
"epoch": 1.0,
"eval_bleu": 10.295,
"eval_gen_len": 24.6423,
"eval_loss": 3.2001755237579346,
"eval_meteor": 0.2573,
"eval_runtime": 74.0674,
"eval_samples_per_second": 7.021,
"eval_steps_per_second": 0.878,
"step": 92500
}
],
"max_steps": 92649,
"num_train_epochs": 1,
"total_flos": 1.0046721126039552e+16,
"trial_name": null,
"trial_params": null
}