|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9983917797277898, |
|
"global_step": 92500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9730164383857356e-05, |
|
"loss": 2.1298, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_bleu": 6.9284, |
|
"eval_gen_len": 23.9635, |
|
"eval_loss": 4.2180352210998535, |
|
"eval_meteor": 0.2087, |
|
"eval_runtime": 55.8494, |
|
"eval_samples_per_second": 9.311, |
|
"eval_steps_per_second": 1.164, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.946032876771471e-05, |
|
"loss": 2.0441, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_bleu": 6.89, |
|
"eval_gen_len": 35.1442, |
|
"eval_loss": 4.191596508026123, |
|
"eval_meteor": 0.2133, |
|
"eval_runtime": 113.3213, |
|
"eval_samples_per_second": 4.589, |
|
"eval_steps_per_second": 0.574, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9190493151572064e-05, |
|
"loss": 2.0547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 6.5842, |
|
"eval_gen_len": 26.2904, |
|
"eval_loss": 4.191189289093018, |
|
"eval_meteor": 0.214, |
|
"eval_runtime": 68.7752, |
|
"eval_samples_per_second": 7.561, |
|
"eval_steps_per_second": 0.945, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.892065753542942e-05, |
|
"loss": 1.9988, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 6.3595, |
|
"eval_gen_len": 43.4173, |
|
"eval_loss": 4.144649982452393, |
|
"eval_meteor": 0.206, |
|
"eval_runtime": 168.802, |
|
"eval_samples_per_second": 3.081, |
|
"eval_steps_per_second": 0.385, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.865082191928677e-05, |
|
"loss": 1.9714, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 6.0327, |
|
"eval_gen_len": 35.4712, |
|
"eval_loss": 4.147974491119385, |
|
"eval_meteor": 0.2087, |
|
"eval_runtime": 124.6806, |
|
"eval_samples_per_second": 4.171, |
|
"eval_steps_per_second": 0.521, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8380986303144125e-05, |
|
"loss": 2.0021, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 6.626, |
|
"eval_gen_len": 28.4385, |
|
"eval_loss": 4.138648509979248, |
|
"eval_meteor": 0.2004, |
|
"eval_runtime": 79.9371, |
|
"eval_samples_per_second": 6.505, |
|
"eval_steps_per_second": 0.813, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8111150687001485e-05, |
|
"loss": 1.9541, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 7.3794, |
|
"eval_gen_len": 32.2192, |
|
"eval_loss": 4.102533340454102, |
|
"eval_meteor": 0.2092, |
|
"eval_runtime": 104.2846, |
|
"eval_samples_per_second": 4.986, |
|
"eval_steps_per_second": 0.623, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.784131507085883e-05, |
|
"loss": 1.9843, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 6.8478, |
|
"eval_gen_len": 28.3692, |
|
"eval_loss": 4.0901265144348145, |
|
"eval_meteor": 0.2026, |
|
"eval_runtime": 84.5232, |
|
"eval_samples_per_second": 6.152, |
|
"eval_steps_per_second": 0.769, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7571479454716186e-05, |
|
"loss": 2.0089, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 7.3969, |
|
"eval_gen_len": 32.6115, |
|
"eval_loss": 4.115621089935303, |
|
"eval_meteor": 0.2061, |
|
"eval_runtime": 108.942, |
|
"eval_samples_per_second": 4.773, |
|
"eval_steps_per_second": 0.597, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.730164383857355e-05, |
|
"loss": 1.8734, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 6.4957, |
|
"eval_gen_len": 53.5423, |
|
"eval_loss": 4.048145771026611, |
|
"eval_meteor": 0.2047, |
|
"eval_runtime": 248.2193, |
|
"eval_samples_per_second": 2.095, |
|
"eval_steps_per_second": 0.262, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7031808222430894e-05, |
|
"loss": 1.959, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 6.7135, |
|
"eval_gen_len": 28.0327, |
|
"eval_loss": 4.0486321449279785, |
|
"eval_meteor": 0.1982, |
|
"eval_runtime": 78.1961, |
|
"eval_samples_per_second": 6.65, |
|
"eval_steps_per_second": 0.831, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.6761972606288254e-05, |
|
"loss": 1.9442, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 7.8174, |
|
"eval_gen_len": 30.275, |
|
"eval_loss": 4.008225440979004, |
|
"eval_meteor": 0.2121, |
|
"eval_runtime": 88.4227, |
|
"eval_samples_per_second": 5.881, |
|
"eval_steps_per_second": 0.735, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.649213699014561e-05, |
|
"loss": 1.8855, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_bleu": 7.0941, |
|
"eval_gen_len": 30.8077, |
|
"eval_loss": 4.006565570831299, |
|
"eval_meteor": 0.2117, |
|
"eval_runtime": 90.6701, |
|
"eval_samples_per_second": 5.735, |
|
"eval_steps_per_second": 0.717, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6222301374002955e-05, |
|
"loss": 1.873, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 7.0348, |
|
"eval_gen_len": 34.7673, |
|
"eval_loss": 4.008674621582031, |
|
"eval_meteor": 0.2117, |
|
"eval_runtime": 140.0997, |
|
"eval_samples_per_second": 3.712, |
|
"eval_steps_per_second": 0.464, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5952465757860316e-05, |
|
"loss": 1.9126, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 7.2617, |
|
"eval_gen_len": 31.5173, |
|
"eval_loss": 3.955263614654541, |
|
"eval_meteor": 0.2188, |
|
"eval_runtime": 135.15, |
|
"eval_samples_per_second": 3.848, |
|
"eval_steps_per_second": 0.481, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.568263014171767e-05, |
|
"loss": 1.8181, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 7.0348, |
|
"eval_gen_len": 29.8442, |
|
"eval_loss": 3.9835426807403564, |
|
"eval_meteor": 0.211, |
|
"eval_runtime": 112.2816, |
|
"eval_samples_per_second": 4.631, |
|
"eval_steps_per_second": 0.579, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.541279452557502e-05, |
|
"loss": 1.8553, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 6.9695, |
|
"eval_gen_len": 31.0673, |
|
"eval_loss": 3.934365749359131, |
|
"eval_meteor": 0.2167, |
|
"eval_runtime": 126.5544, |
|
"eval_samples_per_second": 4.109, |
|
"eval_steps_per_second": 0.514, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.514295890943238e-05, |
|
"loss": 1.8856, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 7.8161, |
|
"eval_gen_len": 25.7885, |
|
"eval_loss": 3.9472157955169678, |
|
"eval_meteor": 0.2202, |
|
"eval_runtime": 90.2192, |
|
"eval_samples_per_second": 5.764, |
|
"eval_steps_per_second": 0.72, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.487312329328973e-05, |
|
"loss": 1.8375, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 7.3226, |
|
"eval_gen_len": 31.6615, |
|
"eval_loss": 3.9212045669555664, |
|
"eval_meteor": 0.2131, |
|
"eval_runtime": 122.1859, |
|
"eval_samples_per_second": 4.256, |
|
"eval_steps_per_second": 0.532, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4603287677147084e-05, |
|
"loss": 1.8245, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 7.5927, |
|
"eval_gen_len": 25.6827, |
|
"eval_loss": 3.9012672901153564, |
|
"eval_meteor": 0.2162, |
|
"eval_runtime": 87.2113, |
|
"eval_samples_per_second": 5.963, |
|
"eval_steps_per_second": 0.745, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.433345206100444e-05, |
|
"loss": 1.7563, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 7.6493, |
|
"eval_gen_len": 24.7904, |
|
"eval_loss": 3.9165987968444824, |
|
"eval_meteor": 0.2225, |
|
"eval_runtime": 71.5438, |
|
"eval_samples_per_second": 7.268, |
|
"eval_steps_per_second": 0.909, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.406361644486179e-05, |
|
"loss": 1.7739, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 7.0986, |
|
"eval_gen_len": 32.6269, |
|
"eval_loss": 3.892530679702759, |
|
"eval_meteor": 0.2162, |
|
"eval_runtime": 135.1518, |
|
"eval_samples_per_second": 3.848, |
|
"eval_steps_per_second": 0.481, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3793780828719146e-05, |
|
"loss": 1.8389, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 7.355, |
|
"eval_gen_len": 29.1173, |
|
"eval_loss": 3.891714572906494, |
|
"eval_meteor": 0.222, |
|
"eval_runtime": 103.9412, |
|
"eval_samples_per_second": 5.003, |
|
"eval_steps_per_second": 0.625, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.35239452125765e-05, |
|
"loss": 1.8359, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 7.7276, |
|
"eval_gen_len": 26.5288, |
|
"eval_loss": 3.8721096515655518, |
|
"eval_meteor": 0.2224, |
|
"eval_runtime": 92.6122, |
|
"eval_samples_per_second": 5.615, |
|
"eval_steps_per_second": 0.702, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.325410959643385e-05, |
|
"loss": 1.774, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 7.3227, |
|
"eval_gen_len": 25.8019, |
|
"eval_loss": 3.852199077606201, |
|
"eval_meteor": 0.2256, |
|
"eval_runtime": 80.7019, |
|
"eval_samples_per_second": 6.443, |
|
"eval_steps_per_second": 0.805, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.298427398029121e-05, |
|
"loss": 1.8502, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 8.0288, |
|
"eval_gen_len": 25.7327, |
|
"eval_loss": 3.838671922683716, |
|
"eval_meteor": 0.2306, |
|
"eval_runtime": 81.2754, |
|
"eval_samples_per_second": 6.398, |
|
"eval_steps_per_second": 0.8, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.271443836414856e-05, |
|
"loss": 1.7748, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 7.8972, |
|
"eval_gen_len": 25.8135, |
|
"eval_loss": 3.847898006439209, |
|
"eval_meteor": 0.2289, |
|
"eval_runtime": 77.3621, |
|
"eval_samples_per_second": 6.722, |
|
"eval_steps_per_second": 0.84, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.244460274800592e-05, |
|
"loss": 1.8212, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 7.617, |
|
"eval_gen_len": 31.2462, |
|
"eval_loss": 3.831773281097412, |
|
"eval_meteor": 0.2249, |
|
"eval_runtime": 133.705, |
|
"eval_samples_per_second": 3.889, |
|
"eval_steps_per_second": 0.486, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.217476713186327e-05, |
|
"loss": 1.7943, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 6.6778, |
|
"eval_gen_len": 27.7673, |
|
"eval_loss": 3.8318052291870117, |
|
"eval_meteor": 0.2197, |
|
"eval_runtime": 95.8818, |
|
"eval_samples_per_second": 5.423, |
|
"eval_steps_per_second": 0.678, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.190493151572062e-05, |
|
"loss": 1.7869, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 7.5995, |
|
"eval_gen_len": 25.6712, |
|
"eval_loss": 3.793452262878418, |
|
"eval_meteor": 0.2261, |
|
"eval_runtime": 84.1529, |
|
"eval_samples_per_second": 6.179, |
|
"eval_steps_per_second": 0.772, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.163509589957798e-05, |
|
"loss": 1.7802, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 6.9484, |
|
"eval_gen_len": 34.8865, |
|
"eval_loss": 3.788137674331665, |
|
"eval_meteor": 0.2214, |
|
"eval_runtime": 159.4169, |
|
"eval_samples_per_second": 3.262, |
|
"eval_steps_per_second": 0.408, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.136526028343533e-05, |
|
"loss": 1.6783, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 7.1202, |
|
"eval_gen_len": 26.2692, |
|
"eval_loss": 3.7846884727478027, |
|
"eval_meteor": 0.2171, |
|
"eval_runtime": 88.5814, |
|
"eval_samples_per_second": 5.87, |
|
"eval_steps_per_second": 0.734, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.109542466729269e-05, |
|
"loss": 1.6773, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 8.093, |
|
"eval_gen_len": 26.9346, |
|
"eval_loss": 3.772686243057251, |
|
"eval_meteor": 0.222, |
|
"eval_runtime": 96.6464, |
|
"eval_samples_per_second": 5.38, |
|
"eval_steps_per_second": 0.673, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.0825589051150044e-05, |
|
"loss": 1.6435, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 7.5486, |
|
"eval_gen_len": 26.6231, |
|
"eval_loss": 3.785611152648926, |
|
"eval_meteor": 0.2136, |
|
"eval_runtime": 89.9499, |
|
"eval_samples_per_second": 5.781, |
|
"eval_steps_per_second": 0.723, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.055575343500739e-05, |
|
"loss": 1.6892, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_bleu": 7.2576, |
|
"eval_gen_len": 31.4712, |
|
"eval_loss": 3.778566837310791, |
|
"eval_meteor": 0.2088, |
|
"eval_runtime": 124.5211, |
|
"eval_samples_per_second": 4.176, |
|
"eval_steps_per_second": 0.522, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.028591781886475e-05, |
|
"loss": 1.7355, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_bleu": 7.8495, |
|
"eval_gen_len": 27.7846, |
|
"eval_loss": 3.739633798599243, |
|
"eval_meteor": 0.2249, |
|
"eval_runtime": 103.8056, |
|
"eval_samples_per_second": 5.009, |
|
"eval_steps_per_second": 0.626, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.0016082202722105e-05, |
|
"loss": 1.708, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 8.0278, |
|
"eval_gen_len": 23.0538, |
|
"eval_loss": 3.7668354511260986, |
|
"eval_meteor": 0.2207, |
|
"eval_runtime": 70.0581, |
|
"eval_samples_per_second": 7.422, |
|
"eval_steps_per_second": 0.928, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.974624658657945e-05, |
|
"loss": 1.6596, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 7.5685, |
|
"eval_gen_len": 28.1115, |
|
"eval_loss": 3.7208786010742188, |
|
"eval_meteor": 0.2287, |
|
"eval_runtime": 102.7576, |
|
"eval_samples_per_second": 5.06, |
|
"eval_steps_per_second": 0.633, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.947641097043681e-05, |
|
"loss": 1.7048, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 8.1065, |
|
"eval_gen_len": 23.6308, |
|
"eval_loss": 3.7291922569274902, |
|
"eval_meteor": 0.2308, |
|
"eval_runtime": 60.5983, |
|
"eval_samples_per_second": 8.581, |
|
"eval_steps_per_second": 1.073, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9206575354294166e-05, |
|
"loss": 1.671, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 8.2527, |
|
"eval_gen_len": 28.6962, |
|
"eval_loss": 3.6946775913238525, |
|
"eval_meteor": 0.2352, |
|
"eval_runtime": 109.8594, |
|
"eval_samples_per_second": 4.733, |
|
"eval_steps_per_second": 0.592, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.893673973815152e-05, |
|
"loss": 1.7021, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 7.9211, |
|
"eval_gen_len": 25.9538, |
|
"eval_loss": 3.711056709289551, |
|
"eval_meteor": 0.2288, |
|
"eval_runtime": 99.5819, |
|
"eval_samples_per_second": 5.222, |
|
"eval_steps_per_second": 0.653, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8666904122008874e-05, |
|
"loss": 1.6431, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 8.1937, |
|
"eval_gen_len": 27.2942, |
|
"eval_loss": 3.7131733894348145, |
|
"eval_meteor": 0.2303, |
|
"eval_runtime": 84.3649, |
|
"eval_samples_per_second": 6.164, |
|
"eval_steps_per_second": 0.77, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.839706850586623e-05, |
|
"loss": 1.7551, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 8.0326, |
|
"eval_gen_len": 32.4615, |
|
"eval_loss": 3.7119548320770264, |
|
"eval_meteor": 0.2299, |
|
"eval_runtime": 130.1926, |
|
"eval_samples_per_second": 3.994, |
|
"eval_steps_per_second": 0.499, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.812723288972358e-05, |
|
"loss": 1.672, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 8.1471, |
|
"eval_gen_len": 26.1, |
|
"eval_loss": 3.708371639251709, |
|
"eval_meteor": 0.2274, |
|
"eval_runtime": 101.2958, |
|
"eval_samples_per_second": 5.133, |
|
"eval_steps_per_second": 0.642, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.7857397273580935e-05, |
|
"loss": 1.6464, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 7.974, |
|
"eval_gen_len": 26.8558, |
|
"eval_loss": 3.677321195602417, |
|
"eval_meteor": 0.2287, |
|
"eval_runtime": 101.1748, |
|
"eval_samples_per_second": 5.14, |
|
"eval_steps_per_second": 0.642, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.758756165743829e-05, |
|
"loss": 1.6484, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 7.609, |
|
"eval_gen_len": 28.8173, |
|
"eval_loss": 3.6681010723114014, |
|
"eval_meteor": 0.2303, |
|
"eval_runtime": 110.9868, |
|
"eval_samples_per_second": 4.685, |
|
"eval_steps_per_second": 0.586, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.731772604129564e-05, |
|
"loss": 1.6568, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 7.1469, |
|
"eval_gen_len": 27.2288, |
|
"eval_loss": 3.692307949066162, |
|
"eval_meteor": 0.2255, |
|
"eval_runtime": 94.7667, |
|
"eval_samples_per_second": 5.487, |
|
"eval_steps_per_second": 0.686, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.7047890425152996e-05, |
|
"loss": 1.6389, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_bleu": 7.7818, |
|
"eval_gen_len": 24.6692, |
|
"eval_loss": 3.660385847091675, |
|
"eval_meteor": 0.2249, |
|
"eval_runtime": 81.5305, |
|
"eval_samples_per_second": 6.378, |
|
"eval_steps_per_second": 0.797, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.677805480901036e-05, |
|
"loss": 1.6639, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_bleu": 8.0474, |
|
"eval_gen_len": 23.9308, |
|
"eval_loss": 3.659407377243042, |
|
"eval_meteor": 0.2284, |
|
"eval_runtime": 73.4805, |
|
"eval_samples_per_second": 7.077, |
|
"eval_steps_per_second": 0.885, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.6508219192867704e-05, |
|
"loss": 1.6965, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_bleu": 7.8415, |
|
"eval_gen_len": 29.1077, |
|
"eval_loss": 3.663590431213379, |
|
"eval_meteor": 0.2242, |
|
"eval_runtime": 99.3013, |
|
"eval_samples_per_second": 5.237, |
|
"eval_steps_per_second": 0.655, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.623838357672506e-05, |
|
"loss": 1.6891, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_bleu": 7.6988, |
|
"eval_gen_len": 26.9077, |
|
"eval_loss": 3.633793354034424, |
|
"eval_meteor": 0.2278, |
|
"eval_runtime": 87.8219, |
|
"eval_samples_per_second": 5.921, |
|
"eval_steps_per_second": 0.74, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.596854796058242e-05, |
|
"loss": 1.6518, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_bleu": 8.0628, |
|
"eval_gen_len": 24.2231, |
|
"eval_loss": 3.6404640674591064, |
|
"eval_meteor": 0.2272, |
|
"eval_runtime": 79.4014, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 0.819, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5698712344439765e-05, |
|
"loss": 1.5915, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_bleu": 8.4956, |
|
"eval_gen_len": 25.925, |
|
"eval_loss": 3.6168606281280518, |
|
"eval_meteor": 0.2327, |
|
"eval_runtime": 84.1477, |
|
"eval_samples_per_second": 6.18, |
|
"eval_steps_per_second": 0.772, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5428876728297126e-05, |
|
"loss": 1.6756, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_bleu": 7.8645, |
|
"eval_gen_len": 25.7115, |
|
"eval_loss": 3.6374764442443848, |
|
"eval_meteor": 0.2293, |
|
"eval_runtime": 68.4092, |
|
"eval_samples_per_second": 7.601, |
|
"eval_steps_per_second": 0.95, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.515904111215448e-05, |
|
"loss": 1.6085, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_bleu": 7.5502, |
|
"eval_gen_len": 27.7, |
|
"eval_loss": 3.6482863426208496, |
|
"eval_meteor": 0.2193, |
|
"eval_runtime": 107.6058, |
|
"eval_samples_per_second": 4.832, |
|
"eval_steps_per_second": 0.604, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.4889205496011826e-05, |
|
"loss": 1.5557, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_bleu": 7.4827, |
|
"eval_gen_len": 37.5385, |
|
"eval_loss": 3.6158738136291504, |
|
"eval_meteor": 0.2182, |
|
"eval_runtime": 151.1373, |
|
"eval_samples_per_second": 3.441, |
|
"eval_steps_per_second": 0.43, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.461936987986919e-05, |
|
"loss": 1.5766, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_bleu": 7.9417, |
|
"eval_gen_len": 28.1673, |
|
"eval_loss": 3.6332404613494873, |
|
"eval_meteor": 0.2271, |
|
"eval_runtime": 96.6869, |
|
"eval_samples_per_second": 5.378, |
|
"eval_steps_per_second": 0.672, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.434953426372654e-05, |
|
"loss": 1.6282, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_bleu": 7.6697, |
|
"eval_gen_len": 27.3019, |
|
"eval_loss": 3.6174378395080566, |
|
"eval_meteor": 0.2208, |
|
"eval_runtime": 89.3468, |
|
"eval_samples_per_second": 5.82, |
|
"eval_steps_per_second": 0.728, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.407969864758389e-05, |
|
"loss": 1.5901, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bleu": 8.211, |
|
"eval_gen_len": 27.4885, |
|
"eval_loss": 3.6009418964385986, |
|
"eval_meteor": 0.2295, |
|
"eval_runtime": 86.4935, |
|
"eval_samples_per_second": 6.012, |
|
"eval_steps_per_second": 0.752, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.380986303144125e-05, |
|
"loss": 1.618, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_bleu": 7.8473, |
|
"eval_gen_len": 26.3385, |
|
"eval_loss": 3.597890853881836, |
|
"eval_meteor": 0.2276, |
|
"eval_runtime": 67.1228, |
|
"eval_samples_per_second": 7.747, |
|
"eval_steps_per_second": 0.968, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.35400274152986e-05, |
|
"loss": 1.5292, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_bleu": 7.6579, |
|
"eval_gen_len": 28.0481, |
|
"eval_loss": 3.578139543533325, |
|
"eval_meteor": 0.2348, |
|
"eval_runtime": 88.2928, |
|
"eval_samples_per_second": 5.889, |
|
"eval_steps_per_second": 0.736, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3270191799155956e-05, |
|
"loss": 1.539, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_bleu": 7.9101, |
|
"eval_gen_len": 25.1115, |
|
"eval_loss": 3.56876540184021, |
|
"eval_meteor": 0.2294, |
|
"eval_runtime": 78.3276, |
|
"eval_samples_per_second": 6.639, |
|
"eval_steps_per_second": 0.83, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.300035618301331e-05, |
|
"loss": 1.6394, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_bleu": 8.1847, |
|
"eval_gen_len": 24.5731, |
|
"eval_loss": 3.5614802837371826, |
|
"eval_meteor": 0.2322, |
|
"eval_runtime": 70.0131, |
|
"eval_samples_per_second": 7.427, |
|
"eval_steps_per_second": 0.928, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.273052056687066e-05, |
|
"loss": 1.5546, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 7.9997, |
|
"eval_gen_len": 25.1596, |
|
"eval_loss": 3.56968355178833, |
|
"eval_meteor": 0.2339, |
|
"eval_runtime": 68.1359, |
|
"eval_samples_per_second": 7.632, |
|
"eval_steps_per_second": 0.954, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.246068495072802e-05, |
|
"loss": 1.5173, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 8.1149, |
|
"eval_gen_len": 27.0596, |
|
"eval_loss": 3.5782711505889893, |
|
"eval_meteor": 0.229, |
|
"eval_runtime": 92.9196, |
|
"eval_samples_per_second": 5.596, |
|
"eval_steps_per_second": 0.7, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.219084933458537e-05, |
|
"loss": 1.6158, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_bleu": 8.6261, |
|
"eval_gen_len": 25.5346, |
|
"eval_loss": 3.5298867225646973, |
|
"eval_meteor": 0.2419, |
|
"eval_runtime": 64.5727, |
|
"eval_samples_per_second": 8.053, |
|
"eval_steps_per_second": 1.007, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.1921013718442724e-05, |
|
"loss": 1.5266, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_bleu": 8.7619, |
|
"eval_gen_len": 31.4077, |
|
"eval_loss": 3.522501230239868, |
|
"eval_meteor": 0.2319, |
|
"eval_runtime": 122.5254, |
|
"eval_samples_per_second": 4.244, |
|
"eval_steps_per_second": 0.531, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.1651178102300085e-05, |
|
"loss": 1.577, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_bleu": 8.611, |
|
"eval_gen_len": 26.15, |
|
"eval_loss": 3.542168140411377, |
|
"eval_meteor": 0.2346, |
|
"eval_runtime": 72.838, |
|
"eval_samples_per_second": 7.139, |
|
"eval_steps_per_second": 0.892, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.138134248615743e-05, |
|
"loss": 1.5787, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_bleu": 8.0924, |
|
"eval_gen_len": 29.2692, |
|
"eval_loss": 3.5313644409179688, |
|
"eval_meteor": 0.2301, |
|
"eval_runtime": 104.0102, |
|
"eval_samples_per_second": 5.0, |
|
"eval_steps_per_second": 0.625, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.111150687001479e-05, |
|
"loss": 1.5635, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_bleu": 8.2094, |
|
"eval_gen_len": 25.4942, |
|
"eval_loss": 3.5328898429870605, |
|
"eval_meteor": 0.2314, |
|
"eval_runtime": 62.8928, |
|
"eval_samples_per_second": 8.268, |
|
"eval_steps_per_second": 1.034, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.0841671253872146e-05, |
|
"loss": 1.4967, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_bleu": 7.9224, |
|
"eval_gen_len": 28.1365, |
|
"eval_loss": 3.5347652435302734, |
|
"eval_meteor": 0.2298, |
|
"eval_runtime": 95.4251, |
|
"eval_samples_per_second": 5.449, |
|
"eval_steps_per_second": 0.681, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.057183563772949e-05, |
|
"loss": 1.5375, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 8.062, |
|
"eval_gen_len": 25.7346, |
|
"eval_loss": 3.5164051055908203, |
|
"eval_meteor": 0.2375, |
|
"eval_runtime": 72.4588, |
|
"eval_samples_per_second": 7.176, |
|
"eval_steps_per_second": 0.897, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.030200002158685e-05, |
|
"loss": 1.5928, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 8.4845, |
|
"eval_gen_len": 25.8462, |
|
"eval_loss": 3.5118257999420166, |
|
"eval_meteor": 0.2439, |
|
"eval_runtime": 69.4765, |
|
"eval_samples_per_second": 7.485, |
|
"eval_steps_per_second": 0.936, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.0032164405444208e-05, |
|
"loss": 1.582, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_bleu": 8.2526, |
|
"eval_gen_len": 25.9327, |
|
"eval_loss": 3.505610227584839, |
|
"eval_meteor": 0.2395, |
|
"eval_runtime": 60.8773, |
|
"eval_samples_per_second": 8.542, |
|
"eval_steps_per_second": 1.068, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.9762328789301558e-05, |
|
"loss": 1.4958, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_bleu": 8.3315, |
|
"eval_gen_len": 28.5365, |
|
"eval_loss": 3.517829418182373, |
|
"eval_meteor": 0.2271, |
|
"eval_runtime": 94.0897, |
|
"eval_samples_per_second": 5.527, |
|
"eval_steps_per_second": 0.691, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.9492493173158915e-05, |
|
"loss": 1.5524, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_bleu": 8.765, |
|
"eval_gen_len": 26.2865, |
|
"eval_loss": 3.506021738052368, |
|
"eval_meteor": 0.2364, |
|
"eval_runtime": 79.4441, |
|
"eval_samples_per_second": 6.545, |
|
"eval_steps_per_second": 0.818, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.922265755701627e-05, |
|
"loss": 1.4689, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_bleu": 8.5295, |
|
"eval_gen_len": 29.5769, |
|
"eval_loss": 3.5012190341949463, |
|
"eval_meteor": 0.2327, |
|
"eval_runtime": 103.4617, |
|
"eval_samples_per_second": 5.026, |
|
"eval_steps_per_second": 0.628, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.895282194087362e-05, |
|
"loss": 1.5345, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_bleu": 8.7423, |
|
"eval_gen_len": 25.3269, |
|
"eval_loss": 3.498300313949585, |
|
"eval_meteor": 0.2342, |
|
"eval_runtime": 73.6072, |
|
"eval_samples_per_second": 7.065, |
|
"eval_steps_per_second": 0.883, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8682986324730976e-05, |
|
"loss": 1.5282, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bleu": 8.9296, |
|
"eval_gen_len": 25.1173, |
|
"eval_loss": 3.4799299240112305, |
|
"eval_meteor": 0.2425, |
|
"eval_runtime": 71.1898, |
|
"eval_samples_per_second": 7.304, |
|
"eval_steps_per_second": 0.913, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8413150708588333e-05, |
|
"loss": 1.5021, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bleu": 7.9284, |
|
"eval_gen_len": 26.2962, |
|
"eval_loss": 3.4839093685150146, |
|
"eval_meteor": 0.236, |
|
"eval_runtime": 77.544, |
|
"eval_samples_per_second": 6.706, |
|
"eval_steps_per_second": 0.838, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.8143315092445684e-05, |
|
"loss": 1.5072, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_bleu": 8.7855, |
|
"eval_gen_len": 25.425, |
|
"eval_loss": 3.4775073528289795, |
|
"eval_meteor": 0.238, |
|
"eval_runtime": 84.0571, |
|
"eval_samples_per_second": 6.186, |
|
"eval_steps_per_second": 0.773, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.7873479476303038e-05, |
|
"loss": 1.5348, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_bleu": 8.3592, |
|
"eval_gen_len": 25.4673, |
|
"eval_loss": 3.4518544673919678, |
|
"eval_meteor": 0.24, |
|
"eval_runtime": 81.9583, |
|
"eval_samples_per_second": 6.345, |
|
"eval_steps_per_second": 0.793, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7603643860160395e-05, |
|
"loss": 1.5236, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_bleu": 8.589, |
|
"eval_gen_len": 28.1846, |
|
"eval_loss": 3.466686248779297, |
|
"eval_meteor": 0.2352, |
|
"eval_runtime": 91.2864, |
|
"eval_samples_per_second": 5.696, |
|
"eval_steps_per_second": 0.712, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.7333808244017745e-05, |
|
"loss": 1.4695, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_bleu": 8.5467, |
|
"eval_gen_len": 26.9385, |
|
"eval_loss": 3.4435806274414062, |
|
"eval_meteor": 0.2396, |
|
"eval_runtime": 89.9168, |
|
"eval_samples_per_second": 5.783, |
|
"eval_steps_per_second": 0.723, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.70639726278751e-05, |
|
"loss": 1.4733, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 8.3771, |
|
"eval_gen_len": 26.6096, |
|
"eval_loss": 3.455449342727661, |
|
"eval_meteor": 0.2347, |
|
"eval_runtime": 85.0784, |
|
"eval_samples_per_second": 6.112, |
|
"eval_steps_per_second": 0.764, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6794137011732456e-05, |
|
"loss": 1.5398, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_bleu": 9.2126, |
|
"eval_gen_len": 24.6538, |
|
"eval_loss": 3.436018943786621, |
|
"eval_meteor": 0.2434, |
|
"eval_runtime": 81.8122, |
|
"eval_samples_per_second": 6.356, |
|
"eval_steps_per_second": 0.795, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6524301395589806e-05, |
|
"loss": 1.5596, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_bleu": 9.2745, |
|
"eval_gen_len": 26.1, |
|
"eval_loss": 3.425743579864502, |
|
"eval_meteor": 0.2521, |
|
"eval_runtime": 70.71, |
|
"eval_samples_per_second": 7.354, |
|
"eval_steps_per_second": 0.919, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6254465779447164e-05, |
|
"loss": 1.505, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_bleu": 8.8478, |
|
"eval_gen_len": 24.9731, |
|
"eval_loss": 3.428138017654419, |
|
"eval_meteor": 0.2421, |
|
"eval_runtime": 81.3515, |
|
"eval_samples_per_second": 6.392, |
|
"eval_steps_per_second": 0.799, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5984630163304517e-05, |
|
"loss": 1.48, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 9.237, |
|
"eval_gen_len": 24.2058, |
|
"eval_loss": 3.465346336364746, |
|
"eval_meteor": 0.2392, |
|
"eval_runtime": 78.2893, |
|
"eval_samples_per_second": 6.642, |
|
"eval_steps_per_second": 0.83, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5714794547161868e-05, |
|
"loss": 1.5167, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 8.8344, |
|
"eval_gen_len": 25.8192, |
|
"eval_loss": 3.440796375274658, |
|
"eval_meteor": 0.2355, |
|
"eval_runtime": 73.3566, |
|
"eval_samples_per_second": 7.089, |
|
"eval_steps_per_second": 0.886, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5444958931019225e-05, |
|
"loss": 1.4691, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 9.0831, |
|
"eval_gen_len": 25.8577, |
|
"eval_loss": 3.424842596054077, |
|
"eval_meteor": 0.2449, |
|
"eval_runtime": 83.8868, |
|
"eval_samples_per_second": 6.199, |
|
"eval_steps_per_second": 0.775, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5175123314876582e-05, |
|
"loss": 1.4734, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_bleu": 8.6622, |
|
"eval_gen_len": 26.0385, |
|
"eval_loss": 3.406123399734497, |
|
"eval_meteor": 0.2472, |
|
"eval_runtime": 87.066, |
|
"eval_samples_per_second": 5.972, |
|
"eval_steps_per_second": 0.747, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4905287698733932e-05, |
|
"loss": 1.4354, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_bleu": 9.3409, |
|
"eval_gen_len": 26.0077, |
|
"eval_loss": 3.4149692058563232, |
|
"eval_meteor": 0.2462, |
|
"eval_runtime": 75.3366, |
|
"eval_samples_per_second": 6.902, |
|
"eval_steps_per_second": 0.863, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4635452082591286e-05, |
|
"loss": 1.4841, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_bleu": 8.3645, |
|
"eval_gen_len": 27.2692, |
|
"eval_loss": 3.410163640975952, |
|
"eval_meteor": 0.2377, |
|
"eval_runtime": 88.8645, |
|
"eval_samples_per_second": 5.852, |
|
"eval_steps_per_second": 0.731, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.436561646644864e-05, |
|
"loss": 1.4163, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_bleu": 8.7482, |
|
"eval_gen_len": 25.1442, |
|
"eval_loss": 3.4322900772094727, |
|
"eval_meteor": 0.2329, |
|
"eval_runtime": 80.4665, |
|
"eval_samples_per_second": 6.462, |
|
"eval_steps_per_second": 0.808, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4095780850305994e-05, |
|
"loss": 1.4859, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_bleu": 9.0356, |
|
"eval_gen_len": 27.9096, |
|
"eval_loss": 3.409453868865967, |
|
"eval_meteor": 0.238, |
|
"eval_runtime": 97.3658, |
|
"eval_samples_per_second": 5.341, |
|
"eval_steps_per_second": 0.668, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.382594523416335e-05, |
|
"loss": 1.5179, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_bleu": 8.9689, |
|
"eval_gen_len": 25.9173, |
|
"eval_loss": 3.4041757583618164, |
|
"eval_meteor": 0.2426, |
|
"eval_runtime": 66.1422, |
|
"eval_samples_per_second": 7.862, |
|
"eval_steps_per_second": 0.983, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.35561096180207e-05, |
|
"loss": 1.4451, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_bleu": 9.0897, |
|
"eval_gen_len": 24.6654, |
|
"eval_loss": 3.383331537246704, |
|
"eval_meteor": 0.244, |
|
"eval_runtime": 60.7417, |
|
"eval_samples_per_second": 8.561, |
|
"eval_steps_per_second": 1.07, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.3286274001878055e-05, |
|
"loss": 1.4377, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_bleu": 9.0223, |
|
"eval_gen_len": 25.3788, |
|
"eval_loss": 3.3942527770996094, |
|
"eval_meteor": 0.2481, |
|
"eval_runtime": 71.1329, |
|
"eval_samples_per_second": 7.31, |
|
"eval_steps_per_second": 0.914, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3016438385735412e-05, |
|
"loss": 1.4162, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_bleu": 9.5501, |
|
"eval_gen_len": 25.05, |
|
"eval_loss": 3.3857383728027344, |
|
"eval_meteor": 0.2468, |
|
"eval_runtime": 77.3856, |
|
"eval_samples_per_second": 6.72, |
|
"eval_steps_per_second": 0.84, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.2746602769592766e-05, |
|
"loss": 1.5209, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_bleu": 8.9994, |
|
"eval_gen_len": 25.1058, |
|
"eval_loss": 3.3768198490142822, |
|
"eval_meteor": 0.2424, |
|
"eval_runtime": 72.911, |
|
"eval_samples_per_second": 7.132, |
|
"eval_steps_per_second": 0.891, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.247676715345012e-05, |
|
"loss": 1.426, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_bleu": 9.2556, |
|
"eval_gen_len": 25.3865, |
|
"eval_loss": 3.3841092586517334, |
|
"eval_meteor": 0.2443, |
|
"eval_runtime": 62.6291, |
|
"eval_samples_per_second": 8.303, |
|
"eval_steps_per_second": 1.038, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.2206931537307473e-05, |
|
"loss": 1.4453, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_bleu": 9.583, |
|
"eval_gen_len": 26.3269, |
|
"eval_loss": 3.3895211219787598, |
|
"eval_meteor": 0.2448, |
|
"eval_runtime": 85.3314, |
|
"eval_samples_per_second": 6.094, |
|
"eval_steps_per_second": 0.762, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.1937095921164827e-05, |
|
"loss": 1.4162, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_bleu": 9.0803, |
|
"eval_gen_len": 25.5231, |
|
"eval_loss": 3.385866165161133, |
|
"eval_meteor": 0.2413, |
|
"eval_runtime": 75.8979, |
|
"eval_samples_per_second": 6.851, |
|
"eval_steps_per_second": 0.856, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1667260305022184e-05, |
|
"loss": 1.4107, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_bleu": 8.9249, |
|
"eval_gen_len": 24.8615, |
|
"eval_loss": 3.3849904537200928, |
|
"eval_meteor": 0.241, |
|
"eval_runtime": 72.4747, |
|
"eval_samples_per_second": 7.175, |
|
"eval_steps_per_second": 0.897, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1397424688879535e-05, |
|
"loss": 1.4474, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_bleu": 9.5048, |
|
"eval_gen_len": 23.975, |
|
"eval_loss": 3.3705201148986816, |
|
"eval_meteor": 0.2474, |
|
"eval_runtime": 76.3998, |
|
"eval_samples_per_second": 6.806, |
|
"eval_steps_per_second": 0.851, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.1127589072736888e-05, |
|
"loss": 1.4336, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_bleu": 9.7945, |
|
"eval_gen_len": 24.3885, |
|
"eval_loss": 3.349461078643799, |
|
"eval_meteor": 0.2549, |
|
"eval_runtime": 69.7201, |
|
"eval_samples_per_second": 7.458, |
|
"eval_steps_per_second": 0.932, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.0857753456594245e-05, |
|
"loss": 1.4261, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_bleu": 9.5374, |
|
"eval_gen_len": 24.2692, |
|
"eval_loss": 3.369290351867676, |
|
"eval_meteor": 0.2444, |
|
"eval_runtime": 67.4768, |
|
"eval_samples_per_second": 7.706, |
|
"eval_steps_per_second": 0.963, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.05879178404516e-05, |
|
"loss": 1.409, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 9.3034, |
|
"eval_gen_len": 24.625, |
|
"eval_loss": 3.3803343772888184, |
|
"eval_meteor": 0.2436, |
|
"eval_runtime": 68.3273, |
|
"eval_samples_per_second": 7.61, |
|
"eval_steps_per_second": 0.951, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.0318082224308953e-05, |
|
"loss": 1.4364, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_bleu": 9.6554, |
|
"eval_gen_len": 25.5654, |
|
"eval_loss": 3.371992349624634, |
|
"eval_meteor": 0.2457, |
|
"eval_runtime": 82.9163, |
|
"eval_samples_per_second": 6.271, |
|
"eval_steps_per_second": 0.784, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0048246608166307e-05, |
|
"loss": 1.4184, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_bleu": 9.4698, |
|
"eval_gen_len": 27.6, |
|
"eval_loss": 3.3737027645111084, |
|
"eval_meteor": 0.2414, |
|
"eval_runtime": 99.5873, |
|
"eval_samples_per_second": 5.222, |
|
"eval_steps_per_second": 0.653, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.977841099202366e-05, |
|
"loss": 1.4417, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_bleu": 9.3662, |
|
"eval_gen_len": 25.1481, |
|
"eval_loss": 3.3708438873291016, |
|
"eval_meteor": 0.2446, |
|
"eval_runtime": 87.8071, |
|
"eval_samples_per_second": 5.922, |
|
"eval_steps_per_second": 0.74, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9508575375881014e-05, |
|
"loss": 1.3421, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_bleu": 9.3448, |
|
"eval_gen_len": 24.8404, |
|
"eval_loss": 3.3396267890930176, |
|
"eval_meteor": 0.2478, |
|
"eval_runtime": 73.0359, |
|
"eval_samples_per_second": 7.12, |
|
"eval_steps_per_second": 0.89, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9238739759738368e-05, |
|
"loss": 1.396, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bleu": 9.0765, |
|
"eval_gen_len": 24.7731, |
|
"eval_loss": 3.3500242233276367, |
|
"eval_meteor": 0.2413, |
|
"eval_runtime": 71.4703, |
|
"eval_samples_per_second": 7.276, |
|
"eval_steps_per_second": 0.909, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8968904143595722e-05, |
|
"loss": 1.4152, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bleu": 9.4934, |
|
"eval_gen_len": 24.7885, |
|
"eval_loss": 3.335568904876709, |
|
"eval_meteor": 0.2496, |
|
"eval_runtime": 67.659, |
|
"eval_samples_per_second": 7.686, |
|
"eval_steps_per_second": 0.961, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8699068527453075e-05, |
|
"loss": 1.4245, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_bleu": 9.7477, |
|
"eval_gen_len": 24.0096, |
|
"eval_loss": 3.344538688659668, |
|
"eval_meteor": 0.2494, |
|
"eval_runtime": 69.4763, |
|
"eval_samples_per_second": 7.485, |
|
"eval_steps_per_second": 0.936, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.8429232911310433e-05, |
|
"loss": 1.4661, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_bleu": 9.2664, |
|
"eval_gen_len": 24.1404, |
|
"eval_loss": 3.3510961532592773, |
|
"eval_meteor": 0.2416, |
|
"eval_runtime": 71.3645, |
|
"eval_samples_per_second": 7.287, |
|
"eval_steps_per_second": 0.911, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.8159397295167786e-05, |
|
"loss": 1.4219, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 9.5738, |
|
"eval_gen_len": 24.6769, |
|
"eval_loss": 3.3354907035827637, |
|
"eval_meteor": 0.2436, |
|
"eval_runtime": 76.9335, |
|
"eval_samples_per_second": 6.759, |
|
"eval_steps_per_second": 0.845, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7889561679025137e-05, |
|
"loss": 1.4051, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 9.4506, |
|
"eval_gen_len": 24.2615, |
|
"eval_loss": 3.3152685165405273, |
|
"eval_meteor": 0.2495, |
|
"eval_runtime": 68.6589, |
|
"eval_samples_per_second": 7.574, |
|
"eval_steps_per_second": 0.947, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7619726062882494e-05, |
|
"loss": 1.4743, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bleu": 9.6423, |
|
"eval_gen_len": 24.4865, |
|
"eval_loss": 3.3143715858459473, |
|
"eval_meteor": 0.2463, |
|
"eval_runtime": 71.0116, |
|
"eval_samples_per_second": 7.323, |
|
"eval_steps_per_second": 0.915, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.7349890446739848e-05, |
|
"loss": 1.3961, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bleu": 9.7681, |
|
"eval_gen_len": 24.4615, |
|
"eval_loss": 3.3041951656341553, |
|
"eval_meteor": 0.2526, |
|
"eval_runtime": 66.5982, |
|
"eval_samples_per_second": 7.808, |
|
"eval_steps_per_second": 0.976, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.70800548305972e-05, |
|
"loss": 1.447, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bleu": 9.6435, |
|
"eval_gen_len": 24.6365, |
|
"eval_loss": 3.3025214672088623, |
|
"eval_meteor": 0.2507, |
|
"eval_runtime": 73.4548, |
|
"eval_samples_per_second": 7.079, |
|
"eval_steps_per_second": 0.885, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.6810219214454555e-05, |
|
"loss": 1.4102, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bleu": 9.6897, |
|
"eval_gen_len": 24.8731, |
|
"eval_loss": 3.3014609813690186, |
|
"eval_meteor": 0.25, |
|
"eval_runtime": 77.7893, |
|
"eval_samples_per_second": 6.685, |
|
"eval_steps_per_second": 0.836, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.654038359831191e-05, |
|
"loss": 1.3739, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_bleu": 9.2607, |
|
"eval_gen_len": 24.7885, |
|
"eval_loss": 3.313209056854248, |
|
"eval_meteor": 0.2447, |
|
"eval_runtime": 70.1449, |
|
"eval_samples_per_second": 7.413, |
|
"eval_steps_per_second": 0.927, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.6270547982169263e-05, |
|
"loss": 1.3794, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_bleu": 9.6355, |
|
"eval_gen_len": 24.4365, |
|
"eval_loss": 3.3016440868377686, |
|
"eval_meteor": 0.2488, |
|
"eval_runtime": 72.723, |
|
"eval_samples_per_second": 7.15, |
|
"eval_steps_per_second": 0.894, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.600071236602662e-05, |
|
"loss": 1.3819, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_bleu": 9.9125, |
|
"eval_gen_len": 24.5038, |
|
"eval_loss": 3.298640489578247, |
|
"eval_meteor": 0.248, |
|
"eval_runtime": 71.6443, |
|
"eval_samples_per_second": 7.258, |
|
"eval_steps_per_second": 0.907, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.573087674988397e-05, |
|
"loss": 1.3598, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_bleu": 10.2032, |
|
"eval_gen_len": 24.6558, |
|
"eval_loss": 3.2921295166015625, |
|
"eval_meteor": 0.2534, |
|
"eval_runtime": 74.7575, |
|
"eval_samples_per_second": 6.956, |
|
"eval_steps_per_second": 0.869, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.5461041133741324e-05, |
|
"loss": 1.3846, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_bleu": 9.9342, |
|
"eval_gen_len": 23.9923, |
|
"eval_loss": 3.2913033962249756, |
|
"eval_meteor": 0.2468, |
|
"eval_runtime": 68.6789, |
|
"eval_samples_per_second": 7.571, |
|
"eval_steps_per_second": 0.946, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.5191205517598681e-05, |
|
"loss": 1.4024, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 9.445, |
|
"eval_gen_len": 25.3865, |
|
"eval_loss": 3.2889387607574463, |
|
"eval_meteor": 0.2426, |
|
"eval_runtime": 78.1546, |
|
"eval_samples_per_second": 6.653, |
|
"eval_steps_per_second": 0.832, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.4921369901456033e-05, |
|
"loss": 1.3775, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 9.711, |
|
"eval_gen_len": 25.5769, |
|
"eval_loss": 3.2912492752075195, |
|
"eval_meteor": 0.2459, |
|
"eval_runtime": 102.461, |
|
"eval_samples_per_second": 5.075, |
|
"eval_steps_per_second": 0.634, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4651534285313387e-05, |
|
"loss": 1.3192, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_bleu": 9.5794, |
|
"eval_gen_len": 25.2038, |
|
"eval_loss": 3.288285970687866, |
|
"eval_meteor": 0.2438, |
|
"eval_runtime": 81.3849, |
|
"eval_samples_per_second": 6.389, |
|
"eval_steps_per_second": 0.799, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4381698669170742e-05, |
|
"loss": 1.3426, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_bleu": 9.9763, |
|
"eval_gen_len": 25.1942, |
|
"eval_loss": 3.282227039337158, |
|
"eval_meteor": 0.2473, |
|
"eval_runtime": 81.2601, |
|
"eval_samples_per_second": 6.399, |
|
"eval_steps_per_second": 0.8, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4111863053028096e-05, |
|
"loss": 1.3669, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_bleu": 9.4662, |
|
"eval_gen_len": 23.9865, |
|
"eval_loss": 3.2904016971588135, |
|
"eval_meteor": 0.2483, |
|
"eval_runtime": 66.1145, |
|
"eval_samples_per_second": 7.865, |
|
"eval_steps_per_second": 0.983, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.384202743688545e-05, |
|
"loss": 1.3624, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_bleu": 9.5602, |
|
"eval_gen_len": 24.1096, |
|
"eval_loss": 3.299604654312134, |
|
"eval_meteor": 0.2452, |
|
"eval_runtime": 72.3193, |
|
"eval_samples_per_second": 7.19, |
|
"eval_steps_per_second": 0.899, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3572191820742805e-05, |
|
"loss": 1.3435, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_bleu": 9.756, |
|
"eval_gen_len": 25.4038, |
|
"eval_loss": 3.2783830165863037, |
|
"eval_meteor": 0.2482, |
|
"eval_runtime": 64.4382, |
|
"eval_samples_per_second": 8.07, |
|
"eval_steps_per_second": 1.009, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.3302356204600157e-05, |
|
"loss": 1.3668, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_bleu": 9.9278, |
|
"eval_gen_len": 24.3115, |
|
"eval_loss": 3.2847084999084473, |
|
"eval_meteor": 0.246, |
|
"eval_runtime": 74.6646, |
|
"eval_samples_per_second": 6.964, |
|
"eval_steps_per_second": 0.871, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.3032520588457511e-05, |
|
"loss": 1.4025, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_bleu": 9.8915, |
|
"eval_gen_len": 24.4904, |
|
"eval_loss": 3.2775745391845703, |
|
"eval_meteor": 0.2502, |
|
"eval_runtime": 76.4639, |
|
"eval_samples_per_second": 6.801, |
|
"eval_steps_per_second": 0.85, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.2762684972314867e-05, |
|
"loss": 1.334, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_bleu": 10.1572, |
|
"eval_gen_len": 25.5462, |
|
"eval_loss": 3.2732129096984863, |
|
"eval_meteor": 0.2489, |
|
"eval_runtime": 88.0016, |
|
"eval_samples_per_second": 5.909, |
|
"eval_steps_per_second": 0.739, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.249284935617222e-05, |
|
"loss": 1.407, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_bleu": 9.9716, |
|
"eval_gen_len": 24.8692, |
|
"eval_loss": 3.2843334674835205, |
|
"eval_meteor": 0.2503, |
|
"eval_runtime": 76.3651, |
|
"eval_samples_per_second": 6.809, |
|
"eval_steps_per_second": 0.851, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.2223013740029574e-05, |
|
"loss": 1.449, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_bleu": 9.7516, |
|
"eval_gen_len": 24.6115, |
|
"eval_loss": 3.26999831199646, |
|
"eval_meteor": 0.2446, |
|
"eval_runtime": 57.4151, |
|
"eval_samples_per_second": 9.057, |
|
"eval_steps_per_second": 1.132, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.1953178123886928e-05, |
|
"loss": 1.3357, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_bleu": 9.7942, |
|
"eval_gen_len": 25.0538, |
|
"eval_loss": 3.2684452533721924, |
|
"eval_meteor": 0.2478, |
|
"eval_runtime": 69.1367, |
|
"eval_samples_per_second": 7.521, |
|
"eval_steps_per_second": 0.94, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1683342507744283e-05, |
|
"loss": 1.3437, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_bleu": 9.9427, |
|
"eval_gen_len": 24.1538, |
|
"eval_loss": 3.2602343559265137, |
|
"eval_meteor": 0.2486, |
|
"eval_runtime": 72.8541, |
|
"eval_samples_per_second": 7.138, |
|
"eval_steps_per_second": 0.892, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.1413506891601637e-05, |
|
"loss": 1.3518, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_bleu": 9.8195, |
|
"eval_gen_len": 25.35, |
|
"eval_loss": 3.2659668922424316, |
|
"eval_meteor": 0.2466, |
|
"eval_runtime": 68.8561, |
|
"eval_samples_per_second": 7.552, |
|
"eval_steps_per_second": 0.944, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.1143671275458991e-05, |
|
"loss": 1.3762, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 10.0553, |
|
"eval_gen_len": 24.8635, |
|
"eval_loss": 3.257364511489868, |
|
"eval_meteor": 0.2516, |
|
"eval_runtime": 81.5604, |
|
"eval_samples_per_second": 6.376, |
|
"eval_steps_per_second": 0.797, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.0873835659316345e-05, |
|
"loss": 1.3404, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 9.9627, |
|
"eval_gen_len": 23.9154, |
|
"eval_loss": 3.2508692741394043, |
|
"eval_meteor": 0.2509, |
|
"eval_runtime": 55.132, |
|
"eval_samples_per_second": 9.432, |
|
"eval_steps_per_second": 1.179, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.06040000431737e-05, |
|
"loss": 1.3548, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_bleu": 9.8402, |
|
"eval_gen_len": 24.7827, |
|
"eval_loss": 3.260327100753784, |
|
"eval_meteor": 0.2499, |
|
"eval_runtime": 75.4748, |
|
"eval_samples_per_second": 6.89, |
|
"eval_steps_per_second": 0.861, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0334164427031052e-05, |
|
"loss": 1.3642, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_bleu": 9.952, |
|
"eval_gen_len": 24.5327, |
|
"eval_loss": 3.2416298389434814, |
|
"eval_meteor": 0.2534, |
|
"eval_runtime": 74.6757, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 0.87, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0064328810888408e-05, |
|
"loss": 1.3284, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 9.8073, |
|
"eval_gen_len": 25.0212, |
|
"eval_loss": 3.2586073875427246, |
|
"eval_meteor": 0.2499, |
|
"eval_runtime": 76.8596, |
|
"eval_samples_per_second": 6.766, |
|
"eval_steps_per_second": 0.846, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.794493194745761e-06, |
|
"loss": 1.3697, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 10.0465, |
|
"eval_gen_len": 24.95, |
|
"eval_loss": 3.2522428035736084, |
|
"eval_meteor": 0.249, |
|
"eval_runtime": 86.1073, |
|
"eval_samples_per_second": 6.039, |
|
"eval_steps_per_second": 0.755, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.524657578603115e-06, |
|
"loss": 1.3718, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_bleu": 9.8922, |
|
"eval_gen_len": 25.0846, |
|
"eval_loss": 3.246650457382202, |
|
"eval_meteor": 0.2499, |
|
"eval_runtime": 78.8353, |
|
"eval_samples_per_second": 6.596, |
|
"eval_steps_per_second": 0.825, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.254821962460469e-06, |
|
"loss": 1.3333, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_bleu": 9.5919, |
|
"eval_gen_len": 24.5365, |
|
"eval_loss": 3.244452953338623, |
|
"eval_meteor": 0.2509, |
|
"eval_runtime": 69.8377, |
|
"eval_samples_per_second": 7.446, |
|
"eval_steps_per_second": 0.931, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.984986346317824e-06, |
|
"loss": 1.4192, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bleu": 10.1256, |
|
"eval_gen_len": 24.8904, |
|
"eval_loss": 3.230175733566284, |
|
"eval_meteor": 0.2524, |
|
"eval_runtime": 66.1729, |
|
"eval_samples_per_second": 7.858, |
|
"eval_steps_per_second": 0.982, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.715150730175178e-06, |
|
"loss": 1.3068, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_bleu": 10.1132, |
|
"eval_gen_len": 24.5346, |
|
"eval_loss": 3.241743326187134, |
|
"eval_meteor": 0.2533, |
|
"eval_runtime": 80.1867, |
|
"eval_samples_per_second": 6.485, |
|
"eval_steps_per_second": 0.811, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 8.445315114032532e-06, |
|
"loss": 1.4282, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_bleu": 9.95, |
|
"eval_gen_len": 24.3462, |
|
"eval_loss": 3.242541551589966, |
|
"eval_meteor": 0.2509, |
|
"eval_runtime": 61.0641, |
|
"eval_samples_per_second": 8.516, |
|
"eval_steps_per_second": 1.064, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.175479497889886e-06, |
|
"loss": 1.4073, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_bleu": 10.0739, |
|
"eval_gen_len": 25.3173, |
|
"eval_loss": 3.234609842300415, |
|
"eval_meteor": 0.2523, |
|
"eval_runtime": 76.5129, |
|
"eval_samples_per_second": 6.796, |
|
"eval_steps_per_second": 0.85, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 7.905643881747241e-06, |
|
"loss": 1.3717, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_bleu": 10.2478, |
|
"eval_gen_len": 24.7577, |
|
"eval_loss": 3.2297909259796143, |
|
"eval_meteor": 0.2543, |
|
"eval_runtime": 79.2758, |
|
"eval_samples_per_second": 6.559, |
|
"eval_steps_per_second": 0.82, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.635808265604593e-06, |
|
"loss": 1.3212, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_bleu": 9.9788, |
|
"eval_gen_len": 24.3962, |
|
"eval_loss": 3.2253217697143555, |
|
"eval_meteor": 0.2558, |
|
"eval_runtime": 74.6105, |
|
"eval_samples_per_second": 6.97, |
|
"eval_steps_per_second": 0.871, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 7.3659726494619485e-06, |
|
"loss": 1.3623, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_bleu": 10.1533, |
|
"eval_gen_len": 24.2692, |
|
"eval_loss": 3.225541830062866, |
|
"eval_meteor": 0.2516, |
|
"eval_runtime": 76.3252, |
|
"eval_samples_per_second": 6.813, |
|
"eval_steps_per_second": 0.852, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.096137033319303e-06, |
|
"loss": 1.2651, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 10.1154, |
|
"eval_gen_len": 24.7058, |
|
"eval_loss": 3.2230679988861084, |
|
"eval_meteor": 0.2535, |
|
"eval_runtime": 78.0816, |
|
"eval_samples_per_second": 6.66, |
|
"eval_steps_per_second": 0.832, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.826301417176656e-06, |
|
"loss": 1.3287, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 10.0863, |
|
"eval_gen_len": 24.5962, |
|
"eval_loss": 3.2264089584350586, |
|
"eval_meteor": 0.2544, |
|
"eval_runtime": 100.662, |
|
"eval_samples_per_second": 5.166, |
|
"eval_steps_per_second": 0.646, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.556465801034011e-06, |
|
"loss": 1.33, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_bleu": 10.1522, |
|
"eval_gen_len": 25.4885, |
|
"eval_loss": 3.221473217010498, |
|
"eval_meteor": 0.2513, |
|
"eval_runtime": 70.9756, |
|
"eval_samples_per_second": 7.326, |
|
"eval_steps_per_second": 0.916, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.286630184891365e-06, |
|
"loss": 1.2862, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_bleu": 10.1425, |
|
"eval_gen_len": 25.1442, |
|
"eval_loss": 3.226158618927002, |
|
"eval_meteor": 0.2538, |
|
"eval_runtime": 74.1814, |
|
"eval_samples_per_second": 7.01, |
|
"eval_steps_per_second": 0.876, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.016794568748719e-06, |
|
"loss": 1.3738, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_bleu": 10.1001, |
|
"eval_gen_len": 25.1846, |
|
"eval_loss": 3.2149925231933594, |
|
"eval_meteor": 0.255, |
|
"eval_runtime": 81.4268, |
|
"eval_samples_per_second": 6.386, |
|
"eval_steps_per_second": 0.798, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.746958952606073e-06, |
|
"loss": 1.3141, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_bleu": 10.2016, |
|
"eval_gen_len": 24.6769, |
|
"eval_loss": 3.217388391494751, |
|
"eval_meteor": 0.2549, |
|
"eval_runtime": 75.3151, |
|
"eval_samples_per_second": 6.904, |
|
"eval_steps_per_second": 0.863, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.4771233364634265e-06, |
|
"loss": 1.3326, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_bleu": 10.2847, |
|
"eval_gen_len": 24.2038, |
|
"eval_loss": 3.2135159969329834, |
|
"eval_meteor": 0.2529, |
|
"eval_runtime": 74.3155, |
|
"eval_samples_per_second": 6.997, |
|
"eval_steps_per_second": 0.875, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.207287720320781e-06, |
|
"loss": 1.3112, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_bleu": 10.1374, |
|
"eval_gen_len": 24.7308, |
|
"eval_loss": 3.2098002433776855, |
|
"eval_meteor": 0.2572, |
|
"eval_runtime": 71.2774, |
|
"eval_samples_per_second": 7.295, |
|
"eval_steps_per_second": 0.912, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.937452104178135e-06, |
|
"loss": 1.3101, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_bleu": 10.0941, |
|
"eval_gen_len": 24.6654, |
|
"eval_loss": 3.206242084503174, |
|
"eval_meteor": 0.256, |
|
"eval_runtime": 68.2494, |
|
"eval_samples_per_second": 7.619, |
|
"eval_steps_per_second": 0.952, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.667616488035489e-06, |
|
"loss": 1.3403, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_bleu": 10.1955, |
|
"eval_gen_len": 24.8115, |
|
"eval_loss": 3.2050940990448, |
|
"eval_meteor": 0.2569, |
|
"eval_runtime": 67.097, |
|
"eval_samples_per_second": 7.75, |
|
"eval_steps_per_second": 0.969, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.397780871892843e-06, |
|
"loss": 1.3651, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_bleu": 10.0488, |
|
"eval_gen_len": 25.5019, |
|
"eval_loss": 3.209751605987549, |
|
"eval_meteor": 0.2581, |
|
"eval_runtime": 61.2968, |
|
"eval_samples_per_second": 8.483, |
|
"eval_steps_per_second": 1.06, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.127945255750197e-06, |
|
"loss": 1.3243, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_bleu": 10.1076, |
|
"eval_gen_len": 24.7385, |
|
"eval_loss": 3.202362298965454, |
|
"eval_meteor": 0.2567, |
|
"eval_runtime": 68.0409, |
|
"eval_samples_per_second": 7.642, |
|
"eval_steps_per_second": 0.955, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.858109639607551e-06, |
|
"loss": 1.3147, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_bleu": 10.0392, |
|
"eval_gen_len": 25.1269, |
|
"eval_loss": 3.2062337398529053, |
|
"eval_meteor": 0.2544, |
|
"eval_runtime": 71.9112, |
|
"eval_samples_per_second": 7.231, |
|
"eval_steps_per_second": 0.904, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.5882740234649054e-06, |
|
"loss": 1.2663, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 10.2884, |
|
"eval_gen_len": 24.925, |
|
"eval_loss": 3.2030699253082275, |
|
"eval_meteor": 0.2588, |
|
"eval_runtime": 70.7104, |
|
"eval_samples_per_second": 7.354, |
|
"eval_steps_per_second": 0.919, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.3184384073222596e-06, |
|
"loss": 1.3497, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_bleu": 10.1993, |
|
"eval_gen_len": 25.0269, |
|
"eval_loss": 3.208573579788208, |
|
"eval_meteor": 0.2542, |
|
"eval_runtime": 73.9279, |
|
"eval_samples_per_second": 7.034, |
|
"eval_steps_per_second": 0.879, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.0486027911796137e-06, |
|
"loss": 1.3252, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_bleu": 10.1155, |
|
"eval_gen_len": 24.7173, |
|
"eval_loss": 3.2008321285247803, |
|
"eval_meteor": 0.2575, |
|
"eval_runtime": 68.983, |
|
"eval_samples_per_second": 7.538, |
|
"eval_steps_per_second": 0.942, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.778767175036968e-06, |
|
"loss": 1.277, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_bleu": 10.3075, |
|
"eval_gen_len": 24.5346, |
|
"eval_loss": 3.2011780738830566, |
|
"eval_meteor": 0.2592, |
|
"eval_runtime": 67.2436, |
|
"eval_samples_per_second": 7.733, |
|
"eval_steps_per_second": 0.967, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.5089315588943217e-06, |
|
"loss": 1.3134, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_bleu": 10.31, |
|
"eval_gen_len": 24.4, |
|
"eval_loss": 3.2086844444274902, |
|
"eval_meteor": 0.2565, |
|
"eval_runtime": 90.4691, |
|
"eval_samples_per_second": 5.748, |
|
"eval_steps_per_second": 0.718, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.239095942751676e-06, |
|
"loss": 1.371, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 10.3715, |
|
"eval_gen_len": 24.3385, |
|
"eval_loss": 3.206083297729492, |
|
"eval_meteor": 0.2597, |
|
"eval_runtime": 67.5985, |
|
"eval_samples_per_second": 7.692, |
|
"eval_steps_per_second": 0.962, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.96926032660903e-06, |
|
"loss": 1.2951, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 10.415, |
|
"eval_gen_len": 24.4481, |
|
"eval_loss": 3.2051799297332764, |
|
"eval_meteor": 0.2597, |
|
"eval_runtime": 70.0734, |
|
"eval_samples_per_second": 7.421, |
|
"eval_steps_per_second": 0.928, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6994247104663838e-06, |
|
"loss": 1.2891, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 10.3082, |
|
"eval_gen_len": 24.3154, |
|
"eval_loss": 3.2073869705200195, |
|
"eval_meteor": 0.2566, |
|
"eval_runtime": 67.5441, |
|
"eval_samples_per_second": 7.699, |
|
"eval_steps_per_second": 0.962, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.429589094323738e-06, |
|
"loss": 1.3057, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 10.3117, |
|
"eval_gen_len": 24.7635, |
|
"eval_loss": 3.204622745513916, |
|
"eval_meteor": 0.2582, |
|
"eval_runtime": 67.7948, |
|
"eval_samples_per_second": 7.67, |
|
"eval_steps_per_second": 0.959, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1597534781810922e-06, |
|
"loss": 1.294, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bleu": 10.4707, |
|
"eval_gen_len": 24.8135, |
|
"eval_loss": 3.204620838165283, |
|
"eval_meteor": 0.2593, |
|
"eval_runtime": 69.6992, |
|
"eval_samples_per_second": 7.461, |
|
"eval_steps_per_second": 0.933, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 8.899178620384462e-07, |
|
"loss": 1.2979, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_bleu": 10.179, |
|
"eval_gen_len": 24.8058, |
|
"eval_loss": 3.2011451721191406, |
|
"eval_meteor": 0.2588, |
|
"eval_runtime": 76.3465, |
|
"eval_samples_per_second": 6.811, |
|
"eval_steps_per_second": 0.851, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.200822458958003e-07, |
|
"loss": 1.3096, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 10.407, |
|
"eval_gen_len": 24.5077, |
|
"eval_loss": 3.199586868286133, |
|
"eval_meteor": 0.2588, |
|
"eval_runtime": 68.5257, |
|
"eval_samples_per_second": 7.588, |
|
"eval_steps_per_second": 0.949, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.502466297531544e-07, |
|
"loss": 1.3779, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 10.2969, |
|
"eval_gen_len": 24.6481, |
|
"eval_loss": 3.2000153064727783, |
|
"eval_meteor": 0.2567, |
|
"eval_runtime": 66.7091, |
|
"eval_samples_per_second": 7.795, |
|
"eval_steps_per_second": 0.974, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.041101361050848e-08, |
|
"loss": 1.3578, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 10.295, |
|
"eval_gen_len": 24.6423, |
|
"eval_loss": 3.2001755237579346, |
|
"eval_meteor": 0.2573, |
|
"eval_runtime": 74.0674, |
|
"eval_samples_per_second": 7.021, |
|
"eval_steps_per_second": 0.878, |
|
"step": 92500 |
|
} |
|
], |
|
"max_steps": 92649, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.0046721126039552e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|