|
{ |
|
"best_metric": 0.280742088680976, |
|
"best_model_checkpoint": "ru_mbart_logs/checkpoint-14000", |
|
"epoch": 2.9804727646454268, |
|
"global_step": 14500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.555555555555555e-05, |
|
"loss": 4.9731, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001111111111111111, |
|
"loss": 1.2784, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00016666666666666666, |
|
"loss": 1.313, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002222222222222222, |
|
"loss": 1.1394, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002777777777777778, |
|
"loss": 1.1607, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003333333333333333, |
|
"loss": 3.2347, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0003888888888888889, |
|
"loss": 2.2791, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004444444444444444, |
|
"loss": 8.2138, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005, |
|
"loss": 4.9492, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999994136267871, |
|
"loss": 3.2816, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 3.028618335723877, |
|
"eval_meteor": 0.045991140537292335, |
|
"eval_rouge1": 0.083013175538275, |
|
"eval_rouge2": 0.009363175378045967, |
|
"eval_rougeL": 0.06624194157798126, |
|
"eval_rougeLsum": 0.06621718230538194, |
|
"eval_runtime": 2492.2951, |
|
"eval_samples_per_second": 0.585, |
|
"eval_steps_per_second": 0.073, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499997654509899, |
|
"loss": 3.1454, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499994722657588, |
|
"loss": 2.9606, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999906180836069, |
|
"loss": 2.6635, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999853408072106, |
|
"loss": 2.7116, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999788908531545, |
|
"loss": 2.6618, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999712682516954, |
|
"loss": 2.6988, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004999624730385907, |
|
"loss": 2.4658, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999525052550985, |
|
"loss": 2.317, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999413649479778, |
|
"loss": 4.1231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999290521694876, |
|
"loss": 2.4733, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 2.3232309818267822, |
|
"eval_meteor": 0.0005584369361758735, |
|
"eval_rouge1": 0.1220294006089088, |
|
"eval_rouge2": 0.038510079219977386, |
|
"eval_rougeL": 0.12194457729623655, |
|
"eval_rougeLsum": 0.12202969830945165, |
|
"eval_runtime": 1904.551, |
|
"eval_samples_per_second": 0.766, |
|
"eval_steps_per_second": 0.096, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004999155669773867, |
|
"loss": 2.3789, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004999009094349342, |
|
"loss": 2.2094, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998850796108883, |
|
"loss": 2.2732, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998680775795066, |
|
"loss": 2.1383, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004998499034205451, |
|
"loss": 2.0062, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499830557219259, |
|
"loss": 1.857, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004998100390664006, |
|
"loss": 1.672, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997883490582204, |
|
"loss": 1.5426, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000499765487296466, |
|
"loss": 1.7417, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997414538883816, |
|
"loss": 1.539, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 0.0, |
|
"eval_loss": 1.5221240520477295, |
|
"eval_meteor": 0.0005584369361758735, |
|
"eval_rouge1": 0.1220294006089088, |
|
"eval_rouge2": 0.038510079219977386, |
|
"eval_rougeL": 0.12194457729623655, |
|
"eval_rougeLsum": 0.12202969830945165, |
|
"eval_runtime": 390.81, |
|
"eval_samples_per_second": 3.731, |
|
"eval_steps_per_second": 0.468, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004997162489467074, |
|
"loss": 1.5757, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004996898725896796, |
|
"loss": 1.4948, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004996623249410294, |
|
"loss": 1.5425, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004996336061299821, |
|
"loss": 1.4704, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004996037162912575, |
|
"loss": 1.5135, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004995726555650683, |
|
"loss": 1.3678, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00049954042409712, |
|
"loss": 1.3672, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004995070220386097, |
|
"loss": 1.3784, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004994724495462264, |
|
"loss": 1.3445, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004994367067821489, |
|
"loss": 1.4009, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 0.0028638180772932727, |
|
"eval_loss": 1.3569462299346924, |
|
"eval_meteor": 0.022276797735191377, |
|
"eval_rouge1": 0.1502204594660534, |
|
"eval_rouge2": 0.059776740808855415, |
|
"eval_rougeL": 0.14355713867260528, |
|
"eval_rougeLsum": 0.14362003627082465, |
|
"eval_runtime": 407.6791, |
|
"eval_samples_per_second": 3.576, |
|
"eval_steps_per_second": 0.449, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000499399793914046, |
|
"loss": 1.3495, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004993617111150755, |
|
"loss": 1.3356, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004993224585638832, |
|
"loss": 1.3655, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004992820364446023, |
|
"loss": 1.2685, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004992404449468526, |
|
"loss": 1.3353, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004991976842657387, |
|
"loss": 1.3435, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004991537546018509, |
|
"loss": 1.3172, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004991086561612622, |
|
"loss": 1.2647, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000499062389155529, |
|
"loss": 1.3039, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004990149538016889, |
|
"loss": 1.2847, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 0.009859969701517706, |
|
"eval_loss": 1.2978057861328125, |
|
"eval_meteor": 0.043245724856651784, |
|
"eval_rouge1": 0.15900718322863383, |
|
"eval_rouge2": 0.060765617909845526, |
|
"eval_rougeL": 0.14802442112090847, |
|
"eval_rougeLsum": 0.14816745409778714, |
|
"eval_runtime": 562.9643, |
|
"eval_samples_per_second": 2.59, |
|
"eval_steps_per_second": 0.325, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004989663503222609, |
|
"loss": 1.2559, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004989165789452427, |
|
"loss": 1.3557, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004988656399041116, |
|
"loss": 1.2848, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004988135334378217, |
|
"loss": 1.2924, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004987602597908037, |
|
"loss": 1.2525, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004987058192129634, |
|
"loss": 1.3187, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004986502119596809, |
|
"loss": 1.2166, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004985934382918091, |
|
"loss": 1.2256, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004985354984756722, |
|
"loss": 1.2151, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004984763927830654, |
|
"loss": 1.183, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 0.01313359470552915, |
|
"eval_loss": 1.232086420059204, |
|
"eval_meteor": 0.04097914816255519, |
|
"eval_rouge1": 0.15507333555153613, |
|
"eval_rouge2": 0.051065754965557514, |
|
"eval_rougeL": 0.1387392246882742, |
|
"eval_rougeLsum": 0.1390655908145888, |
|
"eval_runtime": 636.0052, |
|
"eval_samples_per_second": 2.292, |
|
"eval_steps_per_second": 0.288, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004984161214912524, |
|
"loss": 1.15, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004983546848829651, |
|
"loss": 1.2152, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004982920832464016, |
|
"loss": 1.2307, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004982283168752255, |
|
"loss": 1.2264, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0004981633860685638, |
|
"loss": 1.2326, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004980972911310059, |
|
"loss": 1.3449, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004980300323726024, |
|
"loss": 1.2571, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004979616101088631, |
|
"loss": 1.1583, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004978920246607557, |
|
"loss": 1.19, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0004978212763547049, |
|
"loss": 1.2162, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 0.009657587648715391, |
|
"eval_loss": 1.2419497966766357, |
|
"eval_meteor": 0.035793586448626104, |
|
"eval_rouge1": 0.16630455945332606, |
|
"eval_rouge2": 0.05151895138829819, |
|
"eval_rougeL": 0.1442260549339967, |
|
"eval_rougeLsum": 0.14434908123875545, |
|
"eval_runtime": 654.0521, |
|
"eval_samples_per_second": 2.229, |
|
"eval_steps_per_second": 0.28, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004977493655225898, |
|
"loss": 1.1948, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004976762925017428, |
|
"loss": 1.2191, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004976020576349489, |
|
"loss": 1.2921, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004975266612704425, |
|
"loss": 1.214, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0004974501037619069, |
|
"loss": 1.1724, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004973723854684723, |
|
"loss": 1.2448, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004972935067547141, |
|
"loss": 1.2486, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004972134679906513, |
|
"loss": 1.2516, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004971322695517445, |
|
"loss": 1.3022, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004970499118188944, |
|
"loss": 1.2272, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 0.0062536707895871615, |
|
"eval_loss": 1.2070457935333252, |
|
"eval_meteor": 0.03694972977014645, |
|
"eval_rouge1": 0.1627443709613266, |
|
"eval_rouge2": 0.054809622326817145, |
|
"eval_rougeL": 0.1526303543192113, |
|
"eval_rougeLsum": 0.15279303223888713, |
|
"eval_runtime": 485.0336, |
|
"eval_samples_per_second": 3.006, |
|
"eval_steps_per_second": 0.377, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00049696639517844, |
|
"loss": 1.2424, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004968817200221567, |
|
"loss": 1.2365, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004967958867472544, |
|
"loss": 1.1935, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004967088957563758, |
|
"loss": 1.2296, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0004966207474575943, |
|
"loss": 1.2383, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004965314422644124, |
|
"loss": 1.1781, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004964409805957594, |
|
"loss": 1.2159, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004963493628759899, |
|
"loss": 1.1701, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000496256589534881, |
|
"loss": 1.2066, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0004961626610076314, |
|
"loss": 1.1393, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 0.010000616116201889, |
|
"eval_loss": 1.1901487112045288, |
|
"eval_meteor": 0.04520784243559117, |
|
"eval_rouge1": 0.17628204600152494, |
|
"eval_rouge2": 0.0588475469962297, |
|
"eval_rougeL": 0.15722604946522803, |
|
"eval_rougeLsum": 0.15731352231060353, |
|
"eval_runtime": 679.7977, |
|
"eval_samples_per_second": 2.145, |
|
"eval_steps_per_second": 0.269, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004960675777348583, |
|
"loss": 1.1844, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000495971340162596, |
|
"loss": 1.176, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0004958739487422937, |
|
"loss": 1.1228, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000495775403930813, |
|
"loss": 1.1687, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004956757061904263, |
|
"loss": 1.2342, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004955748559888143, |
|
"loss": 1.1426, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004954728537990637, |
|
"loss": 1.2027, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0004953697000996656, |
|
"loss": 1.1136, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000495265395374512, |
|
"loss": 1.1333, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004951599401128955, |
|
"loss": 1.243, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 0.012845950838959942, |
|
"eval_loss": 1.1657328605651855, |
|
"eval_meteor": 0.05173192798918986, |
|
"eval_rouge1": 0.182811631461606, |
|
"eval_rouge2": 0.06330065615029906, |
|
"eval_rougeL": 0.1628025320417283, |
|
"eval_rougeLsum": 0.16301681919606037, |
|
"eval_runtime": 633.5783, |
|
"eval_samples_per_second": 2.301, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.000495053334809505, |
|
"loss": 1.1282, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004949455799644242, |
|
"loss": 1.1576, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004948366760831298, |
|
"loss": 1.1904, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004947266236764883, |
|
"loss": 1.1469, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004946154232607539, |
|
"loss": 1.1207, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004945030753575663, |
|
"loss": 1.1862, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004943895804939478, |
|
"loss": 1.1074, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004942749392023012, |
|
"loss": 1.1467, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0004941591520204071, |
|
"loss": 1.1898, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004940422194914215, |
|
"loss": 1.143, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 0.018382069123601224, |
|
"eval_loss": 1.1688802242279053, |
|
"eval_meteor": 0.05501822075827951, |
|
"eval_rouge1": 0.17577633666706594, |
|
"eval_rouge2": 0.05577900637209712, |
|
"eval_rougeL": 0.15110984514022763, |
|
"eval_rougeLsum": 0.15134591565121136, |
|
"eval_runtime": 766.6313, |
|
"eval_samples_per_second": 1.902, |
|
"eval_steps_per_second": 0.239, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004939241421638735, |
|
"loss": 1.2294, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004938049205916619, |
|
"loss": 1.1228, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004936845553340535, |
|
"loss": 1.1522, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0004935630469556798, |
|
"loss": 1.1155, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000493440396026535, |
|
"loss": 1.1141, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000493316603121973, |
|
"loss": 1.157, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004931916688227043, |
|
"loss": 1.1317, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004930655937147941, |
|
"loss": 1.2138, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004929383783896589, |
|
"loss": 1.2032, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004928100234440637, |
|
"loss": 1.1688, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 0.008582583334062774, |
|
"eval_loss": 1.1682066917419434, |
|
"eval_meteor": 0.031457014797108926, |
|
"eval_rouge1": 0.17121377190147105, |
|
"eval_rouge2": 0.057846020337606815, |
|
"eval_rougeL": 0.15422867590713454, |
|
"eval_rougeLsum": 0.1543636089792762, |
|
"eval_runtime": 570.9807, |
|
"eval_samples_per_second": 2.554, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004926805294801202, |
|
"loss": 1.2093, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004925498971052823, |
|
"loss": 1.1794, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004924181269323448, |
|
"loss": 1.1593, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0004922852195794399, |
|
"loss": 1.1601, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004921511756700337, |
|
"loss": 1.171, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004920159958329243, |
|
"loss": 1.1814, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004918796807022387, |
|
"loss": 1.0879, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0004917422309174289, |
|
"loss": 1.1527, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004916036471232702, |
|
"loss": 1.1435, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004914639299698568, |
|
"loss": 1.119, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_bleu": 0.01477460828823043, |
|
"eval_loss": 1.148748755455017, |
|
"eval_meteor": 0.05210128540388615, |
|
"eval_rouge1": 0.18454727697727336, |
|
"eval_rouge2": 0.06442555829241428, |
|
"eval_rougeL": 0.1643417771219982, |
|
"eval_rougeLsum": 0.16463861663679646, |
|
"eval_runtime": 684.638, |
|
"eval_samples_per_second": 2.13, |
|
"eval_steps_per_second": 0.267, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004913230801126003, |
|
"loss": 1.132, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000491181098212225, |
|
"loss": 1.117, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004910379849347662, |
|
"loss": 1.1357, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000490893740951566, |
|
"loss": 1.1272, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004907483669392711, |
|
"loss": 1.1355, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004906018635798289, |
|
"loss": 1.1075, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004904542315604844, |
|
"loss": 1.1385, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0004903054715737774, |
|
"loss": 1.1424, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000490155584317539, |
|
"loss": 1.06, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004900045704948878, |
|
"loss": 1.1614, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_bleu": 0.023573391639295775, |
|
"eval_loss": 1.1484849452972412, |
|
"eval_meteor": 0.06942264573323907, |
|
"eval_rouge1": 0.18902812961792426, |
|
"eval_rouge2": 0.05690246981223483, |
|
"eval_rougeL": 0.1550334852621951, |
|
"eval_rougeLsum": 0.1552537149483944, |
|
"eval_runtime": 988.684, |
|
"eval_samples_per_second": 1.475, |
|
"eval_steps_per_second": 0.185, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000489852430814228, |
|
"loss": 1.1194, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004896991659892442, |
|
"loss": 1.1006, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004895447767388996, |
|
"loss": 1.1342, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004893892637874321, |
|
"loss": 1.1826, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004892326278643507, |
|
"loss": 1.137, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004890748697044324, |
|
"loss": 1.0829, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004889159900477182, |
|
"loss": 1.086, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0004887559896395104, |
|
"loss": 1.1076, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004885948692303687, |
|
"loss": 1.1521, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004884326295761066, |
|
"loss": 1.0416, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_bleu": 0.02023032338072932, |
|
"eval_loss": 1.1363707780838013, |
|
"eval_meteor": 0.060080221459470236, |
|
"eval_rouge1": 0.199062276310761, |
|
"eval_rouge2": 0.06488216735306115, |
|
"eval_rougeL": 0.1691458689038896, |
|
"eval_rougeLsum": 0.1694674376703953, |
|
"eval_runtime": 815.0153, |
|
"eval_samples_per_second": 1.789, |
|
"eval_steps_per_second": 0.225, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0004882692714377881, |
|
"loss": 1.102, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048810479558172366, |
|
"loss": 1.1374, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00048793920277946727, |
|
"loss": 1.0137, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048777249380781254, |
|
"loss": 1.1253, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048760466944878866, |
|
"loss": 1.1808, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004874357304896574, |
|
"loss": 1.1696, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048726567772290895, |
|
"loss": 1.109, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00048709451194625853, |
|
"loss": 1.1289, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004869222339626422, |
|
"loss": 1.1529, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0004867488445802137, |
|
"loss": 1.1087, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_bleu": 0.021715410751103448, |
|
"eval_loss": 1.1409450769424438, |
|
"eval_meteor": 0.06723604872210975, |
|
"eval_rouge1": 0.19551341975142214, |
|
"eval_rouge2": 0.06091614991260254, |
|
"eval_rougeL": 0.16373059838285065, |
|
"eval_rougeLsum": 0.16395559757301714, |
|
"eval_runtime": 932.3291, |
|
"eval_samples_per_second": 1.564, |
|
"eval_steps_per_second": 0.196, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00048657434461233995, |
|
"loss": 1.1298, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00048639873487759793, |
|
"loss": 1.1784, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048622201619977033, |
|
"loss": 1.0544, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000486044189407842, |
|
"loss": 1.1239, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048586525533599577, |
|
"loss": 1.0703, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00048568521482360896, |
|
"loss": 1.1142, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0004855040687152489, |
|
"loss": 1.1749, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004853218178606695, |
|
"loss": 1.1147, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004851384631148069, |
|
"loss": 1.0897, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0004849540053377756, |
|
"loss": 1.1196, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_bleu": 0.025043350736850607, |
|
"eval_loss": 1.1154279708862305, |
|
"eval_meteor": 0.07231447401740981, |
|
"eval_rouge1": 0.20616954143165644, |
|
"eval_rouge2": 0.06344537120205161, |
|
"eval_rougeL": 0.1701292313379799, |
|
"eval_rougeLsum": 0.1703542897990762, |
|
"eval_runtime": 874.5506, |
|
"eval_samples_per_second": 1.667, |
|
"eval_steps_per_second": 0.209, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048476844539486433, |
|
"loss": 1.0944, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00048458178415653207, |
|
"loss": 1.1292, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048439402249840415, |
|
"loss": 1.096, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004842051613012678, |
|
"loss": 1.0937, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004840152014510682, |
|
"loss": 1.0264, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00048382414383890427, |
|
"loss": 1.0931, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004836319893610245, |
|
"loss": 1.1553, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004834387389188229, |
|
"loss": 1.1353, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004832443934188344, |
|
"loss": 1.0572, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048304895377273107, |
|
"loss": 1.0778, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_bleu": 0.02264090954365281, |
|
"eval_loss": 1.1103206872940063, |
|
"eval_meteor": 0.07031187093630956, |
|
"eval_rouge1": 0.20903350895884776, |
|
"eval_rouge2": 0.0679566179794408, |
|
"eval_rougeL": 0.17674379685925287, |
|
"eval_rougeLsum": 0.176879851870817, |
|
"eval_runtime": 770.6794, |
|
"eval_samples_per_second": 1.892, |
|
"eval_steps_per_second": 0.237, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00048285242089731746, |
|
"loss": 1.0599, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004826547957145264, |
|
"loss": 1.1592, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.000482456079151415, |
|
"loss": 1.0627, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004822562721401596, |
|
"loss": 1.1204, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048205537561805213, |
|
"loss": 1.1346, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00048185339052749535, |
|
"loss": 1.149, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0004816503178159983, |
|
"loss": 1.1071, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004814461584361723, |
|
"loss": 1.0793, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000481240913345726, |
|
"loss": 1.131, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004810345835074612, |
|
"loss": 1.0848, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 0.027063067858456365, |
|
"eval_loss": 1.0963716506958008, |
|
"eval_meteor": 0.07338060906025744, |
|
"eval_rouge1": 0.2107393337255915, |
|
"eval_rouge2": 0.06681650920596519, |
|
"eval_rougeL": 0.17354803894931026, |
|
"eval_rougeLsum": 0.1737987890813374, |
|
"eval_runtime": 874.0014, |
|
"eval_samples_per_second": 1.668, |
|
"eval_steps_per_second": 0.209, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004808271698892682, |
|
"loss": 1.0827, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004806186734641215, |
|
"loss": 1.041, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004804090952100745, |
|
"loss": 1.0852, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00048019843611025613, |
|
"loss": 1.0835, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000479986697152865, |
|
"loss": 1.1008, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004797738793311657, |
|
"loss": 1.13, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004795599836434834, |
|
"loss": 1.0689, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004793450110931998, |
|
"loss": 1.0651, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00047912896268874816, |
|
"loss": 1.1109, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004789118394436083, |
|
"loss": 1.0336, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_bleu": 0.027231353849381447, |
|
"eval_loss": 1.094221830368042, |
|
"eval_meteor": 0.0753991656515833, |
|
"eval_rouge1": 0.21374757345629292, |
|
"eval_rouge2": 0.06791530453078388, |
|
"eval_rougeL": 0.17498156235223256, |
|
"eval_rougeLsum": 0.17516252143383396, |
|
"eval_runtime": 936.8848, |
|
"eval_samples_per_second": 1.556, |
|
"eval_steps_per_second": 0.195, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0004786936423763024, |
|
"loss": 1.063, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004784743725103896, |
|
"loss": 1.0337, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047825403087446196, |
|
"loss": 1.076, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047803261850213864, |
|
"loss": 1.1623, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00047781013643206216, |
|
"loss": 1.1571, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004775865857078927, |
|
"loss": 1.0718, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00047736196737830337, |
|
"loss": 1.0702, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004771362824969756, |
|
"loss": 0.9846, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000476909532122594, |
|
"loss": 1.0808, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004766817173188414, |
|
"loss": 1.1061, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bleu": 0.022198000912556214, |
|
"eval_loss": 1.0857281684875488, |
|
"eval_meteor": 0.06192256854979087, |
|
"eval_rouge1": 0.20738858318255726, |
|
"eval_rouge2": 0.07232459830507468, |
|
"eval_rougeL": 0.17725116971849175, |
|
"eval_rougeLsum": 0.17743755907754893, |
|
"eval_runtime": 788.3103, |
|
"eval_samples_per_second": 1.85, |
|
"eval_steps_per_second": 0.232, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004764528391543935, |
|
"loss": 1.06, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047622289870291487, |
|
"loss": 1.045, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047599189704305257, |
|
"loss": 1.0685, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047575983525843227, |
|
"loss": 1.0746, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047552671443765236, |
|
"loss": 1.043, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00047529253567427945, |
|
"loss": 1.1152, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004750573000668426, |
|
"loss": 1.0859, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047482100871882877, |
|
"loss": 1.056, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00047458366273867724, |
|
"loss": 1.0762, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004743452632397747, |
|
"loss": 1.09, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_bleu": 0.027841748781480522, |
|
"eval_loss": 1.0797735452651978, |
|
"eval_meteor": 0.07640824352785222, |
|
"eval_rouge1": 0.2190990298244696, |
|
"eval_rouge2": 0.07340646071616252, |
|
"eval_rougeL": 0.1813701022286835, |
|
"eval_rougeLsum": 0.18162416265454912, |
|
"eval_runtime": 858.6907, |
|
"eval_samples_per_second": 1.698, |
|
"eval_steps_per_second": 0.213, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004741058113404497, |
|
"loss": 1.1197, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047386530816396767, |
|
"loss": 0.9979, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004736237548385256, |
|
"loss": 1.1263, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004733811524972468, |
|
"loss": 1.0688, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00047313750227817515, |
|
"loss": 1.0257, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0004728928053242705, |
|
"loss": 1.0151, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004726470627834028, |
|
"loss": 1.0518, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004724002758083467, |
|
"loss": 1.0517, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004721524455567764, |
|
"loss": 1.0592, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004719035731912601, |
|
"loss": 0.9817, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_bleu": 0.023255094924167417, |
|
"eval_loss": 1.0640439987182617, |
|
"eval_meteor": 0.0694763133950018, |
|
"eval_rouge1": 0.21614880424035815, |
|
"eval_rouge2": 0.07107065078607677, |
|
"eval_rougeL": 0.18083940465396262, |
|
"eval_rougeLsum": 0.1811207425056119, |
|
"eval_runtime": 783.0888, |
|
"eval_samples_per_second": 1.862, |
|
"eval_steps_per_second": 0.234, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0004716536598792545, |
|
"loss": 1.0138, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047140270679309925, |
|
"loss": 1.0165, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00047115071511001183, |
|
"loss": 1.0509, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004708976860120817, |
|
"loss": 1.0364, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004706436206862645, |
|
"loss": 1.0691, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004703885203243772, |
|
"loss": 1.0763, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00047013238612309195, |
|
"loss": 1.1142, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0004698752192839305, |
|
"loss": 0.99, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000469617021013259, |
|
"loss": 1.0117, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00046935779252228174, |
|
"loss": 0.9907, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_bleu": 0.024217670789048625, |
|
"eval_loss": 1.0585440397262573, |
|
"eval_meteor": 0.06668694853033096, |
|
"eval_rouge1": 0.21910640475976334, |
|
"eval_rouge2": 0.0780574669597971, |
|
"eval_rougeL": 0.18597736462001008, |
|
"eval_rougeLsum": 0.1862125937949618, |
|
"eval_runtime": 751.5448, |
|
"eval_samples_per_second": 1.94, |
|
"eval_steps_per_second": 0.243, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004690975350270359, |
|
"loss": 1.0281, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00046883624974838564, |
|
"loss": 1.0084, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004685739379120164, |
|
"loss": 1.0314, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00046831060074842946, |
|
"loss": 1.0628, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0004680462394929355, |
|
"loss": 1.0563, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004677808553856494, |
|
"loss": 1.0643, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004675144496714844, |
|
"loss": 1.038, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004672470236001456, |
|
"loss": 1.0334, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00046697857842612514, |
|
"loss": 1.0858, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00046670911540869534, |
|
"loss": 1.0473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_bleu": 0.024852517002557785, |
|
"eval_loss": 1.051284909248352, |
|
"eval_meteor": 0.062184071090472644, |
|
"eval_rouge1": 0.21454487375054443, |
|
"eval_rouge2": 0.07246544163519567, |
|
"eval_rougeL": 0.18065289430837428, |
|
"eval_rougeLsum": 0.181002308977942, |
|
"eval_runtime": 856.5121, |
|
"eval_samples_per_second": 1.702, |
|
"eval_steps_per_second": 0.214, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004664386358119034, |
|
"loss": 0.9952, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046616714090456533, |
|
"loss": 1.0792, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046589463196025974, |
|
"loss": 1.0859, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00046562111025732225, |
|
"loss": 1.0132, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0004653465770788392, |
|
"loss": 1.0836, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004650710337126419, |
|
"loss": 1.0403, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004647944814513003, |
|
"loss": 1.0854, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000464516921592117, |
|
"loss": 1.048, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004642383554371216, |
|
"loss": 1.1175, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004639587842930636, |
|
"loss": 1.0709, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_bleu": 0.028186088600621776, |
|
"eval_loss": 1.0453029870986938, |
|
"eval_meteor": 0.07052568569699236, |
|
"eval_rouge1": 0.21831314585047235, |
|
"eval_rouge2": 0.07380432726916894, |
|
"eval_rougeL": 0.18131202674580268, |
|
"eval_rougeLsum": 0.1815820450711532, |
|
"eval_runtime": 832.8159, |
|
"eval_samples_per_second": 1.751, |
|
"eval_steps_per_second": 0.22, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004636782094714075, |
|
"loss": 1.0227, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004633966322883256, |
|
"loss": 1.0453, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004631140540646927, |
|
"loss": 1.0547, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00046283047612607895, |
|
"loss": 1.1098, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0004625458998027445, |
|
"loss": 0.978, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00046226032642963276, |
|
"loss": 0.9954, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004619737573463645, |
|
"loss": 1.0457, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.000461686193897231, |
|
"loss": 1.0613, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004613976374311884, |
|
"loss": 1.0245, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0004611080893018509, |
|
"loss": 1.0285, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_bleu": 0.029932193757661832, |
|
"eval_loss": 1.039459466934204, |
|
"eval_meteor": 0.07824173395990583, |
|
"eval_rouge1": 0.22874163066451977, |
|
"eval_rouge2": 0.07857632543797217, |
|
"eval_rougeL": 0.18880908522870882, |
|
"eval_rougeLsum": 0.1890899008389733, |
|
"eval_runtime": 857.0196, |
|
"eval_samples_per_second": 1.701, |
|
"eval_steps_per_second": 0.214, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004608175508674847, |
|
"loss": 0.9318, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00046052602349100136, |
|
"loss": 1.0465, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00046023350853995174, |
|
"loss": 0.9737, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004599400073865192, |
|
"loss": 1.0431, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004596455214075135, |
|
"loss": 1.0382, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004593500519843642, |
|
"loss": 1.0855, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00045905360050311393, |
|
"loss": 1.0788, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00045875616835441266, |
|
"loss": 0.955, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0004584577569335101, |
|
"loss": 0.9988, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045815836764025007, |
|
"loss": 1.0472, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_bleu": 0.02986106932510522, |
|
"eval_loss": 1.0356390476226807, |
|
"eval_meteor": 0.07730963136212383, |
|
"eval_rouge1": 0.2338534992658089, |
|
"eval_rouge2": 0.0838961375809737, |
|
"eval_rougeL": 0.19496007003043636, |
|
"eval_rougeLsum": 0.19521593536741283, |
|
"eval_runtime": 776.9039, |
|
"eval_samples_per_second": 1.877, |
|
"eval_steps_per_second": 0.236, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00045785800187906353, |
|
"loss": 0.9511, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004575566610589618, |
|
"loss": 1.0426, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004572543465935305, |
|
"loss": 1.0583, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0004569510599009223, |
|
"loss": 0.9801, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00045664680240385105, |
|
"loss": 1.05, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000456341575529584, |
|
"loss": 0.9823, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00045603538070993616, |
|
"loss": 0.9807, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00045572821938126307, |
|
"loss": 1.0122, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004554200929844541, |
|
"loss": 0.9791, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004551110029649259, |
|
"loss": 1.0085, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_bleu": 0.023249223602803164, |
|
"eval_loss": 1.0316581726074219, |
|
"eval_meteor": 0.06405091237508881, |
|
"eval_rouge1": 0.21994383476031285, |
|
"eval_rouge2": 0.08071486866342525, |
|
"eval_rougeL": 0.1884203659102755, |
|
"eval_rougeLsum": 0.18857817215258604, |
|
"eval_runtime": 733.4469, |
|
"eval_samples_per_second": 1.988, |
|
"eval_steps_per_second": 0.25, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004548009507726152, |
|
"loss": 1.0169, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00045448993786197236, |
|
"loss": 1.0768, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00045417796569195457, |
|
"loss": 1.0437, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0004538650357260189, |
|
"loss": 1.0569, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045355114943211524, |
|
"loss": 1.0492, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0004532363082826797, |
|
"loss": 0.9732, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045292051375462765, |
|
"loss": 0.9821, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0004526037673293467, |
|
"loss": 0.9964, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00045228607049268985, |
|
"loss": 0.9645, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00045196742473496834, |
|
"loss": 1.0429, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_bleu": 0.02666619110845256, |
|
"eval_loss": 1.0260474681854248, |
|
"eval_meteor": 0.07279933533873019, |
|
"eval_rouge1": 0.22889959445417035, |
|
"eval_rouge2": 0.08251982627177057, |
|
"eval_rougeL": 0.1923967901789438, |
|
"eval_rougeLsum": 0.19277978747675056, |
|
"eval_runtime": 829.1517, |
|
"eval_samples_per_second": 1.758, |
|
"eval_steps_per_second": 0.221, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00045164783155094486, |
|
"loss": 1.0148, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004513272924398265, |
|
"loss": 1.0331, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004510058089052577, |
|
"loss": 0.9852, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00045068338245531303, |
|
"loss": 1.0747, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0004503600146024904, |
|
"loss": 0.9761, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00045003570686370385, |
|
"loss": 1.0009, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044971046076027626, |
|
"loss": 0.9755, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00044938427781793244, |
|
"loss": 0.9712, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044905715956679196, |
|
"loss": 1.0083, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044872910754136186, |
|
"loss": 0.9769, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_bleu": 0.03629028019989286, |
|
"eval_loss": 1.0205575227737427, |
|
"eval_meteor": 0.08062802277727196, |
|
"eval_rouge1": 0.2373168942060636, |
|
"eval_rouge2": 0.08397456354626406, |
|
"eval_rougeL": 0.19367527611532537, |
|
"eval_rougeLsum": 0.19379246722013438, |
|
"eval_runtime": 842.9832, |
|
"eval_samples_per_second": 1.73, |
|
"eval_steps_per_second": 0.217, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00044840012328052945, |
|
"loss": 0.9834, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004480702083275552, |
|
"loss": 1.0006, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004477393642300655, |
|
"loss": 0.9908, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004474075925400453, |
|
"loss": 0.981, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00044707489481383067, |
|
"loss": 1.0066, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00044674127261210207, |
|
"loss": 0.9922, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00044640672749987647, |
|
"loss": 0.9334, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004460712610465001, |
|
"loss": 0.9876, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00044573487482564144, |
|
"loss": 1.0439, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004453975704152833, |
|
"loss": 1.0442, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_bleu": 0.03442605522861199, |
|
"eval_loss": 1.0128087997436523, |
|
"eval_meteor": 0.07743263612450016, |
|
"eval_rouge1": 0.2350931975317044, |
|
"eval_rouge2": 0.08525222999697704, |
|
"eval_rougeL": 0.1939595347628182, |
|
"eval_rougeLsum": 0.19413776767042884, |
|
"eval_runtime": 870.6693, |
|
"eval_samples_per_second": 1.675, |
|
"eval_steps_per_second": 0.21, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000445059349397716, |
|
"loss": 0.983, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004447202133595293, |
|
"loss": 0.9959, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0004443801638916056, |
|
"loss": 0.9758, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004440392025891122, |
|
"loss": 0.9418, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00044369733105149357, |
|
"loss": 0.9904, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00044335455088246424, |
|
"loss": 1.0094, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00044301086369000106, |
|
"loss": 1.0386, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0004426662710863357, |
|
"loss": 0.992, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0004423207746879473, |
|
"loss": 0.9713, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044197437611555437, |
|
"loss": 1.037, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_bleu": 0.03691456932983121, |
|
"eval_loss": 1.0009561777114868, |
|
"eval_meteor": 0.08988756663330699, |
|
"eval_rouge1": 0.25002084506336486, |
|
"eval_rouge2": 0.09044063658897722, |
|
"eval_rougeL": 0.2018421807608705, |
|
"eval_rougeLsum": 0.20209381538309434, |
|
"eval_runtime": 818.8478, |
|
"eval_samples_per_second": 1.781, |
|
"eval_steps_per_second": 0.223, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044162707699410775, |
|
"loss": 0.9397, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0004412788789527825, |
|
"loss": 0.9861, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00044092978362497096, |
|
"loss": 0.9345, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.000440579792648274, |
|
"loss": 1.0052, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004402289076644944, |
|
"loss": 1.0374, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004398771303196286, |
|
"loss": 0.954, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00043952446226385925, |
|
"loss": 1.0079, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004391709051515468, |
|
"loss": 0.9765, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043881646064122305, |
|
"loss": 0.9257, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043846113039558177, |
|
"loss": 0.9114, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_bleu": 0.040516146242377336, |
|
"eval_loss": 0.9958982467651367, |
|
"eval_meteor": 0.09468638323821374, |
|
"eval_rouge1": 0.255635747075612, |
|
"eval_rouge2": 0.09469177905086457, |
|
"eval_rougeL": 0.2072211092387325, |
|
"eval_rougeLsum": 0.2074042922359028, |
|
"eval_runtime": 825.6959, |
|
"eval_samples_per_second": 1.766, |
|
"eval_steps_per_second": 0.222, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00043810491608147227, |
|
"loss": 0.9774, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0004377478193698907, |
|
"loss": 0.9689, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043738984193597276, |
|
"loss": 0.9986, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043703098545898523, |
|
"loss": 0.9723, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043667125162231903, |
|
"loss": 1.017, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00043631064211348015, |
|
"loss": 0.9324, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0004359491586240828, |
|
"loss": 0.9321, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004355868028498409, |
|
"loss": 0.9721, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043522357649056003, |
|
"loss": 0.9174, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043485948125013, |
|
"loss": 0.9662, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_bleu": 0.03917855209684242, |
|
"eval_loss": 0.9939388632774353, |
|
"eval_meteor": 0.08631095169531011, |
|
"eval_rouge1": 0.2463928809766775, |
|
"eval_rouge2": 0.08942025430978294, |
|
"eval_rougeL": 0.1996908256859301, |
|
"eval_rougeLsum": 0.19984084186528772, |
|
"eval_runtime": 832.7322, |
|
"eval_samples_per_second": 1.751, |
|
"eval_steps_per_second": 0.22, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00043449451883651634, |
|
"loss": 0.9689, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0004341286909617525, |
|
"loss": 0.9806, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004337619993419318, |
|
"loss": 0.9709, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00043339444569719945, |
|
"loss": 0.9203, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004330260317517443, |
|
"loss": 1.0103, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004326567592337908, |
|
"loss": 0.9626, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0004322866298755913, |
|
"loss": 0.9813, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00043191564541341705, |
|
"loss": 0.9426, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004315438075875511, |
|
"loss": 0.9703, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004311711181422791, |
|
"loss": 0.998, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_bleu": 0.037666738349845554, |
|
"eval_loss": 0.990182101726532, |
|
"eval_meteor": 0.08261657914656466, |
|
"eval_rouge1": 0.2508693209550128, |
|
"eval_rouge2": 0.09566110119917426, |
|
"eval_rougeL": 0.20568123700510338, |
|
"eval_rougeLsum": 0.2057868552975371, |
|
"eval_runtime": 816.1905, |
|
"eval_samples_per_second": 1.786, |
|
"eval_steps_per_second": 0.224, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00043079757882588215, |
|
"loss": 0.9236, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004304231913906278, |
|
"loss": 0.9792, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00043004795759276205, |
|
"loss": 0.9571, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042967187919250137, |
|
"loss": 0.9683, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0004292949579540242, |
|
"loss": 0.9377, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0004289171956454626, |
|
"loss": 0.9828, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00042853859403889415, |
|
"loss": 0.9348, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042815915491033353, |
|
"loss": 0.9656, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042777888003972444, |
|
"loss": 0.9346, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00042739777121093077, |
|
"loss": 0.9279, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_bleu": 0.04039399417410163, |
|
"eval_loss": 0.9835119843482971, |
|
"eval_meteor": 0.08993880260990368, |
|
"eval_rouge1": 0.25691024631017906, |
|
"eval_rouge2": 0.09541982278172086, |
|
"eval_rougeL": 0.207949219088836, |
|
"eval_rougeLsum": 0.2080447313466683, |
|
"eval_runtime": 839.821, |
|
"eval_samples_per_second": 1.736, |
|
"eval_steps_per_second": 0.218, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004270158302117285, |
|
"loss": 1.0139, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0004266330588337975, |
|
"loss": 1.0436, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042624945887271285, |
|
"loss": 0.9818, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042586503212793637, |
|
"loss": 0.944, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042547978040280855, |
|
"loss": 1.0086, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042509370550453964, |
|
"loss": 0.9564, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00042470680924420155, |
|
"loss": 1.0664, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004243190934367189, |
|
"loss": 0.9321, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00042393055990086134, |
|
"loss": 0.9694, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004235412104592339, |
|
"loss": 1.0098, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 0.04175446311716826, |
|
"eval_loss": 0.9746083617210388, |
|
"eval_meteor": 0.08938471353901929, |
|
"eval_rouge1": 0.25822182165065355, |
|
"eval_rouge2": 0.09705031440640308, |
|
"eval_rougeL": 0.20920490296974928, |
|
"eval_rougeLsum": 0.20932626058268705, |
|
"eval_runtime": 860.5139, |
|
"eval_samples_per_second": 1.694, |
|
"eval_steps_per_second": 0.213, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0004231510469382692, |
|
"loss": 0.9461, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0004227600711682189, |
|
"loss": 0.9936, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0004223682849831446, |
|
"loss": 0.9362, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042197569022090983, |
|
"loss": 0.9686, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042158228872317096, |
|
"loss": 0.9262, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00042118808233536863, |
|
"loss": 1.0092, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00042079307290671965, |
|
"loss": 1.0363, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004203972622902075, |
|
"loss": 0.9938, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.000420000652342574, |
|
"loss": 0.93, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004196032449243108, |
|
"loss": 0.947, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_bleu": 0.03800874092707606, |
|
"eval_loss": 0.9699831008911133, |
|
"eval_meteor": 0.08482502347393063, |
|
"eval_rouge1": 0.2535812413123155, |
|
"eval_rouge2": 0.09593530575746165, |
|
"eval_rougeL": 0.207273016975525, |
|
"eval_rougeLsum": 0.20757717610284762, |
|
"eval_runtime": 831.0613, |
|
"eval_samples_per_second": 1.754, |
|
"eval_steps_per_second": 0.22, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0004192050418996506, |
|
"loss": 0.9635, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004188060451365577, |
|
"loss": 1.0412, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004184062565067207, |
|
"loss": 0.9838, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.000418005677885542, |
|
"loss": 0.9796, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00041760431115213034, |
|
"loss": 0.9707, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0004172021581892912, |
|
"loss": 0.9708, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041679922088351854, |
|
"loss": 0.9252, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004163955011249854, |
|
"loss": 0.9302, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041599100080753535, |
|
"loss": 0.9139, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0004155857218286737, |
|
"loss": 0.9232, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_bleu": 0.03754130437155104, |
|
"eval_loss": 0.9632985591888428, |
|
"eval_meteor": 0.08673610645668663, |
|
"eval_rouge1": 0.25916585870685055, |
|
"eval_rouge2": 0.1021139087019877, |
|
"eval_rougeL": 0.21436899473514376, |
|
"eval_rougeLsum": 0.2146562879957944, |
|
"eval_runtime": 774.0166, |
|
"eval_samples_per_second": 1.884, |
|
"eval_steps_per_second": 0.236, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00041517966608955824, |
|
"loss": 0.9557, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00041477283549499084, |
|
"loss": 0.9851, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0004143652319534078, |
|
"loss": 0.9261, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0004139568573768715, |
|
"loss": 0.9777, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0004135477136810614, |
|
"loss": 0.9884, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0004131378027852646, |
|
"loss": 0.9272, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00041272712661236734, |
|
"loss": 0.9311, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004123156870888455, |
|
"loss": 0.9391, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00041190348614475626, |
|
"loss": 1.0161, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0004114905257137281, |
|
"loss": 0.939, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_bleu": 0.0399426078249315, |
|
"eval_loss": 0.9562557935714722, |
|
"eval_meteor": 0.09014998855285877, |
|
"eval_rouge1": 0.2596305778291721, |
|
"eval_rouge2": 0.1020098544856582, |
|
"eval_rougeL": 0.21308349550633152, |
|
"eval_rougeLsum": 0.213354783921806, |
|
"eval_runtime": 814.2864, |
|
"eval_samples_per_second": 1.791, |
|
"eval_steps_per_second": 0.225, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00041107680773295275, |
|
"loss": 0.9592, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004106623341431752, |
|
"loss": 0.9449, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00041024710688868505, |
|
"loss": 0.9596, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004098311279173076, |
|
"loss": 0.976, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004094143991803941, |
|
"loss": 0.848, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040899692263281326, |
|
"loss": 0.9545, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004085787002329414, |
|
"loss": 0.9091, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040815973394265405, |
|
"loss": 0.9073, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0004077400257273158, |
|
"loss": 0.9744, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00040731957755577215, |
|
"loss": 0.9558, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_bleu": 0.040793093274275646, |
|
"eval_loss": 0.9490484595298767, |
|
"eval_meteor": 0.09393521432339826, |
|
"eval_rouge1": 0.26429144396118703, |
|
"eval_rouge2": 0.10437971228746701, |
|
"eval_rougeL": 0.21630868232084993, |
|
"eval_rougeLsum": 0.21677530475045115, |
|
"eval_runtime": 781.9845, |
|
"eval_samples_per_second": 1.864, |
|
"eval_steps_per_second": 0.234, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004068983914003394, |
|
"loss": 0.8849, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00040647646923679565, |
|
"loss": 0.8983, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004060538130443718, |
|
"loss": 0.918, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00040563042480574216, |
|
"loss": 0.9325, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004052063065070147, |
|
"loss": 0.907, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040478146013772236, |
|
"loss": 0.9623, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040435588769081345, |
|
"loss": 0.946, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040392959116264214, |
|
"loss": 0.9568, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0004035025725529594, |
|
"loss": 0.985, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00040307483386490346, |
|
"loss": 0.9472, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_bleu": 0.040405130469543436, |
|
"eval_loss": 0.9509351253509521, |
|
"eval_meteor": 0.0926254239512932, |
|
"eval_rouge1": 0.26761105094976106, |
|
"eval_rouge2": 0.10613058587329147, |
|
"eval_rougeL": 0.21951079385576955, |
|
"eval_rougeLsum": 0.2197804879551447, |
|
"eval_runtime": 763.0077, |
|
"eval_samples_per_second": 1.911, |
|
"eval_steps_per_second": 0.24, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00040264637710499035, |
|
"loss": 0.9468, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004022172042831046, |
|
"loss": 0.9512, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004017873174124897, |
|
"loss": 0.9386, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00040135671850973897, |
|
"loss": 0.9178, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004009254095947856, |
|
"loss": 0.9475, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00040049339269089356, |
|
"loss": 0.8977, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00040006066982464805, |
|
"loss": 0.9214, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003996272430259458, |
|
"loss": 0.9259, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003991931143279856, |
|
"loss": 0.9222, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0003987582857672592, |
|
"loss": 0.9096, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_bleu": 0.04373085706341943, |
|
"eval_loss": 0.9403254389762878, |
|
"eval_meteor": 0.09096688878637306, |
|
"eval_rouge1": 0.2641722640912822, |
|
"eval_rouge2": 0.10333180929216512, |
|
"eval_rougeL": 0.21372787445525787, |
|
"eval_rougeLsum": 0.21385789437947128, |
|
"eval_runtime": 837.8326, |
|
"eval_samples_per_second": 1.74, |
|
"eval_steps_per_second": 0.218, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00039832275938354097, |
|
"loss": 0.9179, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.000397886537219879, |
|
"loss": 0.9398, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0003974496213225853, |
|
"loss": 0.9742, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00039701201374122596, |
|
"loss": 0.969, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00039657371652861205, |
|
"loss": 0.9305, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039613473174078946, |
|
"loss": 0.9011, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0003956950614370295, |
|
"loss": 0.9639, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0003952547076798193, |
|
"loss": 0.9541, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039481367253485207, |
|
"loss": 0.9387, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00039437195807101735, |
|
"loss": 0.9277, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_bleu": 0.04682319611528157, |
|
"eval_loss": 0.9397039413452148, |
|
"eval_meteor": 0.09917297138388688, |
|
"eval_rouge1": 0.27335755782038107, |
|
"eval_rouge2": 0.10915838373398484, |
|
"eval_rougeL": 0.22215345431562905, |
|
"eval_rougeLsum": 0.22226258369021143, |
|
"eval_runtime": 820.5156, |
|
"eval_samples_per_second": 1.777, |
|
"eval_steps_per_second": 0.223, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003939295663603914, |
|
"loss": 0.9122, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003934864994782274, |
|
"loss": 0.9937, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00039304275950294564, |
|
"loss": 0.9018, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0003925983485161243, |
|
"loss": 0.9186, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003921532686024887, |
|
"loss": 0.857, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003917075218499023, |
|
"loss": 0.889, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003912611103493571, |
|
"loss": 0.9663, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003908140361949627, |
|
"loss": 0.9312, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0003903663014839377, |
|
"loss": 0.925, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003899179083165993, |
|
"loss": 0.9464, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_bleu": 0.05108072723005548, |
|
"eval_loss": 0.933294415473938, |
|
"eval_meteor": 0.10758567050718289, |
|
"eval_rouge1": 0.282465635064933, |
|
"eval_rouge2": 0.11111279364849651, |
|
"eval_rougeL": 0.2232303864687606, |
|
"eval_rougeLsum": 0.22365514740751907, |
|
"eval_runtime": 883.8776, |
|
"eval_samples_per_second": 1.65, |
|
"eval_steps_per_second": 0.207, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003894688587963533, |
|
"loss": 0.9013, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038901915502968467, |
|
"loss": 0.8591, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00038856879912614736, |
|
"loss": 0.9377, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0003881177931983544, |
|
"loss": 0.8988, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00038766613936196825, |
|
"loss": 0.9236, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003872138397356905, |
|
"loss": 0.9323, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0003867608964412523, |
|
"loss": 0.8952, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00038630731160340417, |
|
"loss": 0.9222, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000385853087349906, |
|
"loss": 0.883, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003853982258115174, |
|
"loss": 0.8943, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_bleu": 0.041746329616716864, |
|
"eval_loss": 0.9250670671463013, |
|
"eval_meteor": 0.08375738598141255, |
|
"eval_rouge1": 0.26274044688348097, |
|
"eval_rouge2": 0.10485444901711441, |
|
"eval_rougeL": 0.2147760721327726, |
|
"eval_rougeLsum": 0.2150094067970953, |
|
"eval_runtime": 811.6181, |
|
"eval_samples_per_second": 1.796, |
|
"eval_steps_per_second": 0.225, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003849427291219872, |
|
"loss": 0.8882, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00038448659941804403, |
|
"loss": 0.9458, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003840298388393857, |
|
"loss": 0.8782, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0003835724495286696, |
|
"loss": 0.9404, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003831144336315024, |
|
"loss": 0.9028, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003826557932964301, |
|
"loss": 0.9556, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003821965306749281, |
|
"loss": 0.8736, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003817366479213906, |
|
"loss": 0.8751, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0003812761471931212, |
|
"loss": 0.8832, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00038081503065032213, |
|
"loss": 0.882, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_bleu": 0.04871321082416372, |
|
"eval_loss": 0.922956645488739, |
|
"eval_meteor": 0.09926044026714874, |
|
"eval_rouge1": 0.2746656175957424, |
|
"eval_rouge2": 0.11079977213562642, |
|
"eval_rougeL": 0.22253952375791686, |
|
"eval_rougeLsum": 0.22279895141838174, |
|
"eval_runtime": 818.2053, |
|
"eval_samples_per_second": 1.782, |
|
"eval_steps_per_second": 0.224, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00038035330045608454, |
|
"loss": 0.8734, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037989095877637814, |
|
"loss": 0.9618, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037942800778004116, |
|
"loss": 0.9192, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00037896444963877004, |
|
"loss": 0.86, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00037850028652710953, |
|
"loss": 0.9198, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00037803552062244195, |
|
"loss": 0.9254, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003775701541049777, |
|
"loss": 0.9358, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0003771041891577445, |
|
"loss": 0.7919, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.000376637627966577, |
|
"loss": 0.8291, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00037617047272010725, |
|
"loss": 0.8321, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_bleu": 0.040097866687896645, |
|
"eval_loss": 0.9228019714355469, |
|
"eval_meteor": 0.08519845357678862, |
|
"eval_rouge1": 0.2611703934049219, |
|
"eval_rouge2": 0.1009454428518515, |
|
"eval_rougeL": 0.2138461760754211, |
|
"eval_rougeLsum": 0.21398332835293457, |
|
"eval_runtime": 781.3955, |
|
"eval_samples_per_second": 1.866, |
|
"eval_steps_per_second": 0.234, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003757027256097538, |
|
"loss": 0.8089, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003752343888297117, |
|
"loss": 0.8146, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0003747654645769421, |
|
"loss": 0.7723, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003742959550511619, |
|
"loss": 0.8641, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003738258624548335, |
|
"loss": 0.7915, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00037335518899315475, |
|
"loss": 0.7803, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00037288393687404803, |
|
"loss": 0.8374, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0003724121083081502, |
|
"loss": 0.8311, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003719397055088024, |
|
"loss": 0.8233, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003714667306920395, |
|
"loss": 0.8074, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_bleu": 0.04805963835445032, |
|
"eval_loss": 0.9211423397064209, |
|
"eval_meteor": 0.09777681760130216, |
|
"eval_rouge1": 0.2768917967072316, |
|
"eval_rouge2": 0.11394482210626287, |
|
"eval_rougeL": 0.22495604729992338, |
|
"eval_rougeLsum": 0.22503747839487787, |
|
"eval_runtime": 784.172, |
|
"eval_samples_per_second": 1.859, |
|
"eval_steps_per_second": 0.233, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003709931860765795, |
|
"loss": 0.8508, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0003705190738838134, |
|
"loss": 0.8153, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00037004439633779476, |
|
"loss": 0.8223, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003695691556652291, |
|
"loss": 0.8114, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00036909335409546386, |
|
"loss": 0.8071, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00036861699386047704, |
|
"loss": 0.7978, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0003681400771948679, |
|
"loss": 0.8122, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00036766260633584567, |
|
"loss": 0.8093, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0003671845835232194, |
|
"loss": 0.824, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036670601099938707, |
|
"loss": 0.7732, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_bleu": 0.044086073704045374, |
|
"eval_loss": 0.9112805128097534, |
|
"eval_meteor": 0.0936695239845227, |
|
"eval_rouge1": 0.27497552526095137, |
|
"eval_rouge2": 0.11292299072717882, |
|
"eval_rougeL": 0.22588206109693404, |
|
"eval_rougeLsum": 0.22611735269705646, |
|
"eval_runtime": 752.6781, |
|
"eval_samples_per_second": 1.937, |
|
"eval_steps_per_second": 0.243, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036622689100932577, |
|
"loss": 0.7849, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036574722580058036, |
|
"loss": 0.8359, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00036526701762325356, |
|
"loss": 0.7662, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036478626872999504, |
|
"loss": 0.7632, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036430498137599097, |
|
"loss": 0.8423, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0003638231578189535, |
|
"loss": 0.863, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036334080031911006, |
|
"loss": 0.8095, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00036285791113919264, |
|
"loss": 0.8135, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036237449254442755, |
|
"loss": 0.7884, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00036189054680252467, |
|
"loss": 0.7864, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_bleu": 0.055913854990088815, |
|
"eval_loss": 0.9118927121162415, |
|
"eval_meteor": 0.10661617177830124, |
|
"eval_rouge1": 0.2871808089992608, |
|
"eval_rouge2": 0.11373487230857647, |
|
"eval_rougeL": 0.2268683510809197, |
|
"eval_rougeLsum": 0.22704030291083557, |
|
"eval_runtime": 912.7549, |
|
"eval_samples_per_second": 1.597, |
|
"eval_steps_per_second": 0.2, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0003614060761836664, |
|
"loss": 0.8249, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0003609210829604974, |
|
"loss": 0.8138, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003604355694081141, |
|
"loss": 0.8196, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003599495378040536, |
|
"loss": 0.7976, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00035946299042828323, |
|
"loss": 0.8189, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0003589759295631897, |
|
"loss": 0.7715, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00035848835749356856, |
|
"loss": 0.8159, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003580002765066135, |
|
"loss": 0.7899, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00035751168889190546, |
|
"loss": 0.823, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003570225969414018, |
|
"loss": 0.7904, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_bleu": 0.05759842604507193, |
|
"eval_loss": 0.9100894331932068, |
|
"eval_meteor": 0.1172660554701684, |
|
"eval_rouge1": 0.2875811269116024, |
|
"eval_rouge2": 0.11072576759792657, |
|
"eval_rougeL": 0.2242016848170022, |
|
"eval_rougeLsum": 0.22429594061863245, |
|
"eval_runtime": 929.3497, |
|
"eval_samples_per_second": 1.569, |
|
"eval_steps_per_second": 0.197, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00035653300294942605, |
|
"loss": 0.7919, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0003560429092126565, |
|
"loss": 0.7784, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035555231803011594, |
|
"loss": 0.8166, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0003550612317031606, |
|
"loss": 0.7883, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0003545696525354693, |
|
"loss": 0.8146, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00035407758283303303, |
|
"loss": 0.8136, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0003535850249041437, |
|
"loss": 0.8016, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035309198105938346, |
|
"loss": 0.8078, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003525984536116139, |
|
"loss": 0.8358, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00035210444487596533, |
|
"loss": 0.8462, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_bleu": 0.056010150629172695, |
|
"eval_loss": 0.9048720002174377, |
|
"eval_meteor": 0.10867523251749334, |
|
"eval_rouge1": 0.28790699224278277, |
|
"eval_rouge2": 0.11628794079218072, |
|
"eval_rougeL": 0.22900965402181062, |
|
"eval_rougeLsum": 0.22914187535928354, |
|
"eval_runtime": 860.5182, |
|
"eval_samples_per_second": 1.694, |
|
"eval_steps_per_second": 0.213, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003516099571698256, |
|
"loss": 0.7873, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0003511149928128295, |
|
"loss": 0.8956, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00035061955412684774, |
|
"loss": 0.7699, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00035012364343597614, |
|
"loss": 0.8018, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00034962726306652465, |
|
"loss": 0.8135, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0003491304153470064, |
|
"loss": 0.793, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00034863310260812696, |
|
"loss": 0.8396, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034813532718277345, |
|
"loss": 0.8262, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034763709140600313, |
|
"loss": 0.8359, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034713839761503284, |
|
"loss": 0.8369, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_bleu": 0.0475289078710305, |
|
"eval_loss": 0.9005081057548523, |
|
"eval_meteor": 0.09295389016728517, |
|
"eval_rouge1": 0.2780343125397822, |
|
"eval_rouge2": 0.11550609844475938, |
|
"eval_rougeL": 0.22754197009003835, |
|
"eval_rougeLsum": 0.22763889368973056, |
|
"eval_runtime": 793.9629, |
|
"eval_samples_per_second": 1.836, |
|
"eval_steps_per_second": 0.23, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0003466392481492282, |
|
"loss": 0.8595, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00034613964535009213, |
|
"loss": 0.7884, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0003456395915612542, |
|
"loss": 0.7953, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0003451390891284596, |
|
"loss": 0.8477, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.000344638140399558, |
|
"loss": 0.7739, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00034413674772449293, |
|
"loss": 0.8307, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00034363491345529014, |
|
"loss": 0.8185, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034313263994604695, |
|
"loss": 0.8131, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034262992955292124, |
|
"loss": 0.8229, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034212678463412037, |
|
"loss": 0.821, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_bleu": 0.049286340296485695, |
|
"eval_loss": 0.8980807662010193, |
|
"eval_meteor": 0.09961983408691519, |
|
"eval_rouge1": 0.28085788746126766, |
|
"eval_rouge2": 0.11624101358181467, |
|
"eval_rougeL": 0.22898661556205108, |
|
"eval_rougeLsum": 0.2293538462104961, |
|
"eval_runtime": 821.8741, |
|
"eval_samples_per_second": 1.774, |
|
"eval_steps_per_second": 0.223, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00034162320754988997, |
|
"loss": 0.8036, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00034111920066250275, |
|
"loss": 0.8166, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.000340614766336248, |
|
"loss": 0.8006, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00034010990693741986, |
|
"loss": 0.8164, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00033960462483430666, |
|
"loss": 0.8075, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0003390989223971793, |
|
"loss": 0.8245, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003385928019982808, |
|
"loss": 0.8209, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003380862660118148, |
|
"loss": 0.8118, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00033757931681393426, |
|
"loss": 0.8142, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003370719567827306, |
|
"loss": 0.8182, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_bleu": 0.059112211923701315, |
|
"eval_loss": 0.8948565125465393, |
|
"eval_meteor": 0.1103037236277642, |
|
"eval_rouge1": 0.28935404210250865, |
|
"eval_rouge2": 0.11885095184198584, |
|
"eval_rougeL": 0.23006075558788136, |
|
"eval_rougeLsum": 0.2303551113078559, |
|
"eval_runtime": 867.0969, |
|
"eval_samples_per_second": 1.681, |
|
"eval_steps_per_second": 0.211, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0003365641882982227, |
|
"loss": 0.7839, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0003360560137423449, |
|
"loss": 0.7496, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033554743549893705, |
|
"loss": 0.8139, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033503845595373236, |
|
"loss": 0.816, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033452907749434647, |
|
"loss": 0.7852, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00033401930251026663, |
|
"loss": 0.8049, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00033350913339283997, |
|
"loss": 0.8128, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00033299857253526235, |
|
"loss": 0.8001, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003324876223325676, |
|
"loss": 0.8059, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003319762851816159, |
|
"loss": 0.8055, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_bleu": 0.04968172439819074, |
|
"eval_loss": 0.8897386789321899, |
|
"eval_meteor": 0.0959252377557064, |
|
"eval_rouge1": 0.2825817806759327, |
|
"eval_rouge2": 0.11929168591085915, |
|
"eval_rougeL": 0.23065093006329562, |
|
"eval_rougeLsum": 0.23073602187546002, |
|
"eval_runtime": 785.58, |
|
"eval_samples_per_second": 1.856, |
|
"eval_steps_per_second": 0.233, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0003314645634810824, |
|
"loss": 0.8317, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003309524596314462, |
|
"loss": 0.7836, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003304399760349793, |
|
"loss": 0.7454, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0003299271150957349, |
|
"loss": 0.8285, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00032941387921953623, |
|
"loss": 0.7797, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00032890027081396557, |
|
"loss": 0.7894, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00032838629228835244, |
|
"loss": 0.7738, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003278719460537629, |
|
"loss": 0.8577, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003273572345229878, |
|
"loss": 0.8366, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003268421601105315, |
|
"loss": 0.8452, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_bleu": 0.06175833888335998, |
|
"eval_loss": 0.8857202529907227, |
|
"eval_meteor": 0.11564428561526571, |
|
"eval_rouge1": 0.2984211620545767, |
|
"eval_rouge2": 0.12812198093574928, |
|
"eval_rougeL": 0.24078682657552683, |
|
"eval_rougeLsum": 0.24090611398567952, |
|
"eval_runtime": 839.1781, |
|
"eval_samples_per_second": 1.737, |
|
"eval_steps_per_second": 0.218, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.0003263267252326007, |
|
"loss": 0.7974, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.000325810932307093, |
|
"loss": 0.8564, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00032529478375358585, |
|
"loss": 0.7945, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00032477828199332454, |
|
"loss": 0.8155, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00032426142944921144, |
|
"loss": 0.8114, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00032374422854579454, |
|
"loss": 0.8534, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00032322668170925586, |
|
"loss": 0.7823, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0003227087913674001, |
|
"loss": 0.8028, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0003221905599496436, |
|
"loss": 0.8138, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0003216719898870025, |
|
"loss": 0.7679, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_bleu": 0.054543737255601424, |
|
"eval_loss": 0.8796692490577698, |
|
"eval_meteor": 0.10758804247216243, |
|
"eval_rouge1": 0.2919587093662439, |
|
"eval_rouge2": 0.12430008946491809, |
|
"eval_rougeL": 0.23694154847486093, |
|
"eval_rougeLsum": 0.2369246442180512, |
|
"eval_runtime": 800.7242, |
|
"eval_samples_per_second": 1.821, |
|
"eval_steps_per_second": 0.229, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0003211530836120815, |
|
"loss": 0.7682, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0003206338435590625, |
|
"loss": 0.7981, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00032011427216369325, |
|
"loss": 0.7743, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0003195943718632757, |
|
"loss": 0.7704, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0003190741450966547, |
|
"loss": 0.7726, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0003185535943042066, |
|
"loss": 0.7453, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0003180327219278277, |
|
"loss": 0.7667, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0003175115304109229, |
|
"loss": 0.8188, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0003169900221983942, |
|
"loss": 0.803, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.000316468199736629, |
|
"loss": 0.8071, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_bleu": 0.05066810537796977, |
|
"eval_loss": 0.878441572189331, |
|
"eval_meteor": 0.10026059669640651, |
|
"eval_rouge1": 0.28287618301786566, |
|
"eval_rouge2": 0.12062284642324037, |
|
"eval_rougeL": 0.2303816201143833, |
|
"eval_rougeLsum": 0.23049450812237804, |
|
"eval_runtime": 801.9914, |
|
"eval_samples_per_second": 1.818, |
|
"eval_steps_per_second": 0.228, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00031594606547348926, |
|
"loss": 0.7935, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00031542362185829907, |
|
"loss": 0.7439, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00031490087134183415, |
|
"loss": 0.8043, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00031437781637630967, |
|
"loss": 0.7793, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00031385445941536894, |
|
"loss": 0.7977, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.000313330802914072, |
|
"loss": 0.787, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.000312806849328884, |
|
"loss": 0.8028, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0003122826011176638, |
|
"loss": 0.7628, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0003117580607396521, |
|
"loss": 0.741, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00031123323065546056, |
|
"loss": 0.7545, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_bleu": 0.055928194281175944, |
|
"eval_loss": 0.8721160292625427, |
|
"eval_meteor": 0.1077819837416298, |
|
"eval_rouge1": 0.2921975434696711, |
|
"eval_rouge2": 0.12386717356595339, |
|
"eval_rougeL": 0.2364155319093878, |
|
"eval_rougeLsum": 0.2367077751961113, |
|
"eval_runtime": 819.8166, |
|
"eval_samples_per_second": 1.778, |
|
"eval_steps_per_second": 0.223, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0003107081133270594, |
|
"loss": 0.859, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00031018271121776653, |
|
"loss": 0.7895, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00030965702679223577, |
|
"loss": 0.798, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0003091310625164453, |
|
"loss": 0.8406, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0003086048208576859, |
|
"loss": 0.7767, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00030807830428454964, |
|
"loss": 0.7862, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00030755151526691836, |
|
"loss": 0.8394, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00030702445627595184, |
|
"loss": 0.777, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0003064971297840762, |
|
"loss": 0.8101, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00030596953826497237, |
|
"loss": 0.7956, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_bleu": 0.05328102304540896, |
|
"eval_loss": 0.8663896322250366, |
|
"eval_meteor": 0.10099821769452577, |
|
"eval_rouge1": 0.2878957845786232, |
|
"eval_rouge2": 0.1271121648147, |
|
"eval_rougeL": 0.237491222566861, |
|
"eval_rougeLsum": 0.2377537503458192, |
|
"eval_runtime": 773.4463, |
|
"eval_samples_per_second": 1.885, |
|
"eval_steps_per_second": 0.237, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00030544168419356475, |
|
"loss": 0.7576, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00030491357004600934, |
|
"loss": 0.7277, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0003043851982996818, |
|
"loss": 0.8212, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00030385657143316665, |
|
"loss": 0.8184, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0003033276919262448, |
|
"loss": 0.8254, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0003027985622598826, |
|
"loss": 0.7759, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00030226918491621974, |
|
"loss": 0.7916, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00030173956237855766, |
|
"loss": 0.7878, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00030120969713134813, |
|
"loss": 0.7516, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0003006795916601816, |
|
"loss": 0.7703, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_bleu": 0.060527438648474874, |
|
"eval_loss": 0.8641963601112366, |
|
"eval_meteor": 0.11261367173892511, |
|
"eval_rouge1": 0.2960454761566517, |
|
"eval_rouge2": 0.12766022643473074, |
|
"eval_rougeL": 0.2372859654945768, |
|
"eval_rougeLsum": 0.23761311362862314, |
|
"eval_runtime": 805.5572, |
|
"eval_samples_per_second": 1.81, |
|
"eval_steps_per_second": 0.227, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0003001492484517751, |
|
"loss": 0.7887, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00029961866999396106, |
|
"loss": 0.7855, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00029908785877567554, |
|
"loss": 0.8741, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00029855681728694624, |
|
"loss": 0.7555, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002980255480188813, |
|
"loss": 0.813, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00029749405346365715, |
|
"loss": 0.8078, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002969623361145072, |
|
"loss": 0.7408, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00029643039846571006, |
|
"loss": 0.8094, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0002958982430125775, |
|
"loss": 0.8098, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002953658722514431, |
|
"loss": 0.8057, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_bleu": 0.05442875886057264, |
|
"eval_loss": 0.8599924445152283, |
|
"eval_meteor": 0.10552484854688432, |
|
"eval_rouge1": 0.29102503559264836, |
|
"eval_rouge2": 0.12469741951657863, |
|
"eval_rougeL": 0.2361473787097038, |
|
"eval_rougeLsum": 0.23642929448072816, |
|
"eval_runtime": 813.0328, |
|
"eval_samples_per_second": 1.793, |
|
"eval_steps_per_second": 0.225, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00029483328867965065, |
|
"loss": 0.7872, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00029430049479554194, |
|
"loss": 0.7796, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0002937674930984455, |
|
"loss": 0.7743, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00029323428608866466, |
|
"loss": 0.8052, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0002927008762674659, |
|
"loss": 0.7816, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.000292167266137067, |
|
"loss": 0.8015, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0002916334582006256, |
|
"loss": 0.7281, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.000291099454962227, |
|
"loss": 0.7924, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0002905652589268727, |
|
"loss": 0.7755, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0002900308726004688, |
|
"loss": 0.7824, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_bleu": 0.06392759320475111, |
|
"eval_loss": 0.8572558760643005, |
|
"eval_meteor": 0.11881169721018137, |
|
"eval_rouge1": 0.3005040733582013, |
|
"eval_rouge2": 0.12951596850730016, |
|
"eval_rougeL": 0.23936215390148619, |
|
"eval_rougeLsum": 0.23961363908034655, |
|
"eval_runtime": 831.0266, |
|
"eval_samples_per_second": 1.754, |
|
"eval_steps_per_second": 0.22, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0002894962984898138, |
|
"loss": 0.8091, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00028896153910258724, |
|
"loss": 0.7404, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.0002884265969473378, |
|
"loss": 0.808, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00028789147453347146, |
|
"loss": 0.7979, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00028735617437123984, |
|
"loss": 0.7568, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0002868206989717283, |
|
"loss": 0.7652, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00028628505084684426, |
|
"loss": 0.7986, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00028574923250930556, |
|
"loss": 0.7757, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00028521324647262815, |
|
"loss": 0.7658, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.000284677095251115, |
|
"loss": 0.7875, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_bleu": 0.05108778859345946, |
|
"eval_loss": 0.8546839356422424, |
|
"eval_meteor": 0.10092634095964405, |
|
"eval_rouge1": 0.28767312877409235, |
|
"eval_rouge2": 0.12595652221150935, |
|
"eval_rougeL": 0.2369031036018187, |
|
"eval_rougeLsum": 0.23733770818251593, |
|
"eval_runtime": 764.2294, |
|
"eval_samples_per_second": 1.908, |
|
"eval_steps_per_second": 0.239, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00028414078135984385, |
|
"loss": 0.757, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002836043073146554, |
|
"loss": 0.7539, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00028306767563214183, |
|
"loss": 0.7763, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002825308888296346, |
|
"loss": 0.8003, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002819939494251929, |
|
"loss": 0.7286, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002814568599375919, |
|
"loss": 0.7599, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00028091962288631075, |
|
"loss": 0.7894, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0002803822407915206, |
|
"loss": 0.7395, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0002798447161740732, |
|
"loss": 0.7465, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00027930705155548906, |
|
"loss": 0.7517, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_bleu": 0.0601774385170897, |
|
"eval_loss": 0.8497090339660645, |
|
"eval_meteor": 0.11022306403410814, |
|
"eval_rouge1": 0.3010936140000787, |
|
"eval_rouge2": 0.1366926965847685, |
|
"eval_rougeL": 0.24654397622034518, |
|
"eval_rougeLsum": 0.24674306964099543, |
|
"eval_runtime": 786.3862, |
|
"eval_samples_per_second": 1.854, |
|
"eval_steps_per_second": 0.233, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.000278769249457945, |
|
"loss": 0.8193, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00027823131240426304, |
|
"loss": 0.7361, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0002776932429178982, |
|
"loss": 0.7218, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00027715504352292687, |
|
"loss": 0.7519, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002766167167440345, |
|
"loss": 0.7558, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002760782651065044, |
|
"loss": 0.7513, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00027553969113620555, |
|
"loss": 0.8274, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002750009973595808, |
|
"loss": 0.8634, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00027446218630363474, |
|
"loss": 0.7738, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002739232604959225, |
|
"loss": 0.7668, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_bleu": 0.06568674364789237, |
|
"eval_loss": 0.8459737300872803, |
|
"eval_meteor": 0.1233007294370796, |
|
"eval_rouge1": 0.3079566863899688, |
|
"eval_rouge2": 0.13575207460463884, |
|
"eval_rougeL": 0.24647523647052938, |
|
"eval_rougeLsum": 0.24679921908579772, |
|
"eval_runtime": 856.424, |
|
"eval_samples_per_second": 1.702, |
|
"eval_steps_per_second": 0.214, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002733842224645373, |
|
"loss": 0.7658, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002728450747380989, |
|
"loss": 0.7407, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0002723058198457415, |
|
"loss": 0.7539, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00027176646031710204, |
|
"loss": 0.7514, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002712269986823084, |
|
"loss": 0.8452, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00027068743747196754, |
|
"loss": 0.8191, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0002701477792171533, |
|
"loss": 0.7548, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00026960802644939466, |
|
"loss": 0.7621, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00026906818170066425, |
|
"loss": 0.7934, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0002685282475033663, |
|
"loss": 0.7624, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_bleu": 0.055168737670307534, |
|
"eval_loss": 0.8426267504692078, |
|
"eval_meteor": 0.10550167652095581, |
|
"eval_rouge1": 0.29606869778442146, |
|
"eval_rouge2": 0.1343951413693717, |
|
"eval_rougeL": 0.24232327940360698, |
|
"eval_rougeLsum": 0.24250078759870264, |
|
"eval_runtime": 741.2455, |
|
"eval_samples_per_second": 1.967, |
|
"eval_steps_per_second": 0.247, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00026798822639032406, |
|
"loss": 0.7741, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00026744812089476896, |
|
"loss": 0.788, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002669079335503283, |
|
"loss": 0.7681, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00026636766689101305, |
|
"loss": 0.7905, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002658273234512064, |
|
"loss": 0.8, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002652869057656517, |
|
"loss": 0.7575, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0002647464163694406, |
|
"loss": 0.744, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00026420585779800125, |
|
"loss": 0.7633, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00026366523258708604, |
|
"loss": 0.7565, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00026312454327276005, |
|
"loss": 0.7525, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_bleu": 0.062340843273236565, |
|
"eval_loss": 0.839045524597168, |
|
"eval_meteor": 0.11806962218120415, |
|
"eval_rouge1": 0.3067770262356754, |
|
"eval_rouge2": 0.13539633125965317, |
|
"eval_rougeL": 0.24638166743744208, |
|
"eval_rougeLsum": 0.24638859347807696, |
|
"eval_runtime": 824.3541, |
|
"eval_samples_per_second": 1.769, |
|
"eval_steps_per_second": 0.222, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00026258379239138926, |
|
"loss": 0.7502, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002620429824796283, |
|
"loss": 0.7504, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002615021160744086, |
|
"loss": 0.7774, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002609611957129269, |
|
"loss": 0.7769, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00026042022393263283, |
|
"loss": 0.7089, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00025987920327121726, |
|
"loss": 0.7315, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0002593381362666004, |
|
"loss": 0.7915, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002587970254569198, |
|
"loss": 0.747, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002582558733805186, |
|
"loss": 0.767, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002577146825759333, |
|
"loss": 0.824, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_bleu": 0.07021984841301115, |
|
"eval_loss": 0.8348736763000488, |
|
"eval_meteor": 0.12590184841226557, |
|
"eval_rouge1": 0.31316689079775006, |
|
"eval_rouge2": 0.1378270756947887, |
|
"eval_rougeL": 0.2496795974505986, |
|
"eval_rougeLsum": 0.24984224026373703, |
|
"eval_runtime": 843.8417, |
|
"eval_samples_per_second": 1.728, |
|
"eval_steps_per_second": 0.217, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002571734555818824, |
|
"loss": 0.7773, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0002566321949372539, |
|
"loss": 0.7559, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0002560909031810939, |
|
"loss": 0.7429, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.000255549582852594, |
|
"loss": 0.7808, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00025500823649108034, |
|
"loss": 0.7261, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0002544668666360008, |
|
"loss": 0.7736, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00025392547582691385, |
|
"loss": 0.8067, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00025338406660347593, |
|
"loss": 0.7485, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002528426415054299, |
|
"loss": 0.7992, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002523012030725932, |
|
"loss": 0.7764, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_bleu": 0.07546374872793683, |
|
"eval_loss": 0.8316299319267273, |
|
"eval_meteor": 0.1314441956272719, |
|
"eval_rouge1": 0.31233109484334876, |
|
"eval_rouge2": 0.1367833279138738, |
|
"eval_rougeL": 0.2451846244024204, |
|
"eval_rougeLsum": 0.24546922923659403, |
|
"eval_runtime": 914.8761, |
|
"eval_samples_per_second": 1.594, |
|
"eval_steps_per_second": 0.2, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00025175975384484587, |
|
"loss": 0.7807, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0002512182963621183, |
|
"loss": 0.8244, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00025067683316437996, |
|
"loss": 0.7786, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.000250135366791627, |
|
"loss": 0.732, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0002495938997838702, |
|
"loss": 0.7757, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00024905243468112387, |
|
"loss": 0.7643, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0002485109740233927, |
|
"loss": 0.7998, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002479695203506613, |
|
"loss": 0.7835, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002474280762028808, |
|
"loss": 0.7401, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002468866441199581, |
|
"loss": 0.7701, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_bleu": 0.07057443909304043, |
|
"eval_loss": 0.8267539143562317, |
|
"eval_meteor": 0.11902596203459297, |
|
"eval_rouge1": 0.3085150378498419, |
|
"eval_rouge2": 0.13657943348061313, |
|
"eval_rougeL": 0.24749803270119058, |
|
"eval_rougeLsum": 0.24774236419143747, |
|
"eval_runtime": 886.3895, |
|
"eval_samples_per_second": 1.645, |
|
"eval_steps_per_second": 0.206, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0002463452266417433, |
|
"loss": 0.7728, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002458038263080181, |
|
"loss": 0.8172, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00024526244565848364, |
|
"loss": 0.737, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00024472108723274895, |
|
"loss": 0.7607, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002441797535703186, |
|
"loss": 0.7523, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00024363844721058084, |
|
"loss": 0.7373, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00024309717069279644, |
|
"loss": 0.7361, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00024255592655608567, |
|
"loss": 0.7995, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00024201471733941682, |
|
"loss": 0.726, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0002414735455815949, |
|
"loss": 0.7703, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_bleu": 0.07698054120195008, |
|
"eval_loss": 0.8239230513572693, |
|
"eval_meteor": 0.13032822498369387, |
|
"eval_rouge1": 0.317148061863241, |
|
"eval_rouge2": 0.1393288744292123, |
|
"eval_rougeL": 0.24958709786205027, |
|
"eval_rougeLsum": 0.2497938351151056, |
|
"eval_runtime": 929.7249, |
|
"eval_samples_per_second": 1.568, |
|
"eval_steps_per_second": 0.197, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00024093241382124872, |
|
"loss": 0.7861, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00024039132459681965, |
|
"loss": 0.7833, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00023985028044654956, |
|
"loss": 0.7616, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00023930928390846878, |
|
"loss": 0.7294, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00023876833752038435, |
|
"loss": 0.766, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.000238227443819868, |
|
"loss": 0.7395, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0002376866053442444, |
|
"loss": 0.6734, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00023714582463057912, |
|
"loss": 0.7234, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00023660510421566664, |
|
"loss": 0.7693, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00023606444663601885, |
|
"loss": 0.7722, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_bleu": 0.06367408230519342, |
|
"eval_loss": 0.8238893747329712, |
|
"eval_meteor": 0.11446926281259544, |
|
"eval_rouge1": 0.30253947243762624, |
|
"eval_rouge2": 0.13361449367649733, |
|
"eval_rougeL": 0.24391072697854624, |
|
"eval_rougeLsum": 0.24413536682879666, |
|
"eval_runtime": 817.4943, |
|
"eval_samples_per_second": 1.783, |
|
"eval_steps_per_second": 0.224, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00023552385442785265, |
|
"loss": 0.7081, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00023498333012707826, |
|
"loss": 0.7194, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00023444287626928767, |
|
"loss": 0.7595, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002339024953897421, |
|
"loss": 0.7518, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002333621900233604, |
|
"loss": 0.7371, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.0002328219627047075, |
|
"loss": 0.7686, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00023228181596798202, |
|
"loss": 0.7819, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00023174175234700443, |
|
"loss": 0.6983, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00023120177437520559, |
|
"loss": 0.7916, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0002306618845856144, |
|
"loss": 0.7564, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_bleu": 0.06788107247637484, |
|
"eval_loss": 0.81615149974823, |
|
"eval_meteor": 0.12103882245540673, |
|
"eval_rouge1": 0.30985260417723115, |
|
"eval_rouge2": 0.14150097767015785, |
|
"eval_rougeL": 0.2509749736056771, |
|
"eval_rougeLsum": 0.2512924339869609, |
|
"eval_runtime": 830.803, |
|
"eval_samples_per_second": 1.755, |
|
"eval_steps_per_second": 0.22, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0002301220855108461, |
|
"loss": 0.7453, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00022958237968309054, |
|
"loss": 0.7378, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00022904276963409996, |
|
"loss": 0.7665, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0002285032578951773, |
|
"loss": 0.7595, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00022796384699716462, |
|
"loss": 0.7001, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002274245394704307, |
|
"loss": 0.7705, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002268853378448592, |
|
"loss": 0.7568, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002263462446498375, |
|
"loss": 0.7679, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.000225807262414244, |
|
"loss": 0.7385, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00022526839366643657, |
|
"loss": 0.7655, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_bleu": 0.07350621292572981, |
|
"eval_loss": 0.813992977142334, |
|
"eval_meteor": 0.12823671504096398, |
|
"eval_rouge1": 0.31414562156332837, |
|
"eval_rouge2": 0.14044409999271865, |
|
"eval_rougeL": 0.2503023214800615, |
|
"eval_rougeLsum": 0.25051506634153897, |
|
"eval_runtime": 853.2505, |
|
"eval_samples_per_second": 1.709, |
|
"eval_steps_per_second": 0.214, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00022472964093424088, |
|
"loss": 0.7357, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00022419100674493834, |
|
"loss": 0.7636, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00022365249362525408, |
|
"loss": 0.7219, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0002231141041013456, |
|
"loss": 0.752, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00022257584069879043, |
|
"loss": 0.7961, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0002220377059425743, |
|
"loss": 0.7812, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00022149970235708002, |
|
"loss": 0.6933, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00022096183246607447, |
|
"loss": 0.7401, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00022042409879269754, |
|
"loss": 0.8012, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00021988650385945045, |
|
"loss": 0.7048, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_bleu": 0.07086633459606108, |
|
"eval_loss": 0.8118289709091187, |
|
"eval_meteor": 0.12330330474512881, |
|
"eval_rouge1": 0.31267720187527753, |
|
"eval_rouge2": 0.14043126182758564, |
|
"eval_rougeL": 0.25134755247816853, |
|
"eval_rougeLsum": 0.25151023733695077, |
|
"eval_runtime": 854.5708, |
|
"eval_samples_per_second": 1.706, |
|
"eval_steps_per_second": 0.214, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0002193490501881832, |
|
"loss": 0.76, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00021881174030008327, |
|
"loss": 0.7345, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00021827457671566377, |
|
"loss": 0.7611, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0002177375619547513, |
|
"loss": 0.7541, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00021720069853647448, |
|
"loss": 0.7818, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00021666398897925181, |
|
"loss": 0.7341, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00021612743580078032, |
|
"loss": 0.7844, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0002155910415180232, |
|
"loss": 0.7218, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00021505480864719833, |
|
"loss": 0.7721, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00021451873970376652, |
|
"loss": 0.7697, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_bleu": 0.0718030649419069, |
|
"eval_loss": 0.8096761107444763, |
|
"eval_meteor": 0.12683164072142025, |
|
"eval_rouge1": 0.31540613698257497, |
|
"eval_rouge2": 0.14463980588140418, |
|
"eval_rougeL": 0.25528347800714857, |
|
"eval_rougeLsum": 0.2554846420759572, |
|
"eval_runtime": 839.5267, |
|
"eval_samples_per_second": 1.737, |
|
"eval_steps_per_second": 0.218, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00021398283720241954, |
|
"loss": 0.7887, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002134471036570682, |
|
"loss": 0.7192, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00021291154158083105, |
|
"loss": 0.7905, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00021237615348602225, |
|
"loss": 0.7283, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002118409418841393, |
|
"loss": 0.7428, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00021130590928585254, |
|
"loss": 0.7782, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00021077105820099215, |
|
"loss": 0.792, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00021023639113853683, |
|
"loss": 0.7308, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00020970191060660224, |
|
"loss": 0.8318, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00020916761911242884, |
|
"loss": 0.7378, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_bleu": 0.07122867455733606, |
|
"eval_loss": 0.8025975823402405, |
|
"eval_meteor": 0.12454999755173883, |
|
"eval_rouge1": 0.31188022635326357, |
|
"eval_rouge2": 0.14169719003484493, |
|
"eval_rougeL": 0.25154225885054093, |
|
"eval_rougeLsum": 0.25179235910230063, |
|
"eval_runtime": 853.9032, |
|
"eval_samples_per_second": 1.707, |
|
"eval_steps_per_second": 0.214, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00020863351916237035, |
|
"loss": 0.7252, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00020809961326188214, |
|
"loss": 0.745, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00020756590391550906, |
|
"loss": 0.7913, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00020703239362687394, |
|
"loss": 0.7257, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00020649908489866618, |
|
"loss": 0.7389, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00020596598023262915, |
|
"loss": 0.727, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00020543308212954924, |
|
"loss": 0.6832, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00020490039308924396, |
|
"loss": 0.6975, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00020436791561054992, |
|
"loss": 0.7122, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00020383565219131135, |
|
"loss": 0.7939, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_bleu": 0.07447497548622753, |
|
"eval_loss": 0.805306613445282, |
|
"eval_meteor": 0.12635021856271, |
|
"eval_rouge1": 0.3173315559154648, |
|
"eval_rouge2": 0.14349372947373074, |
|
"eval_rougeL": 0.25391833407729314, |
|
"eval_rougeLsum": 0.25413413249366223, |
|
"eval_runtime": 882.299, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.207, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00020330360532836832, |
|
"loss": 0.7241, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00020277177751754517, |
|
"loss": 0.7291, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00020224017125363837, |
|
"loss": 0.7349, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00020170878903040545, |
|
"loss": 0.7454, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0002011776333405528, |
|
"loss": 0.7769, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.000200646706675724, |
|
"loss": 0.7773, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00020011601152648882, |
|
"loss": 0.7845, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001995855503823303, |
|
"loss": 0.7137, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00019905532573163403, |
|
"loss": 0.7573, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019852534006167653, |
|
"loss": 0.7338, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_bleu": 0.0797120050349496, |
|
"eval_loss": 0.7981196641921997, |
|
"eval_meteor": 0.13308199196309556, |
|
"eval_rouge1": 0.3218638993683711, |
|
"eval_rouge2": 0.1470521731131963, |
|
"eval_rougeL": 0.2567287850308242, |
|
"eval_rougeLsum": 0.25698501119096195, |
|
"eval_runtime": 933.0075, |
|
"eval_samples_per_second": 1.563, |
|
"eval_steps_per_second": 0.196, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019799559585861288, |
|
"loss": 0.7741, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001974660956074655, |
|
"loss": 0.7501, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001969368417921126, |
|
"loss": 0.6996, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019640783689527626, |
|
"loss": 0.7216, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001958790833985108, |
|
"loss": 0.7644, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019535058378219135, |
|
"loss": 0.7121, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019482234052550207, |
|
"loss": 0.7133, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019429435610642453, |
|
"loss": 0.7203, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019376663300172604, |
|
"loss": 0.7858, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001932391736869482, |
|
"loss": 0.7745, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_bleu": 0.06749424480527735, |
|
"eval_loss": 0.7952741980552673, |
|
"eval_meteor": 0.11775873915588794, |
|
"eval_rouge1": 0.31241096019045156, |
|
"eval_rouge2": 0.14563533936287387, |
|
"eval_rougeL": 0.2535571033024633, |
|
"eval_rougeLsum": 0.25368001649766037, |
|
"eval_runtime": 790.6936, |
|
"eval_samples_per_second": 1.844, |
|
"eval_steps_per_second": 0.231, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019271198063639514, |
|
"loss": 0.6772, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019218505632312178, |
|
"loss": 0.7551, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019165840321892276, |
|
"loss": 0.7524, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001911320237943201, |
|
"loss": 0.7154, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00019060592051855209, |
|
"loss": 0.7407, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00019008009585956188, |
|
"loss": 0.6886, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001895545522839853, |
|
"loss": 0.8154, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00018902929225713978, |
|
"loss": 0.6969, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001885043182430126, |
|
"loss": 0.7091, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018797963270424935, |
|
"loss": 0.7121, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_bleu": 0.08102165916855171, |
|
"eval_loss": 0.7926180958747864, |
|
"eval_meteor": 0.13749606266115308, |
|
"eval_rouge1": 0.3250026709620075, |
|
"eval_rouge2": 0.14713626139319874, |
|
"eval_rougeL": 0.25753084598773646, |
|
"eval_rougeLsum": 0.25769367336423854, |
|
"eval_runtime": 865.9868, |
|
"eval_samples_per_second": 1.684, |
|
"eval_steps_per_second": 0.211, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018745523810214239, |
|
"loss": 0.71, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018693113689661927, |
|
"loss": 0.7691, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018640733154623134, |
|
"loss": 0.7113, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018588382450814185, |
|
"loss": 0.7408, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018536061823811505, |
|
"loss": 0.7774, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018483771519050397, |
|
"loss": 0.7449, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018431511781823916, |
|
"loss": 0.6867, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018379282857281768, |
|
"loss": 0.7213, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00018327084990429082, |
|
"loss": 0.7532, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.000182749184261253, |
|
"loss": 0.7681, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_bleu": 0.08042295331793536, |
|
"eval_loss": 0.788446307182312, |
|
"eval_meteor": 0.13951383501554995, |
|
"eval_rouge1": 0.3287291422762437, |
|
"eval_rouge2": 0.1501384483932246, |
|
"eval_rougeL": 0.2604585825998745, |
|
"eval_rougeLsum": 0.2608096246567302, |
|
"eval_runtime": 882.1557, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.207, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00018222783409083037, |
|
"loss": 0.7147, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00018170680183866912, |
|
"loss": 0.6799, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.000181186089948924, |
|
"loss": 0.7336, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00018066570086424716, |
|
"loss": 0.7402, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001801456370257763, |
|
"loss": 0.7423, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001796259008731234, |
|
"loss": 0.7252, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00017910649484436349, |
|
"loss": 0.7666, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00017858742137602263, |
|
"loss": 0.7378, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00017806868290306705, |
|
"loss": 0.7522, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001775502818588917, |
|
"loss": 0.7305, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_bleu": 0.07804930399724541, |
|
"eval_loss": 0.7845081090927124, |
|
"eval_meteor": 0.12981944353457556, |
|
"eval_rouge1": 0.31977795247985463, |
|
"eval_rouge2": 0.14862122230147748, |
|
"eval_rougeL": 0.257294098259071, |
|
"eval_rougeLsum": 0.2575039258120313, |
|
"eval_runtime": 860.1602, |
|
"eval_samples_per_second": 1.695, |
|
"eval_steps_per_second": 0.213, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00017703222067530838, |
|
"loss": 0.7159, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00017651450178253448, |
|
"loss": 0.7303, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00017599712760918223, |
|
"loss": 0.7004, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00017548010058224638, |
|
"loss": 0.7326, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00017496342312709324, |
|
"loss": 0.7073, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00017444709766744948, |
|
"loss": 0.7874, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001739311266253904, |
|
"loss": 0.7154, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00017341551242132874, |
|
"loss": 0.6947, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001729002574740034, |
|
"loss": 0.7658, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00017238536420046796, |
|
"loss": 0.7241, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_bleu": 0.07789543557871356, |
|
"eval_loss": 0.7799738645553589, |
|
"eval_meteor": 0.13081456975819492, |
|
"eval_rouge1": 0.32400315543804187, |
|
"eval_rouge2": 0.15097537177776, |
|
"eval_rougeL": 0.2616413327310113, |
|
"eval_rougeLsum": 0.2617294761544998, |
|
"eval_runtime": 848.0422, |
|
"eval_samples_per_second": 1.719, |
|
"eval_steps_per_second": 0.216, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00017187083501607944, |
|
"loss": 0.7677, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00017135667233448675, |
|
"loss": 0.7523, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00017084287856761996, |
|
"loss": 0.6877, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00017032945612567804, |
|
"loss": 0.7225, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001698164074171183, |
|
"loss": 0.7191, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00016930373484864508, |
|
"loss": 0.7792, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00016879144082519804, |
|
"loss": 0.7002, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00016827952774994096, |
|
"loss": 0.6956, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00016776799802425092, |
|
"loss": 0.6983, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00016725685404770648, |
|
"loss": 0.7288, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_bleu": 0.07738994044012307, |
|
"eval_loss": 0.7785964608192444, |
|
"eval_meteor": 0.1288623449734404, |
|
"eval_rouge1": 0.3200471438296305, |
|
"eval_rouge2": 0.1512205647401233, |
|
"eval_rougeL": 0.25939742372931385, |
|
"eval_rougeLsum": 0.2595082473182987, |
|
"eval_runtime": 849.1307, |
|
"eval_samples_per_second": 1.717, |
|
"eval_steps_per_second": 0.216, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00016674609821807674, |
|
"loss": 0.7445, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00016623573293130997, |
|
"loss": 0.7073, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001657257605815225, |
|
"loss": 0.7019, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00016521618356098722, |
|
"loss": 0.7356, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00016470700426012274, |
|
"loss": 0.7484, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001641982250674819, |
|
"loss": 0.7221, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00016368984836974046, |
|
"loss": 0.7336, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00016318187655168647, |
|
"loss": 0.7145, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00016267431199620838, |
|
"loss": 0.7411, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001621671570842841, |
|
"loss": 0.6602, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_bleu": 0.08154034067100566, |
|
"eval_loss": 0.7753680944442749, |
|
"eval_meteor": 0.13727691890379068, |
|
"eval_rouge1": 0.32744856365015873, |
|
"eval_rouge2": 0.15379576444407406, |
|
"eval_rougeL": 0.2631346202032693, |
|
"eval_rougeLsum": 0.2633981653212556, |
|
"eval_runtime": 850.2937, |
|
"eval_samples_per_second": 1.715, |
|
"eval_steps_per_second": 0.215, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001616604141949703, |
|
"loss": 0.7086, |
|
"step": 9010 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001611540857053905, |
|
"loss": 0.6951, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00016064817399072444, |
|
"loss": 0.7344, |
|
"step": 9030 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00016014268142419674, |
|
"loss": 0.7093, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00015963761037706583, |
|
"loss": 0.6708, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.0001591329632186127, |
|
"loss": 0.6905, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00015862874231613005, |
|
"loss": 0.7261, |
|
"step": 9070 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001581249500349109, |
|
"loss": 0.7586, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00015762158873823746, |
|
"loss": 0.716, |
|
"step": 9090 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00015711866078737063, |
|
"loss": 0.7052, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_bleu": 0.08479542822767472, |
|
"eval_loss": 0.7742032408714294, |
|
"eval_meteor": 0.1404376008139251, |
|
"eval_rouge1": 0.333944577670829, |
|
"eval_rouge2": 0.15612515006119043, |
|
"eval_rougeL": 0.26610226692047334, |
|
"eval_rougeLsum": 0.2663932194338652, |
|
"eval_runtime": 845.8621, |
|
"eval_samples_per_second": 1.724, |
|
"eval_steps_per_second": 0.216, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00015661616854153822, |
|
"loss": 0.7269, |
|
"step": 9110 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00015611411435792393, |
|
"loss": 0.7756, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00015561250059165703, |
|
"loss": 0.7007, |
|
"step": 9130 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00015511132959580037, |
|
"loss": 0.6797, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00015461060372133994, |
|
"loss": 0.7614, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00015411032531717378, |
|
"loss": 0.7044, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001536104967301006, |
|
"loss": 0.7114, |
|
"step": 9170 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00015311112030480918, |
|
"loss": 0.7087, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00015261219838386714, |
|
"loss": 0.7531, |
|
"step": 9190 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00015211373330771023, |
|
"loss": 0.678, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_bleu": 0.08695413590105541, |
|
"eval_loss": 0.7727900743484497, |
|
"eval_meteor": 0.14477618566565462, |
|
"eval_rouge1": 0.33589539947395375, |
|
"eval_rouge2": 0.15756852616667666, |
|
"eval_rougeL": 0.26808455505037576, |
|
"eval_rougeLsum": 0.26831771903262536, |
|
"eval_runtime": 888.3101, |
|
"eval_samples_per_second": 1.641, |
|
"eval_steps_per_second": 0.206, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00015161572741463094, |
|
"loss": 0.7038, |
|
"step": 9210 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00015111818304076763, |
|
"loss": 0.654, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001506211025200941, |
|
"loss": 0.7551, |
|
"step": 9230 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001501244881844078, |
|
"loss": 0.7142, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00014962834236331943, |
|
"loss": 0.6727, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00014913266738424214, |
|
"loss": 0.7451, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00014863746557238012, |
|
"loss": 0.7149, |
|
"step": 9270 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00014814273925071778, |
|
"loss": 0.7426, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00014764849074000945, |
|
"loss": 0.6932, |
|
"step": 9290 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.0001471547223587678, |
|
"loss": 0.7148, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_bleu": 0.08405093456725986, |
|
"eval_loss": 0.769240140914917, |
|
"eval_meteor": 0.13799072224223335, |
|
"eval_rouge1": 0.3289513225249592, |
|
"eval_rouge2": 0.1533487264129132, |
|
"eval_rougeL": 0.2631557822340731, |
|
"eval_rougeLsum": 0.26330399290771866, |
|
"eval_runtime": 898.0268, |
|
"eval_samples_per_second": 1.624, |
|
"eval_steps_per_second": 0.204, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00014666143642325316, |
|
"loss": 0.7087, |
|
"step": 9310 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00014616863524746288, |
|
"loss": 0.6975, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00014567632114312017, |
|
"loss": 0.715, |
|
"step": 9330 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00014518449641966346, |
|
"loss": 0.6941, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0001446931633842355, |
|
"loss": 0.7099, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00014420232434167265, |
|
"loss": 0.6913, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00014371198159449363, |
|
"loss": 0.6762, |
|
"step": 9370 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001432221374428894, |
|
"loss": 0.6857, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001427327941847119, |
|
"loss": 0.7254, |
|
"step": 9390 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001422439541154632, |
|
"loss": 0.66, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_bleu": 0.08126520981780667, |
|
"eval_loss": 0.7649896740913391, |
|
"eval_meteor": 0.13394522974660503, |
|
"eval_rouge1": 0.32676943290951493, |
|
"eval_rouge2": 0.15365151539028427, |
|
"eval_rougeL": 0.26329337074340675, |
|
"eval_rougeLsum": 0.2635319064528655, |
|
"eval_runtime": 855.2654, |
|
"eval_samples_per_second": 1.705, |
|
"eval_steps_per_second": 0.214, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001417556195282852, |
|
"loss": 0.7211, |
|
"step": 9410 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001412677927139485, |
|
"loss": 0.7022, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00014078047596084153, |
|
"loss": 0.7302, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00014029367155496048, |
|
"loss": 0.6871, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00013980738177989772, |
|
"loss": 0.7628, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001393216089168314, |
|
"loss": 0.702, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00013883635524451554, |
|
"loss": 0.7124, |
|
"step": 9470 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001383516230392678, |
|
"loss": 0.6751, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00013786741457496018, |
|
"loss": 0.7329, |
|
"step": 9490 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00013738373212300776, |
|
"loss": 0.7458, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_bleu": 0.08398597438341937, |
|
"eval_loss": 0.7601596117019653, |
|
"eval_meteor": 0.13751031408558206, |
|
"eval_rouge1": 0.32718893678105365, |
|
"eval_rouge2": 0.15421218231356643, |
|
"eval_rougeL": 0.2623710305546715, |
|
"eval_rougeLsum": 0.26249565506158357, |
|
"eval_runtime": 897.9844, |
|
"eval_samples_per_second": 1.624, |
|
"eval_steps_per_second": 0.204, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001369005779523578, |
|
"loss": 0.6931, |
|
"step": 9510 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00013641795432947968, |
|
"loss": 0.6898, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00013593586351835402, |
|
"loss": 0.7274, |
|
"step": 9530 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00013545430778046176, |
|
"loss": 0.7119, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00013497328937477404, |
|
"loss": 0.7305, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013449281055774148, |
|
"loss": 0.6822, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013401287358328308, |
|
"loss": 0.7183, |
|
"step": 9570 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013353348070277654, |
|
"loss": 0.7271, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013305463416504677, |
|
"loss": 0.7173, |
|
"step": 9590 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013257633621635635, |
|
"loss": 0.698, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_bleu": 0.0817528900097528, |
|
"eval_loss": 0.7589043378829956, |
|
"eval_meteor": 0.13704695695493954, |
|
"eval_rouge1": 0.3281627687948676, |
|
"eval_rouge2": 0.15600391066953626, |
|
"eval_rougeL": 0.26503808697382725, |
|
"eval_rougeLsum": 0.2650383145678832, |
|
"eval_runtime": 855.0342, |
|
"eval_samples_per_second": 1.705, |
|
"eval_steps_per_second": 0.214, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00013209858910039396, |
|
"loss": 0.6694, |
|
"step": 9610 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00013162139505826427, |
|
"loss": 0.7398, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00013114475632847812, |
|
"loss": 0.693, |
|
"step": 9630 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001306686751469407, |
|
"loss": 0.7365, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00013019315374694206, |
|
"loss": 0.7069, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001297181943591465, |
|
"loss": 0.7177, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001292437992115814, |
|
"loss": 0.6798, |
|
"step": 9670 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001287699705296278, |
|
"loss": 0.694, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001282967105360093, |
|
"loss": 0.6923, |
|
"step": 9690 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00012782402145078165, |
|
"loss": 0.7088, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_bleu": 0.08876181125840059, |
|
"eval_loss": 0.7568917274475098, |
|
"eval_meteor": 0.14372947816583434, |
|
"eval_rouge1": 0.33318061543843014, |
|
"eval_rouge2": 0.1591907361897143, |
|
"eval_rougeL": 0.26712978368697327, |
|
"eval_rougeLsum": 0.2672763669770769, |
|
"eval_runtime": 893.8551, |
|
"eval_samples_per_second": 1.631, |
|
"eval_steps_per_second": 0.205, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001273519054913226, |
|
"loss": 0.7364, |
|
"step": 9710 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001268803648723214, |
|
"loss": 0.7066, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00012640940180576852, |
|
"loss": 0.6871, |
|
"step": 9730 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001259390185009447, |
|
"loss": 0.6165, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00012546921716441136, |
|
"loss": 0.5763, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00012500000000000006, |
|
"loss": 0.549, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00012453136920880154, |
|
"loss": 0.5756, |
|
"step": 9770 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00012406332698915623, |
|
"loss": 0.6103, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00012359587553664364, |
|
"loss": 0.5919, |
|
"step": 9790 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00012312901704407165, |
|
"loss": 0.5845, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_bleu": 0.08752286174344838, |
|
"eval_loss": 0.773460328578949, |
|
"eval_meteor": 0.1421850867823396, |
|
"eval_rouge1": 0.3298986551193279, |
|
"eval_rouge2": 0.1547142091088999, |
|
"eval_rougeL": 0.26382376164807064, |
|
"eval_rougeLsum": 0.26396298947970476, |
|
"eval_runtime": 906.9245, |
|
"eval_samples_per_second": 1.608, |
|
"eval_steps_per_second": 0.202, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00012266275370146682, |
|
"loss": 0.6, |
|
"step": 9810 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00012219708769606398, |
|
"loss": 0.5771, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00012173202121229534, |
|
"loss": 0.57, |
|
"step": 9830 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00012126755643178158, |
|
"loss": 0.5917, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00012080369553332016, |
|
"loss": 0.6256, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00012034044069287567, |
|
"loss": 0.515, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00011987779408357039, |
|
"loss": 0.5654, |
|
"step": 9870 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0001194157578756726, |
|
"loss": 0.5812, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0001189543342365877, |
|
"loss": 0.5899, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00011849352533084742, |
|
"loss": 0.6001, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_bleu": 0.08335100172587591, |
|
"eval_loss": 0.7738049626350403, |
|
"eval_meteor": 0.137726241326334, |
|
"eval_rouge1": 0.32609782139980853, |
|
"eval_rouge2": 0.15276224386507742, |
|
"eval_rougeL": 0.26190468442551107, |
|
"eval_rougeLsum": 0.26189079971457113, |
|
"eval_runtime": 871.7823, |
|
"eval_samples_per_second": 1.672, |
|
"eval_steps_per_second": 0.21, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001180333333200996, |
|
"loss": 0.5459, |
|
"step": 9910 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001175737603630985, |
|
"loss": 0.5765, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00011711480861569432, |
|
"loss": 0.5662, |
|
"step": 9930 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.000116656480230823, |
|
"loss": 0.5976, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00011619877735849654, |
|
"loss": 0.5819, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00011574170214579263, |
|
"loss": 0.6135, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001152852567368444, |
|
"loss": 0.5921, |
|
"step": 9970 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00011482944327283092, |
|
"loss": 0.5475, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00011437426389196634, |
|
"loss": 0.6202, |
|
"step": 9990 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011391972072949103, |
|
"loss": 0.5409, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_bleu": 0.08613877560956296, |
|
"eval_loss": 0.7758316993713379, |
|
"eval_meteor": 0.1390858986492675, |
|
"eval_rouge1": 0.32920401616548073, |
|
"eval_rouge2": 0.1570104690313574, |
|
"eval_rougeL": 0.26574067707983184, |
|
"eval_rougeLsum": 0.2658030757914498, |
|
"eval_runtime": 858.8548, |
|
"eval_samples_per_second": 1.698, |
|
"eval_steps_per_second": 0.213, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011346581591766023, |
|
"loss": 0.6027, |
|
"step": 10010 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011301255158573479, |
|
"loss": 0.5798, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011255992985997155, |
|
"loss": 0.6236, |
|
"step": 10030 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00011210795286361228, |
|
"loss": 0.5835, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00011165662271687472, |
|
"loss": 0.582, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00011120594153694219, |
|
"loss": 0.5577, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00011075591143795352, |
|
"loss": 0.5753, |
|
"step": 10070 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00011030653453099353, |
|
"loss": 0.5667, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.000109857812924083, |
|
"loss": 0.588, |
|
"step": 10090 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00010940974872216835, |
|
"loss": 0.5305, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_bleu": 0.08651590184183662, |
|
"eval_loss": 0.7703911066055298, |
|
"eval_meteor": 0.1395275989000706, |
|
"eval_rouge1": 0.3302969178304646, |
|
"eval_rouge2": 0.15676210346077937, |
|
"eval_rougeL": 0.2648231662080226, |
|
"eval_rougeLsum": 0.26503782477899707, |
|
"eval_runtime": 871.9589, |
|
"eval_samples_per_second": 1.672, |
|
"eval_steps_per_second": 0.21, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00010896234402711244, |
|
"loss": 0.5334, |
|
"step": 10110 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0001085156009376844, |
|
"loss": 0.5625, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00010806952154954949, |
|
"loss": 0.5371, |
|
"step": 10130 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00010762410795525981, |
|
"loss": 0.5834, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00010717936224424422, |
|
"loss": 0.5657, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00010673528650279854, |
|
"loss": 0.5677, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00010629188281407556, |
|
"loss": 0.5904, |
|
"step": 10170 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.0001058491532580757, |
|
"loss": 0.6088, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00010540709991163711, |
|
"loss": 0.5933, |
|
"step": 10190 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00010496572484842554, |
|
"loss": 0.5893, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_bleu": 0.089530983333522, |
|
"eval_loss": 0.772607147693634, |
|
"eval_meteor": 0.1443407934532475, |
|
"eval_rouge1": 0.33192986354544063, |
|
"eval_rouge2": 0.15483055336733048, |
|
"eval_rougeL": 0.2644166774691801, |
|
"eval_rougeLsum": 0.2646752431573552, |
|
"eval_runtime": 908.5802, |
|
"eval_samples_per_second": 1.605, |
|
"eval_steps_per_second": 0.201, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00010452503013892515, |
|
"loss": 0.5314, |
|
"step": 10210 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00010408501785042864, |
|
"loss": 0.579, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00010364569004702709, |
|
"loss": 0.5467, |
|
"step": 10230 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00010320704878960132, |
|
"loss": 0.5593, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00010276909613581099, |
|
"loss": 0.5838, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00010233183414008557, |
|
"loss": 0.5822, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0001018952648536152, |
|
"loss": 0.5753, |
|
"step": 10270 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00010145939032433987, |
|
"loss": 0.5771, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00010102421259694087, |
|
"loss": 0.534, |
|
"step": 10290 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00010058973371283079, |
|
"loss": 0.5688, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_bleu": 0.08809811926330971, |
|
"eval_loss": 0.7703852653503418, |
|
"eval_meteor": 0.1416168131544854, |
|
"eval_rouge1": 0.3319761907382092, |
|
"eval_rouge2": 0.15820516943097546, |
|
"eval_rougeL": 0.2664765023191772, |
|
"eval_rougeLsum": 0.2666412367346098, |
|
"eval_runtime": 881.0156, |
|
"eval_samples_per_second": 1.655, |
|
"eval_steps_per_second": 0.208, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00010015595571014365, |
|
"loss": 0.5807, |
|
"step": 10310 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.972288062372603e-05, |
|
"loss": 0.5526, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 9.929051048512697e-05, |
|
"loss": 0.5775, |
|
"step": 10330 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.885884732258846e-05, |
|
"loss": 0.6064, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.842789316103634e-05, |
|
"loss": 0.5984, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.799765002207048e-05, |
|
"loss": 0.6041, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.756811992395512e-05, |
|
"loss": 0.5659, |
|
"step": 10370 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 9.713930488161e-05, |
|
"loss": 0.569, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.671120690660006e-05, |
|
"loss": 0.6157, |
|
"step": 10390 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.628382800712718e-05, |
|
"loss": 0.5697, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_bleu": 0.09149777598279565, |
|
"eval_loss": 0.7672737836837769, |
|
"eval_meteor": 0.146987179902779, |
|
"eval_rouge1": 0.3367525797385138, |
|
"eval_rouge2": 0.16056383093555285, |
|
"eval_rougeL": 0.2691661585054529, |
|
"eval_rougeLsum": 0.26928521308877285, |
|
"eval_runtime": 888.1416, |
|
"eval_samples_per_second": 1.642, |
|
"eval_steps_per_second": 0.206, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.585717018801943e-05, |
|
"loss": 0.5452, |
|
"step": 10410 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.543123545072241e-05, |
|
"loss": 0.5753, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 9.500602579329024e-05, |
|
"loss": 0.5478, |
|
"step": 10430 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.458154321037505e-05, |
|
"loss": 0.5644, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.415778969321867e-05, |
|
"loss": 0.5457, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.373476722964286e-05, |
|
"loss": 0.5663, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.331247780403979e-05, |
|
"loss": 0.5569, |
|
"step": 10470 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 9.289092339736321e-05, |
|
"loss": 0.5722, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.247010598711892e-05, |
|
"loss": 0.5509, |
|
"step": 10490 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.205002754735515e-05, |
|
"loss": 0.5809, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_bleu": 0.09340577680090072, |
|
"eval_loss": 0.7675700187683105, |
|
"eval_meteor": 0.14649790404153848, |
|
"eval_rouge1": 0.3364097748563989, |
|
"eval_rouge2": 0.15999938208570486, |
|
"eval_rougeL": 0.26875388155513424, |
|
"eval_rougeLsum": 0.26885507960626287, |
|
"eval_runtime": 898.7719, |
|
"eval_samples_per_second": 1.622, |
|
"eval_steps_per_second": 0.204, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.163069004865396e-05, |
|
"loss": 0.5497, |
|
"step": 10510 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.121209545812168e-05, |
|
"loss": 0.5645, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 9.079424573937936e-05, |
|
"loss": 0.5964, |
|
"step": 10530 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 9.03771428525541e-05, |
|
"loss": 0.5954, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.996078875426966e-05, |
|
"loss": 0.6002, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.954518539763718e-05, |
|
"loss": 0.5523, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.913033473224593e-05, |
|
"loss": 0.5929, |
|
"step": 10570 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 8.871623870415451e-05, |
|
"loss": 0.5437, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.830289925588159e-05, |
|
"loss": 0.5775, |
|
"step": 10590 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.789031832639638e-05, |
|
"loss": 0.6316, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_bleu": 0.091430753935327, |
|
"eval_loss": 0.7666054368019104, |
|
"eval_meteor": 0.1455915765123039, |
|
"eval_rouge1": 0.3320923748569906, |
|
"eval_rouge2": 0.15785984877990583, |
|
"eval_rougeL": 0.2654922382168149, |
|
"eval_rougeLsum": 0.2656547831229893, |
|
"eval_runtime": 902.9811, |
|
"eval_samples_per_second": 1.615, |
|
"eval_steps_per_second": 0.203, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.747849785111028e-05, |
|
"loss": 0.5735, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.706743976186729e-05, |
|
"loss": 0.5759, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 8.665714598693486e-05, |
|
"loss": 0.5884, |
|
"step": 10630 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.624761845099532e-05, |
|
"loss": 0.5787, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.583885907513658e-05, |
|
"loss": 0.5874, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.543086977684275e-05, |
|
"loss": 0.6063, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 8.502365246998616e-05, |
|
"loss": 0.5949, |
|
"step": 10670 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.461720906481718e-05, |
|
"loss": 0.5796, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.421154146795586e-05, |
|
"loss": 0.5487, |
|
"step": 10690 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.380665158238343e-05, |
|
"loss": 0.5758, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_bleu": 0.09265973555963605, |
|
"eval_loss": 0.7650392651557922, |
|
"eval_meteor": 0.14777758180443298, |
|
"eval_rouge1": 0.33567764511002285, |
|
"eval_rouge2": 0.1606127228435707, |
|
"eval_rougeL": 0.2686869642907861, |
|
"eval_rougeLsum": 0.2688085649915639, |
|
"eval_runtime": 879.9204, |
|
"eval_samples_per_second": 1.657, |
|
"eval_steps_per_second": 0.208, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.340254130743233e-05, |
|
"loss": 0.5971, |
|
"step": 10710 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 8.299921253877809e-05, |
|
"loss": 0.5621, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.259666716843037e-05, |
|
"loss": 0.5779, |
|
"step": 10730 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.219490708472354e-05, |
|
"loss": 0.5786, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.179393417230849e-05, |
|
"loss": 0.5931, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.13937503121435e-05, |
|
"loss": 0.6162, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 8.099435738148517e-05, |
|
"loss": 0.5635, |
|
"step": 10770 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.059575725388019e-05, |
|
"loss": 0.5994, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 8.019795179915584e-05, |
|
"loss": 0.5847, |
|
"step": 10790 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.980094288341211e-05, |
|
"loss": 0.58, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_bleu": 0.09242445407904702, |
|
"eval_loss": 0.762134850025177, |
|
"eval_meteor": 0.14684020303531567, |
|
"eval_rouge1": 0.3343030705608627, |
|
"eval_rouge2": 0.1606706329060451, |
|
"eval_rougeL": 0.26860717791389854, |
|
"eval_rougeLsum": 0.26885395331943707, |
|
"eval_runtime": 896.974, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.204, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.940473236901202e-05, |
|
"loss": 0.5634, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.900932211457326e-05, |
|
"loss": 0.566, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.861471397495998e-05, |
|
"loss": 0.5791, |
|
"step": 10830 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.822090980127312e-05, |
|
"loss": 0.5495, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.782791144084245e-05, |
|
"loss": 0.5444, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.74357207372178e-05, |
|
"loss": 0.5442, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 7.704433953015996e-05, |
|
"loss": 0.5643, |
|
"step": 10870 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.665376965563267e-05, |
|
"loss": 0.5733, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.626401294579369e-05, |
|
"loss": 0.5746, |
|
"step": 10890 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.587507122898601e-05, |
|
"loss": 0.5638, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_bleu": 0.089537290822849, |
|
"eval_loss": 0.7606632113456726, |
|
"eval_meteor": 0.1449897555235905, |
|
"eval_rouge1": 0.3332662651280562, |
|
"eval_rouge2": 0.1610878677164809, |
|
"eval_rougeL": 0.26858714023223096, |
|
"eval_rougeLsum": 0.26876594978594354, |
|
"eval_runtime": 868.091, |
|
"eval_samples_per_second": 1.68, |
|
"eval_steps_per_second": 0.211, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.54869463297298e-05, |
|
"loss": 0.5802, |
|
"step": 10910 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.509964006871342e-05, |
|
"loss": 0.5653, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.471315426278488e-05, |
|
"loss": 0.5997, |
|
"step": 10930 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.432749072494365e-05, |
|
"loss": 0.5469, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.394265126433186e-05, |
|
"loss": 0.5552, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.355863768622604e-05, |
|
"loss": 0.5612, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 7.317545179202814e-05, |
|
"loss": 0.6028, |
|
"step": 10970 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.279309537925791e-05, |
|
"loss": 0.5558, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.24115702415438e-05, |
|
"loss": 0.6165, |
|
"step": 10990 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.203087816861465e-05, |
|
"loss": 0.5924, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_bleu": 0.09254252833387837, |
|
"eval_loss": 0.7580552101135254, |
|
"eval_meteor": 0.1487179718286077, |
|
"eval_rouge1": 0.3357887007632099, |
|
"eval_rouge2": 0.1613849865003667, |
|
"eval_rougeL": 0.2690760558011852, |
|
"eval_rougeLsum": 0.2691567935964941, |
|
"eval_runtime": 882.2156, |
|
"eval_samples_per_second": 1.653, |
|
"eval_steps_per_second": 0.207, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 7.165102094629164e-05, |
|
"loss": 0.5886, |
|
"step": 11010 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.12720003564796e-05, |
|
"loss": 0.5581, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.089381817715859e-05, |
|
"loss": 0.5675, |
|
"step": 11030 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.051647618237584e-05, |
|
"loss": 0.5704, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.013997614223735e-05, |
|
"loss": 0.5618, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 6.976431982289918e-05, |
|
"loss": 0.578, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.93895089865601e-05, |
|
"loss": 0.5521, |
|
"step": 11070 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.901554539145219e-05, |
|
"loss": 0.5543, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.864243079183319e-05, |
|
"loss": 0.5385, |
|
"step": 11090 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.827016693797867e-05, |
|
"loss": 0.5691, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_bleu": 0.09567666824974702, |
|
"eval_loss": 0.7573705911636353, |
|
"eval_meteor": 0.15182971978916235, |
|
"eval_rouge1": 0.33875513059536566, |
|
"eval_rouge2": 0.16409429043711457, |
|
"eval_rougeL": 0.2726580072350722, |
|
"eval_rougeLsum": 0.27279754151676305, |
|
"eval_runtime": 886.0016, |
|
"eval_samples_per_second": 1.646, |
|
"eval_steps_per_second": 0.207, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 6.789875557617279e-05, |
|
"loss": 0.6145, |
|
"step": 11110 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.752819844870103e-05, |
|
"loss": 0.5475, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.715849729384165e-05, |
|
"loss": 0.5457, |
|
"step": 11130 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.678965384585734e-05, |
|
"loss": 0.5717, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.642166983498752e-05, |
|
"loss": 0.5603, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.605454698743999e-05, |
|
"loss": 0.5449, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.56882870253826e-05, |
|
"loss": 0.5601, |
|
"step": 11170 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.532289166693579e-05, |
|
"loss": 0.5671, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.495836262616378e-05, |
|
"loss": 0.6011, |
|
"step": 11190 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.459470161306722e-05, |
|
"loss": 0.573, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_bleu": 0.09669621525646314, |
|
"eval_loss": 0.754612147808075, |
|
"eval_meteor": 0.1522786494560258, |
|
"eval_rouge1": 0.34213440762917285, |
|
"eval_rouge2": 0.16585313954171707, |
|
"eval_rougeL": 0.2745908548352378, |
|
"eval_rougeLsum": 0.27468421725712566, |
|
"eval_runtime": 890.0677, |
|
"eval_samples_per_second": 1.638, |
|
"eval_steps_per_second": 0.206, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 6.423191033357473e-05, |
|
"loss": 0.5711, |
|
"step": 11210 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.386999048953481e-05, |
|
"loss": 0.5867, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.350894377870862e-05, |
|
"loss": 0.5595, |
|
"step": 11230 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.31487718947609e-05, |
|
"loss": 0.5815, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.278947652725267e-05, |
|
"loss": 0.585, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 6.24310593616337e-05, |
|
"loss": 0.5438, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.207352207923356e-05, |
|
"loss": 0.579, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.17168663572546e-05, |
|
"loss": 0.5713, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.136109386876377e-05, |
|
"loss": 0.581, |
|
"step": 11290 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.1006206282684576e-05, |
|
"loss": 0.565, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_bleu": 0.09693076606184378, |
|
"eval_loss": 0.752987265586853, |
|
"eval_meteor": 0.15195052351303506, |
|
"eval_rouge1": 0.3390728857820168, |
|
"eval_rouge2": 0.16408775910224543, |
|
"eval_rougeL": 0.2717820079608549, |
|
"eval_rougeLsum": 0.27183992843277444, |
|
"eval_runtime": 919.394, |
|
"eval_samples_per_second": 1.586, |
|
"eval_steps_per_second": 0.199, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.06522052637897e-05, |
|
"loss": 0.5841, |
|
"step": 11310 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 6.0299092472692914e-05, |
|
"loss": 0.5634, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.9946869565841165e-05, |
|
"loss": 0.5519, |
|
"step": 11330 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.9595538195507116e-05, |
|
"loss": 0.5649, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5.92451000097812e-05, |
|
"loss": 0.5727, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.889555665256402e-05, |
|
"loss": 0.5409, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.8546909763558336e-05, |
|
"loss": 0.5704, |
|
"step": 11370 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.819916097826178e-05, |
|
"loss": 0.5407, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.7852311927958975e-05, |
|
"loss": 0.5766, |
|
"step": 11390 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 5.750636423971376e-05, |
|
"loss": 0.5701, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_bleu": 0.09451444544780298, |
|
"eval_loss": 0.7516891956329346, |
|
"eval_meteor": 0.1491007947413212, |
|
"eval_rouge1": 0.3374223084956188, |
|
"eval_rouge2": 0.1641744168149868, |
|
"eval_rougeL": 0.27153010563954705, |
|
"eval_rougeLsum": 0.2718139102707796, |
|
"eval_runtime": 876.804, |
|
"eval_samples_per_second": 1.663, |
|
"eval_steps_per_second": 0.209, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.716131953636183e-05, |
|
"loss": 0.5613, |
|
"step": 11410 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.681717943650305e-05, |
|
"loss": 0.5466, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.647394555449356e-05, |
|
"loss": 0.5975, |
|
"step": 11430 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.6131619500438654e-05, |
|
"loss": 0.5635, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 5.579020288018505e-05, |
|
"loss": 0.5606, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.544969729531302e-05, |
|
"loss": 0.586, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.511010434312963e-05, |
|
"loss": 0.5625, |
|
"step": 11470 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.477142561666046e-05, |
|
"loss": 0.6051, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.443366270464245e-05, |
|
"loss": 0.5405, |
|
"step": 11490 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 5.409681719151685e-05, |
|
"loss": 0.567, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_bleu": 0.09659311052432423, |
|
"eval_loss": 0.7519721984863281, |
|
"eval_meteor": 0.15300008106339708, |
|
"eval_rouge1": 0.3402818843893822, |
|
"eval_rouge2": 0.16345269648502525, |
|
"eval_rougeL": 0.2724127119961447, |
|
"eval_rougeLsum": 0.27274514869536814, |
|
"eval_runtime": 893.8056, |
|
"eval_samples_per_second": 1.631, |
|
"eval_steps_per_second": 0.205, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.376089065742093e-05, |
|
"loss": 0.5727, |
|
"step": 11510 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.342588467818132e-05, |
|
"loss": 0.5817, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.30918008253064e-05, |
|
"loss": 0.5795, |
|
"step": 11530 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.2758640665978546e-05, |
|
"loss": 0.5444, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5.242640576304741e-05, |
|
"loss": 0.53, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.209509767502224e-05, |
|
"loss": 0.5508, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.1764717956064386e-05, |
|
"loss": 0.5678, |
|
"step": 11570 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.143526815598054e-05, |
|
"loss": 0.5702, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.110674982021482e-05, |
|
"loss": 0.5792, |
|
"step": 11590 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5.077916448984218e-05, |
|
"loss": 0.5471, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_bleu": 0.09654326870647105, |
|
"eval_loss": 0.7493044137954712, |
|
"eval_meteor": 0.15195282986704975, |
|
"eval_rouge1": 0.33993616177484764, |
|
"eval_rouge2": 0.1655289963553561, |
|
"eval_rougeL": 0.27337080205371495, |
|
"eval_rougeLsum": 0.2735205543764996, |
|
"eval_runtime": 908.3763, |
|
"eval_samples_per_second": 1.605, |
|
"eval_steps_per_second": 0.201, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.045251370156076e-05, |
|
"loss": 0.5938, |
|
"step": 11610 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 5.012679898768452e-05, |
|
"loss": 0.5613, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.98020218761368e-05, |
|
"loss": 0.5452, |
|
"step": 11630 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.9478183890442285e-05, |
|
"loss": 0.5476, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.915528654972015e-05, |
|
"loss": 0.5542, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.883333136867751e-05, |
|
"loss": 0.5421, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.851231985760132e-05, |
|
"loss": 0.5665, |
|
"step": 11670 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.819225352235204e-05, |
|
"loss": 0.5335, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.787313386435632e-05, |
|
"loss": 0.5787, |
|
"step": 11690 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.755496238059981e-05, |
|
"loss": 0.5683, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_bleu": 0.09798471221336068, |
|
"eval_loss": 0.7474952936172485, |
|
"eval_meteor": 0.15420187655774772, |
|
"eval_rouge1": 0.34319670267039215, |
|
"eval_rouge2": 0.16729896354968785, |
|
"eval_rougeL": 0.27489475132848673, |
|
"eval_rougeLsum": 0.2751072116414318, |
|
"eval_runtime": 897.2313, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.204, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.723774056362046e-05, |
|
"loss": 0.5546, |
|
"step": 11710 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.692146990150131e-05, |
|
"loss": 0.5768, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.660615187786341e-05, |
|
"loss": 0.5757, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 4.629178797185915e-05, |
|
"loss": 0.5727, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.597837965816515e-05, |
|
"loss": 0.5355, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.5665928406975366e-05, |
|
"loss": 0.5899, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.535443568399403e-05, |
|
"loss": 0.5605, |
|
"step": 11770 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.5043902950429124e-05, |
|
"loss": 0.5798, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 4.473433166298532e-05, |
|
"loss": 0.5682, |
|
"step": 11790 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.442572327385694e-05, |
|
"loss": 0.5402, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_bleu": 0.09348541312931007, |
|
"eval_loss": 0.748202919960022, |
|
"eval_meteor": 0.14941673641095454, |
|
"eval_rouge1": 0.3404406614890879, |
|
"eval_rouge2": 0.1683617380475291, |
|
"eval_rougeL": 0.2749536079055395, |
|
"eval_rougeLsum": 0.2750200876510746, |
|
"eval_runtime": 849.8792, |
|
"eval_samples_per_second": 1.716, |
|
"eval_steps_per_second": 0.215, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.411807923072164e-05, |
|
"loss": 0.5465, |
|
"step": 11810 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.381140097673328e-05, |
|
"loss": 0.5457, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.350568995051501e-05, |
|
"loss": 0.5906, |
|
"step": 11830 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.3200947586152976e-05, |
|
"loss": 0.5147, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.289717531318932e-05, |
|
"loss": 0.5841, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.2594374556615215e-05, |
|
"loss": 0.5304, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.2292546736864916e-05, |
|
"loss": 0.536, |
|
"step": 11870 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.1991693269808367e-05, |
|
"loss": 0.5361, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 4.169181556674467e-05, |
|
"loss": 0.5729, |
|
"step": 11890 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.139291503439613e-05, |
|
"loss": 0.5649, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_bleu": 0.09786265490061229, |
|
"eval_loss": 0.745526909828186, |
|
"eval_meteor": 0.1526483330907014, |
|
"eval_rouge1": 0.34189580982015344, |
|
"eval_rouge2": 0.1680239361162218, |
|
"eval_rougeL": 0.2741906650155964, |
|
"eval_rougeLsum": 0.2742155466381009, |
|
"eval_runtime": 892.0994, |
|
"eval_samples_per_second": 1.634, |
|
"eval_steps_per_second": 0.205, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.109499307490066e-05, |
|
"loss": 0.5678, |
|
"step": 11910 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.0798051085806034e-05, |
|
"loss": 0.5635, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.050209046006287e-05, |
|
"loss": 0.558, |
|
"step": 11930 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.020711258601814e-05, |
|
"loss": 0.5449, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.991311884740892e-05, |
|
"loss": 0.561, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.962011062335563e-05, |
|
"loss": 0.5725, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.93280892883556e-05, |
|
"loss": 0.586, |
|
"step": 11970 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.903705621227685e-05, |
|
"loss": 0.5431, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.874701276035123e-05, |
|
"loss": 0.5659, |
|
"step": 11990 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.845796029316848e-05, |
|
"loss": 0.5316, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_bleu": 0.10100476957195267, |
|
"eval_loss": 0.7438293099403381, |
|
"eval_meteor": 0.15711395720502055, |
|
"eval_rouge1": 0.34583777175461966, |
|
"eval_rouge2": 0.1704569137198032, |
|
"eval_rougeL": 0.276850315093792, |
|
"eval_rougeLsum": 0.27691836225365285, |
|
"eval_runtime": 899.4257, |
|
"eval_samples_per_second": 1.621, |
|
"eval_steps_per_second": 0.203, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.8169900166669686e-05, |
|
"loss": 0.5601, |
|
"step": 12010 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7882833732140605e-05, |
|
"loss": 0.5749, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7596762336205986e-05, |
|
"loss": 0.5478, |
|
"step": 12030 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7311687320822546e-05, |
|
"loss": 0.5943, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.7027610023273015e-05, |
|
"loss": 0.539, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6744531776160134e-05, |
|
"loss": 0.5261, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.646245390739974e-05, |
|
"loss": 0.551, |
|
"step": 12070 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.6181377740215183e-05, |
|
"loss": 0.5747, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.590130459313076e-05, |
|
"loss": 0.5688, |
|
"step": 12090 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.562223577996551e-05, |
|
"loss": 0.5556, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_bleu": 0.09966539519817649, |
|
"eval_loss": 0.7424455285072327, |
|
"eval_meteor": 0.15665923943127544, |
|
"eval_rouge1": 0.3441533388692247, |
|
"eval_rouge2": 0.17048004262995126, |
|
"eval_rougeL": 0.277212962618987, |
|
"eval_rougeLsum": 0.27746090653359995, |
|
"eval_runtime": 886.9354, |
|
"eval_samples_per_second": 1.644, |
|
"eval_steps_per_second": 0.206, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.534417260982728e-05, |
|
"loss": 0.556, |
|
"step": 12110 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.50671163871065e-05, |
|
"loss": 0.5951, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.4791068411469814e-05, |
|
"loss": 0.553, |
|
"step": 12130 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.451602997785441e-05, |
|
"loss": 0.549, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.424200237646172e-05, |
|
"loss": 0.5672, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.396898689275113e-05, |
|
"loss": 0.5423, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.369698480743449e-05, |
|
"loss": 0.5503, |
|
"step": 12170 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.342599739646965e-05, |
|
"loss": 0.5217, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.315602593105477e-05, |
|
"loss": 0.5416, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.288707167762201e-05, |
|
"loss": 0.5814, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_bleu": 0.10041581826901816, |
|
"eval_loss": 0.7406966090202332, |
|
"eval_meteor": 0.15663880758960727, |
|
"eval_rouge1": 0.34468331121112705, |
|
"eval_rouge2": 0.17018303583605143, |
|
"eval_rougeL": 0.2770648241756336, |
|
"eval_rougeLsum": 0.27722355912734675, |
|
"eval_runtime": 897.432, |
|
"eval_samples_per_second": 1.625, |
|
"eval_steps_per_second": 0.204, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.261913589783197e-05, |
|
"loss": 0.5241, |
|
"step": 12210 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.235221984856762e-05, |
|
"loss": 0.5684, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2086324781928244e-05, |
|
"loss": 0.5624, |
|
"step": 12230 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.1821451945223845e-05, |
|
"loss": 0.5689, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.1557602580969154e-05, |
|
"loss": 0.611, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.129477792687763e-05, |
|
"loss": 0.5697, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.103297921585613e-05, |
|
"loss": 0.5692, |
|
"step": 12270 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.0772207675998614e-05, |
|
"loss": 0.5185, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0512464530580514e-05, |
|
"loss": 0.5331, |
|
"step": 12290 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0253750998053414e-05, |
|
"loss": 0.5542, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_bleu": 0.0996036559150731, |
|
"eval_loss": 0.7407082319259644, |
|
"eval_meteor": 0.15756565444456452, |
|
"eval_rouge1": 0.34545981541047066, |
|
"eval_rouge2": 0.17206381974902546, |
|
"eval_rougeL": 0.2791030663611749, |
|
"eval_rougeLsum": 0.27911797898126445, |
|
"eval_runtime": 868.2424, |
|
"eval_samples_per_second": 1.679, |
|
"eval_steps_per_second": 0.211, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.999606829203866e-05, |
|
"loss": 0.5717, |
|
"step": 12310 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9739417621322206e-05, |
|
"loss": 0.5697, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.9483800189848697e-05, |
|
"loss": 0.5611, |
|
"step": 12330 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.9229217196715756e-05, |
|
"loss": 0.5582, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8975669836168618e-05, |
|
"loss": 0.5775, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.872315929759439e-05, |
|
"loss": 0.5511, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.8471686765516288e-05, |
|
"loss": 0.5508, |
|
"step": 12370 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.822125341958848e-05, |
|
"loss": 0.5126, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.7971860434590047e-05, |
|
"loss": 0.5488, |
|
"step": 12390 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.772350898042006e-05, |
|
"loss": 0.5395, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_bleu": 0.09793411558915392, |
|
"eval_loss": 0.7390555143356323, |
|
"eval_meteor": 0.15480160724591788, |
|
"eval_rouge1": 0.34472812673992614, |
|
"eval_rouge2": 0.17075180566044076, |
|
"eval_rougeL": 0.2783650491528822, |
|
"eval_rougeLsum": 0.27856745790022225, |
|
"eval_runtime": 860.021, |
|
"eval_samples_per_second": 1.695, |
|
"eval_steps_per_second": 0.213, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.747620022209163e-05, |
|
"loss": 0.5653, |
|
"step": 12410 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.722993531972645e-05, |
|
"loss": 0.5698, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.698471542854991e-05, |
|
"loss": 0.5259, |
|
"step": 12430 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6740541698884847e-05, |
|
"loss": 0.5496, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6497415276146685e-05, |
|
"loss": 0.5801, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6255337300838205e-05, |
|
"loss": 0.5774, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.6014308908543532e-05, |
|
"loss": 0.5549, |
|
"step": 12470 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.577433122992348e-05, |
|
"loss": 0.5526, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.553540539070995e-05, |
|
"loss": 0.5966, |
|
"step": 12490 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5297532511700556e-05, |
|
"loss": 0.601, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_bleu": 0.1005040573050979, |
|
"eval_loss": 0.7373713254928589, |
|
"eval_meteor": 0.158927257890583, |
|
"eval_rouge1": 0.3468610329219707, |
|
"eval_rouge2": 0.17190859590573837, |
|
"eval_rougeL": 0.27812088802684365, |
|
"eval_rougeLsum": 0.2783355070004713, |
|
"eval_runtime": 887.3087, |
|
"eval_samples_per_second": 1.643, |
|
"eval_steps_per_second": 0.206, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.5060713708753624e-05, |
|
"loss": 0.5463, |
|
"step": 12510 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.482495009278282e-05, |
|
"loss": 0.5753, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.4590242769751774e-05, |
|
"loss": 0.5606, |
|
"step": 12530 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.435659284066924e-05, |
|
"loss": 0.547, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.4124001401583773e-05, |
|
"loss": 0.5286, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.389246954357835e-05, |
|
"loss": 0.5883, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.3661998352765668e-05, |
|
"loss": 0.5619, |
|
"step": 12570 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.3432588910282826e-05, |
|
"loss": 0.5448, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.3204242292286237e-05, |
|
"loss": 0.564, |
|
"step": 12590 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.2976959569946554e-05, |
|
"loss": 0.5481, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_bleu": 0.10270766304979792, |
|
"eval_loss": 0.736431360244751, |
|
"eval_meteor": 0.16042158822558053, |
|
"eval_rouge1": 0.3494645746359814, |
|
"eval_rouge2": 0.17282415796017336, |
|
"eval_rougeL": 0.28063431426696894, |
|
"eval_rougeLsum": 0.2807153560050697, |
|
"eval_runtime": 892.7218, |
|
"eval_samples_per_second": 1.633, |
|
"eval_steps_per_second": 0.205, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.2750741809443808e-05, |
|
"loss": 0.5529, |
|
"step": 12610 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.252559007196231e-05, |
|
"loss": 0.5607, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.2301505413685613e-05, |
|
"loss": 0.5276, |
|
"step": 12630 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.2078488885791643e-05, |
|
"loss": 0.5236, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.1856541534447773e-05, |
|
"loss": 0.5516, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.1635664400805804e-05, |
|
"loss": 0.5362, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.1415858520997255e-05, |
|
"loss": 0.545, |
|
"step": 12670 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.1197124926128382e-05, |
|
"loss": 0.5386, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0979464642275237e-05, |
|
"loss": 0.5631, |
|
"step": 12690 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.076287869047927e-05, |
|
"loss": 0.5651, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_bleu": 0.10040652618969621, |
|
"eval_loss": 0.7359732985496521, |
|
"eval_meteor": 0.1574342173339186, |
|
"eval_rouge1": 0.3461762106584311, |
|
"eval_rouge2": 0.17204462260550002, |
|
"eval_rougeL": 0.2788443761970661, |
|
"eval_rougeLsum": 0.27897930241934155, |
|
"eval_runtime": 893.4452, |
|
"eval_samples_per_second": 1.632, |
|
"eval_steps_per_second": 0.205, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0547368086742013e-05, |
|
"loss": 0.5625, |
|
"step": 12710 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.0332933842020508e-05, |
|
"loss": 0.581, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.0119576962222913e-05, |
|
"loss": 0.5756, |
|
"step": 12730 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9907298448203214e-05, |
|
"loss": 0.5158, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9696099295756885e-05, |
|
"loss": 0.5386, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9485980495616174e-05, |
|
"loss": 0.546, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.9276943033445292e-05, |
|
"loss": 0.5266, |
|
"step": 12770 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.9068987889836047e-05, |
|
"loss": 0.565, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.886211604030305e-05, |
|
"loss": 0.5768, |
|
"step": 12790 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.865632845527912e-05, |
|
"loss": 0.5712, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_bleu": 0.10092666745075086, |
|
"eval_loss": 0.7347350716590881, |
|
"eval_meteor": 0.1573389047028448, |
|
"eval_rouge1": 0.34609349040173526, |
|
"eval_rouge2": 0.17181357652793253, |
|
"eval_rougeL": 0.27823672744284345, |
|
"eval_rougeLsum": 0.27841579581359654, |
|
"eval_runtime": 894.5382, |
|
"eval_samples_per_second": 1.63, |
|
"eval_steps_per_second": 0.205, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.8451626100111003e-05, |
|
"loss": 0.5655, |
|
"step": 12810 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.824800993505432e-05, |
|
"loss": 0.5474, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.8045480915269807e-05, |
|
"loss": 0.5418, |
|
"step": 12830 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7844039990818102e-05, |
|
"loss": 0.5254, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.764368810665562e-05, |
|
"loss": 0.553, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.7444426202630342e-05, |
|
"loss": 0.5325, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.7246255213476892e-05, |
|
"loss": 0.5119, |
|
"step": 12870 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.704917606881262e-05, |
|
"loss": 0.5029, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.685318969313296e-05, |
|
"loss": 0.5577, |
|
"step": 12890 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.6658297005807178e-05, |
|
"loss": 0.5657, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_bleu": 0.10163919434601944, |
|
"eval_loss": 0.7352385520935059, |
|
"eval_meteor": 0.15888338921782014, |
|
"eval_rouge1": 0.34757518993317543, |
|
"eval_rouge2": 0.1726758051895667, |
|
"eval_rougeL": 0.27939187861369463, |
|
"eval_rougeLsum": 0.2796240509482587, |
|
"eval_runtime": 889.6237, |
|
"eval_samples_per_second": 1.639, |
|
"eval_steps_per_second": 0.206, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.646449892107407e-05, |
|
"loss": 0.5326, |
|
"step": 12910 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.6271796348037742e-05, |
|
"loss": 0.5278, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.608019019066312e-05, |
|
"loss": 0.5141, |
|
"step": 12930 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5889681347772022e-05, |
|
"loss": 0.5345, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.570027071303873e-05, |
|
"loss": 0.5349, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.5511959174985712e-05, |
|
"loss": 0.574, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.5324747616979772e-05, |
|
"loss": 0.5652, |
|
"step": 12970 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.5138636917227667e-05, |
|
"loss": 0.5522, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.495362794877203e-05, |
|
"loss": 0.5447, |
|
"step": 12990 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.4769721579487223e-05, |
|
"loss": 0.5539, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_bleu": 0.09980297055295018, |
|
"eval_loss": 0.7346807718276978, |
|
"eval_meteor": 0.15562732207277716, |
|
"eval_rouge1": 0.34657429697696573, |
|
"eval_rouge2": 0.17156388172356662, |
|
"eval_rougeL": 0.27927918876917873, |
|
"eval_rougeLsum": 0.27942559780588516, |
|
"eval_runtime": 884.9996, |
|
"eval_samples_per_second": 1.647, |
|
"eval_steps_per_second": 0.207, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.4586918672075427e-05, |
|
"loss": 0.5644, |
|
"step": 13010 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4405220084062532e-05, |
|
"loss": 0.5949, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4224626667793922e-05, |
|
"loss": 0.5773, |
|
"step": 13030 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4045139270430773e-05, |
|
"loss": 0.5723, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.3866758733945911e-05, |
|
"loss": 0.5538, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.3689485895119869e-05, |
|
"loss": 0.5845, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3513321585536975e-05, |
|
"loss": 0.5556, |
|
"step": 13070 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.3338266631581553e-05, |
|
"loss": 0.544, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.316432185443378e-05, |
|
"loss": 0.5413, |
|
"step": 13090 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.2991488070066253e-05, |
|
"loss": 0.5535, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_bleu": 0.09928934624142, |
|
"eval_loss": 0.7340034246444702, |
|
"eval_meteor": 0.1565346095149297, |
|
"eval_rouge1": 0.3444092132519548, |
|
"eval_rouge2": 0.1708440682004594, |
|
"eval_rougeL": 0.27786881606057756, |
|
"eval_rougeLsum": 0.27805772250635513, |
|
"eval_runtime": 871.5889, |
|
"eval_samples_per_second": 1.673, |
|
"eval_steps_per_second": 0.21, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.281976608923971e-05, |
|
"loss": 0.5807, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2649156717499443e-05, |
|
"loss": 0.5422, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2479660755171735e-05, |
|
"loss": 0.5351, |
|
"step": 13130 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.2311278997359566e-05, |
|
"loss": 0.5675, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.214401223393946e-05, |
|
"loss": 0.5689, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1977861249557381e-05, |
|
"loss": 0.5548, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1812826823625194e-05, |
|
"loss": 0.533, |
|
"step": 13170 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.164890973031707e-05, |
|
"loss": 0.5616, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.148611073856573e-05, |
|
"loss": 0.5409, |
|
"step": 13190 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 1.1324430612058928e-05, |
|
"loss": 0.5508, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_bleu": 0.0992730263673719, |
|
"eval_loss": 0.7337315082550049, |
|
"eval_meteor": 0.15597886653026946, |
|
"eval_rouge1": 0.3457664153171462, |
|
"eval_rouge2": 0.17218406358699478, |
|
"eval_rougeL": 0.27835859343494507, |
|
"eval_rougeLsum": 0.2787157812501009, |
|
"eval_runtime": 891.1062, |
|
"eval_samples_per_second": 1.636, |
|
"eval_steps_per_second": 0.205, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.1163870109235808e-05, |
|
"loss": 0.5473, |
|
"step": 13210 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.100442998328341e-05, |
|
"loss": 0.5696, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0846110982133061e-05, |
|
"loss": 0.5332, |
|
"step": 13230 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0688913848456966e-05, |
|
"loss": 0.526, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.0532839319664561e-05, |
|
"loss": 0.5411, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.037788812789936e-05, |
|
"loss": 0.5301, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0224061000035117e-05, |
|
"loss": 0.534, |
|
"step": 13270 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.0071358657672613e-05, |
|
"loss": 0.5604, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.919781817136487e-06, |
|
"loss": 0.5399, |
|
"step": 13290 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.76933118947143e-06, |
|
"loss": 0.5723, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_bleu": 0.10246628269242825, |
|
"eval_loss": 0.7333744168281555, |
|
"eval_meteor": 0.15928134076198716, |
|
"eval_rouge1": 0.3475656867028768, |
|
"eval_rouge2": 0.1729267246770691, |
|
"eval_rougeL": 0.2796649835084845, |
|
"eval_rougeLsum": 0.27981386138496755, |
|
"eval_runtime": 888.2991, |
|
"eval_samples_per_second": 1.641, |
|
"eval_steps_per_second": 0.206, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.620007480439202e-06, |
|
"loss": 0.5798, |
|
"step": 13310 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.471811390515284e-06, |
|
"loss": 0.5727, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.324743614885279e-06, |
|
"loss": 0.538, |
|
"step": 13330 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.178804843442107e-06, |
|
"loss": 0.5624, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 9.033995760782426e-06, |
|
"loss": 0.5398, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.890317046203577e-06, |
|
"loss": 0.5415, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.747769373700338e-06, |
|
"loss": 0.5617, |
|
"step": 13370 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.606353411961837e-06, |
|
"loss": 0.5415, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.466069824368345e-06, |
|
"loss": 0.5406, |
|
"step": 13390 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.326919268988098e-06, |
|
"loss": 0.5258, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_bleu": 0.10156079720565611, |
|
"eval_loss": 0.7325248718261719, |
|
"eval_meteor": 0.15866863843726547, |
|
"eval_rouge1": 0.34802954724699564, |
|
"eval_rouge2": 0.17357419666478002, |
|
"eval_rougeL": 0.2800104074681471, |
|
"eval_rougeLsum": 0.2802216595217447, |
|
"eval_runtime": 878.2912, |
|
"eval_samples_per_second": 1.66, |
|
"eval_steps_per_second": 0.208, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.188902398574393e-06, |
|
"loss": 0.5569, |
|
"step": 13410 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 8.052019860562449e-06, |
|
"loss": 0.5838, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.916272297066207e-06, |
|
"loss": 0.5593, |
|
"step": 13430 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.781660344875596e-06, |
|
"loss": 0.5785, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.648184635453358e-06, |
|
"loss": 0.5456, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.515845794932113e-06, |
|
"loss": 0.5424, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.3846444441114935e-06, |
|
"loss": 0.5289, |
|
"step": 13470 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.254581198455151e-06, |
|
"loss": 0.5693, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.1256566680878985e-06, |
|
"loss": 0.5431, |
|
"step": 13490 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.997871457792904e-06, |
|
"loss": 0.5455, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_bleu": 0.10167425548935258, |
|
"eval_loss": 0.7320215106010437, |
|
"eval_meteor": 0.15906516015435773, |
|
"eval_rouge1": 0.3479376560352928, |
|
"eval_rouge2": 0.17281257972388747, |
|
"eval_rougeL": 0.2806442588652185, |
|
"eval_rougeLsum": 0.280986040501735, |
|
"eval_runtime": 886.2681, |
|
"eval_samples_per_second": 1.645, |
|
"eval_steps_per_second": 0.206, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.871226167008748e-06, |
|
"loss": 0.5641, |
|
"step": 13510 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.745721389826625e-06, |
|
"loss": 0.5339, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.621357714987758e-06, |
|
"loss": 0.5183, |
|
"step": 13530 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.498135725880261e-06, |
|
"loss": 0.5291, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.376056000536757e-06, |
|
"loss": 0.5538, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.255119111631541e-06, |
|
"loss": 0.5426, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.135325626477755e-06, |
|
"loss": 0.5318, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.016676107024971e-06, |
|
"loss": 0.5818, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 5.899171109856411e-06, |
|
"loss": 0.5404, |
|
"step": 13590 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.78281118618626e-06, |
|
"loss": 0.5671, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_bleu": 0.1007919137136251, |
|
"eval_loss": 0.7318324446678162, |
|
"eval_meteor": 0.15723030918924374, |
|
"eval_rouge1": 0.34667767052184184, |
|
"eval_rouge2": 0.17226911283980478, |
|
"eval_rougeL": 0.27936862457654044, |
|
"eval_rougeLsum": 0.2795830015719606, |
|
"eval_runtime": 884.4168, |
|
"eval_samples_per_second": 1.649, |
|
"eval_steps_per_second": 0.207, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.667596881857334e-06, |
|
"loss": 0.5602, |
|
"step": 13610 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.55352873733822e-06, |
|
"loss": 0.5445, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.440607287721e-06, |
|
"loss": 0.5377, |
|
"step": 13630 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.328833062718586e-06, |
|
"loss": 0.5588, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.2182065866621945e-06, |
|
"loss": 0.5622, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.108728378499156e-06, |
|
"loss": 0.5463, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.000398951790136e-06, |
|
"loss": 0.5564, |
|
"step": 13670 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.8932188147068345e-06, |
|
"loss": 0.5337, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 4.787188470029874e-06, |
|
"loss": 0.573, |
|
"step": 13690 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.682308415146025e-06, |
|
"loss": 0.5327, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_bleu": 0.10173626706056466, |
|
"eval_loss": 0.7313176393508911, |
|
"eval_meteor": 0.15728269725931887, |
|
"eval_rouge1": 0.3472733480681677, |
|
"eval_rouge2": 0.17260088108297064, |
|
"eval_rougeL": 0.27949937701394906, |
|
"eval_rougeLsum": 0.2796326901165262, |
|
"eval_runtime": 881.6852, |
|
"eval_samples_per_second": 1.654, |
|
"eval_steps_per_second": 0.208, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.578579142046124e-06, |
|
"loss": 0.5567, |
|
"step": 13710 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.4760011373227435e-06, |
|
"loss": 0.5537, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.374574882167776e-06, |
|
"loss": 0.5448, |
|
"step": 13730 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 4.274300852370378e-06, |
|
"loss": 0.5372, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.175179518314615e-06, |
|
"loss": 0.5662, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.077211344977155e-06, |
|
"loss": 0.5504, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.9803967919253834e-06, |
|
"loss": 0.5559, |
|
"step": 13770 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.884736313314985e-06, |
|
"loss": 0.5463, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.7902303578878363e-06, |
|
"loss": 0.5562, |
|
"step": 13790 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.696879368970063e-06, |
|
"loss": 0.5451, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_bleu": 0.10243119986944875, |
|
"eval_loss": 0.7312376499176025, |
|
"eval_meteor": 0.16088101420447667, |
|
"eval_rouge1": 0.34836128872402766, |
|
"eval_rouge2": 0.17273652022337566, |
|
"eval_rougeL": 0.27977759855689, |
|
"eval_rougeLsum": 0.2800622244983262, |
|
"eval_runtime": 892.0349, |
|
"eval_samples_per_second": 1.634, |
|
"eval_steps_per_second": 0.205, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.604683784469792e-06, |
|
"loss": 0.5718, |
|
"step": 13810 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.513644036875263e-06, |
|
"loss": 0.5395, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.4237605532525527e-06, |
|
"loss": 0.5597, |
|
"step": 13830 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.3350337552437994e-06, |
|
"loss": 0.5227, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.2474640590652304e-06, |
|
"loss": 0.5474, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.161051875504917e-06, |
|
"loss": 0.549, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.0757976099212715e-06, |
|
"loss": 0.5202, |
|
"step": 13870 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.9917016622408034e-06, |
|
"loss": 0.5621, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.9087644269563675e-06, |
|
"loss": 0.5639, |
|
"step": 13890 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.8269862931254442e-06, |
|
"loss": 0.5344, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_bleu": 0.10191207168300208, |
|
"eval_loss": 0.7312436103820801, |
|
"eval_meteor": 0.15926873167510167, |
|
"eval_rouge1": 0.3477737281579141, |
|
"eval_rouge2": 0.17282641415565858, |
|
"eval_rougeL": 0.27933110853159515, |
|
"eval_rougeLsum": 0.27944709498941334, |
|
"eval_runtime": 892.7842, |
|
"eval_samples_per_second": 1.633, |
|
"eval_steps_per_second": 0.205, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.7463676443679753e-06, |
|
"loss": 0.5381, |
|
"step": 13910 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.6669088588649473e-06, |
|
"loss": 0.5631, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.588610309356393e-06, |
|
"loss": 0.53, |
|
"step": 13930 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.5114723631396717e-06, |
|
"loss": 0.5744, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.435495382067776e-06, |
|
"loss": 0.5043, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.360679722547693e-06, |
|
"loss": 0.5461, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.287025735538517e-06, |
|
"loss": 0.566, |
|
"step": 13970 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.2145337665501475e-06, |
|
"loss": 0.5449, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.143204155641315e-06, |
|
"loss": 0.5786, |
|
"step": 13990 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.073037237418224e-06, |
|
"loss": 0.5458, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_bleu": 0.1021267162859044, |
|
"eval_loss": 0.7309987545013428, |
|
"eval_meteor": 0.158667362599979, |
|
"eval_rouge1": 0.348744497173308, |
|
"eval_rouge2": 0.17304834533435584, |
|
"eval_rougeL": 0.280742088680976, |
|
"eval_rougeLsum": 0.28098362632025586, |
|
"eval_runtime": 886.896, |
|
"eval_samples_per_second": 1.644, |
|
"eval_steps_per_second": 0.206, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.004033341032857e-06, |
|
"loss": 0.5326, |
|
"step": 14010 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9361927901815624e-06, |
|
"loss": 0.5636, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.8695159031033304e-06, |
|
"loss": 0.5331, |
|
"step": 14030 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8040029925785185e-06, |
|
"loss": 0.5182, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7396543659272402e-06, |
|
"loss": 0.5477, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.6764703250080337e-06, |
|
"loss": 0.5554, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.6144511662162798e-06, |
|
"loss": 0.5443, |
|
"step": 14070 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.5535971804829242e-06, |
|
"loss": 0.5486, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.493908653273257e-06, |
|
"loss": 0.572, |
|
"step": 14090 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.4353858645852202e-06, |
|
"loss": 0.5409, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_bleu": 0.10264475388522705, |
|
"eval_loss": 0.7310293912887573, |
|
"eval_meteor": 0.1605753762160779, |
|
"eval_rouge1": 0.34886254304961106, |
|
"eval_rouge2": 0.17305946043676362, |
|
"eval_rougeL": 0.28068092583378756, |
|
"eval_rougeLsum": 0.2808523595018192, |
|
"eval_runtime": 887.8211, |
|
"eval_samples_per_second": 1.642, |
|
"eval_steps_per_second": 0.206, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.3780290889484072e-06, |
|
"loss": 0.5521, |
|
"step": 14110 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.3218385954226198e-06, |
|
"loss": 0.5632, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.2668146475966758e-06, |
|
"loss": 0.5588, |
|
"step": 14130 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.212957503587131e-06, |
|
"loss": 0.5164, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.1602674160370586e-06, |
|
"loss": 0.5299, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.1087446321149108e-06, |
|
"loss": 0.5555, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.0583893935133537e-06, |
|
"loss": 0.5433, |
|
"step": 14170 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.009201936448073e-06, |
|
"loss": 0.5012, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.611824916566925e-07, |
|
"loss": 0.5664, |
|
"step": 14190 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 9.143312843978014e-07, |
|
"loss": 0.5236, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_bleu": 0.10244624097824648, |
|
"eval_loss": 0.7309274077415466, |
|
"eval_meteor": 0.16005667824339107, |
|
"eval_rouge1": 0.3490422457116665, |
|
"eval_rouge2": 0.1733463893140626, |
|
"eval_rougeL": 0.28057105638475316, |
|
"eval_rougeLsum": 0.2807499959620654, |
|
"eval_runtime": 888.0294, |
|
"eval_samples_per_second": 1.642, |
|
"eval_steps_per_second": 0.206, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.686485344497063e-07, |
|
"loss": 0.5504, |
|
"step": 14210 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 8.241344561095709e-07, |
|
"loss": 0.4984, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 7.807892581922494e-07, |
|
"loss": 0.5531, |
|
"step": 14230 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.386131440294541e-07, |
|
"loss": 0.5439, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.976063114687847e-07, |
|
"loss": 0.553, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.577689528726449e-07, |
|
"loss": 0.5774, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 6.19101255117549e-07, |
|
"loss": 0.5293, |
|
"step": 14270 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.816033995931224e-07, |
|
"loss": 0.5738, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.452755622012418e-07, |
|
"loss": 0.6027, |
|
"step": 14290 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5.101179133552847e-07, |
|
"loss": 0.5585, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_bleu": 0.10184766000251703, |
|
"eval_loss": 0.7308475375175476, |
|
"eval_meteor": 0.15979922866240157, |
|
"eval_rouge1": 0.34845561005207215, |
|
"eval_rouge2": 0.1730858632368449, |
|
"eval_rougeL": 0.2802316743175929, |
|
"eval_rougeLsum": 0.28039957039910934, |
|
"eval_runtime": 889.345, |
|
"eval_samples_per_second": 1.639, |
|
"eval_steps_per_second": 0.206, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.7613061797927017e-07, |
|
"loss": 0.5272, |
|
"step": 14310 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.433138355071364e-07, |
|
"loss": 0.5336, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.1166771988190854e-07, |
|
"loss": 0.5732, |
|
"step": 14330 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.811924195551153e-07, |
|
"loss": 0.571, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.518880774859012e-07, |
|
"loss": 0.5303, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.2375483114052697e-07, |
|
"loss": 0.5865, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.9679281249167524e-07, |
|
"loss": 0.5151, |
|
"step": 14370 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.7100214801775713e-07, |
|
"loss": 0.5549, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.463829587024402e-07, |
|
"loss": 0.5729, |
|
"step": 14390 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.2293536003398229e-07, |
|
"loss": 0.5583, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_bleu": 0.1018034775532683, |
|
"eval_loss": 0.730782151222229, |
|
"eval_meteor": 0.15932308064758732, |
|
"eval_rouge1": 0.3484697219289754, |
|
"eval_rouge2": 0.17293266015970904, |
|
"eval_rougeL": 0.2799082120065427, |
|
"eval_rougeLsum": 0.28003296184605053, |
|
"eval_runtime": 896.7591, |
|
"eval_samples_per_second": 1.626, |
|
"eval_steps_per_second": 0.204, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.006594620047042e-07, |
|
"loss": 0.5603, |
|
"step": 14410 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.7955536911057336e-07, |
|
"loss": 0.5393, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.596231803505377e-07, |
|
"loss": 0.5596, |
|
"step": 14430 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4086298922624808e-07, |
|
"loss": 0.5791, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.2327488374147543e-07, |
|
"loss": 0.5645, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.0685894640177774e-07, |
|
"loss": 0.5598, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 9.161525421408357e-08, |
|
"loss": 0.524, |
|
"step": 14470 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.75438786863314e-08, |
|
"loss": 0.5659, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 6.464488582713646e-08, |
|
"loss": 0.5511, |
|
"step": 14490 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.2918336145513225e-08, |
|
"loss": 0.5437, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_bleu": 0.10173439407301835, |
|
"eval_loss": 0.7307738661766052, |
|
"eval_meteor": 0.1593960612395741, |
|
"eval_rouge1": 0.3486543535941503, |
|
"eval_rouge2": 0.17300853906054273, |
|
"eval_rougeL": 0.2799853708554518, |
|
"eval_rougeLsum": 0.28011112359864354, |
|
"eval_runtime": 900.2112, |
|
"eval_samples_per_second": 1.62, |
|
"eval_steps_per_second": 0.203, |
|
"step": 14500 |
|
} |
|
], |
|
"max_steps": 14595, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.0055395438032323e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|