diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,5862 @@ +{ + "best_metric": 0.3075002644877919, + "best_model_checkpoint": "ru_t5_logs/checkpoint-7200", + "epoch": 1.21786719081204, + "global_step": 7900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.555555555555555e-05, + "loss": 8.8426, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001111111111111111, + "loss": 2.5171, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016666666666666666, + "loss": 1.4573, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002222222222222222, + "loss": 1.1602, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002777777777777778, + "loss": 0.9913, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003333333333333333, + "loss": 1.0849, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003888888888888889, + "loss": 1.1257, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004444444444444444, + "loss": 1.1418, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.0005, + "loss": 1.0913, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999996711179958, + "loss": 1.147, + "step": 100 + }, + { + "epoch": 0.02, + "eval_bleu": 0.057150905355919934, + "eval_loss": 0.9198915958404541, + "eval_meteor": 0.11030404785434701, + "eval_rouge1": 0.279545903055327, + "eval_rouge2": 0.1375545207790398, + "eval_rougeL": 0.22993057523222138, + "eval_rougeLsum": 0.23002986644843249, + "eval_runtime": 1292.7346, + "eval_samples_per_second": 1.128, + "eval_steps_per_second": 0.188, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999986844728487, + "loss": 1.067, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999970400671544, + "loss": 1.1341, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999947379052395, + "loss": 0.9852, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999917779931613, + "loss": 1.0095, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999881603387073, + "loss": 1.0118, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999838849513957, + "loss": 1.0513, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999789518424755, + "loss": 1.009, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999733610249258, + "loss": 0.9138, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999671125134564, + "loss": 0.9989, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999602063245074, + "loss": 0.9443, + "step": 200 + }, + { + "epoch": 0.03, + "eval_bleu": 0.08174242542801959, + "eval_loss": 0.8470357656478882, + "eval_meteor": 0.13334461127658528, + "eval_rouge1": 0.3118630390437005, + "eval_rouge2": 0.17036843477123215, + "eval_rougeL": 0.26155329349275835, + "eval_rougeLsum": 0.26141341054848355, + "eval_runtime": 1098.495, + "eval_samples_per_second": 1.327, + "eval_steps_per_second": 0.221, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999526424762496, + "loss": 0.9839, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004999444209885838, + "loss": 0.9479, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999355418831412, + "loss": 0.917, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999260051832831, + "loss": 0.87, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999158109141012, + "loss": 0.9722, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004999049591024172, + "loss": 0.9619, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998934497767828, + "loss": 1.0447, + "step": 270 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998812829674797, + "loss": 0.8547, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004998684587065195, + "loss": 0.8972, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004998549770276435, + "loss": 0.8871, + "step": 300 + }, + { + "epoch": 0.05, + "eval_bleu": 0.046252384714512554, + "eval_loss": 0.8526527881622314, + "eval_meteor": 0.09538529454387626, + "eval_rouge1": 0.27543511716574076, + "eval_rouge2": 0.1605961208091074, + "eval_rougeL": 0.24354530581569278, + "eval_rougeLsum": 0.24344913233941234, + "eval_runtime": 882.6979, + "eval_samples_per_second": 1.652, + "eval_steps_per_second": 0.275, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004998408379663226, + "loss": 0.8772, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004998260415597578, + "loss": 1.0016, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499810587846879, + "loss": 0.9394, + "step": 330 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997944768683458, + "loss": 0.8853, + "step": 340 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004997777086665473, + "loss": 0.9091, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004997602832856013, + "loss": 0.8932, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004997422007713551, + "loss": 0.8659, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004997234611713849, + "loss": 0.9325, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004997040645349955, + "loss": 0.9674, + "step": 390 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004996840109132205, + "loss": 0.9608, + "step": 400 + }, + { + "epoch": 0.06, + "eval_bleu": 0.08586731335447921, + "eval_loss": 0.8303579092025757, + "eval_meteor": 0.1430676871634322, + "eval_rouge1": 0.31709190550852295, + "eval_rouge2": 0.17041273297322634, + "eval_rougeL": 0.267869089287303, + "eval_rougeLsum": 0.2677306207885424, + "eval_runtime": 1072.7177, + "eval_samples_per_second": 1.359, + "eval_steps_per_second": 0.227, + "step": 400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004996633003588222, + "loss": 0.9616, + "step": 410 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004996419329262913, + "loss": 0.9675, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004996199086718466, + "loss": 0.9418, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004995972276534351, + "loss": 0.9631, + "step": 440 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004995738899307319, + "loss": 0.9218, + "step": 450 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004995498955651399, + "loss": 0.984, + "step": 460 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004995252446197894, + "loss": 0.9286, + "step": 470 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004994999371595388, + "loss": 0.8722, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004994739732509731, + "loss": 0.8834, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 0.000499447352962405, + "loss": 0.953, + "step": 500 + }, + { + "epoch": 0.08, + "eval_bleu": 0.09861710848144628, + "eval_loss": 0.8263402581214905, + "eval_meteor": 0.15312912847569377, + "eval_rouge1": 0.3263458627968157, + "eval_rouge2": 0.17504244009075126, + "eval_rougeL": 0.2714073081979089, + "eval_rougeLsum": 0.27118688987783984, + "eval_runtime": 1146.0503, + "eval_samples_per_second": 1.272, + "eval_steps_per_second": 0.212, + "step": 500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004994200763638739, + "loss": 0.8363, + "step": 510 + }, + { + "epoch": 0.08, + "learning_rate": 0.000499392143527146, + "loss": 0.9203, + "step": 520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004993635545257144, + "loss": 0.9375, + "step": 530 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004993343094347979, + "loss": 0.9548, + "step": 540 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004993044083313425, + "loss": 0.9221, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004992738512940194, + "loss": 0.8666, + "step": 560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004992426384032258, + "loss": 0.8917, + "step": 570 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004992107697410848, + "loss": 0.9237, + "step": 580 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004991782453914444, + "loss": 0.8657, + "step": 590 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004991450654398782, + "loss": 0.9489, + "step": 600 + }, + { + "epoch": 0.09, + "eval_bleu": 0.10113643092041803, + "eval_loss": 0.8359497785568237, + "eval_meteor": 0.1654018684728887, + "eval_rouge1": 0.33698613363555824, + "eval_rouge2": 0.17694734353424882, + "eval_rougeL": 0.2749185603408262, + "eval_rougeLsum": 0.2750113209557301, + "eval_runtime": 1213.3811, + "eval_samples_per_second": 1.202, + "eval_steps_per_second": 0.2, + "step": 600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004991112299736844, + "loss": 0.9289, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 0.000499076739081886, + "loss": 0.9123, + "step": 620 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004990415928552305, + "loss": 0.9329, + "step": 630 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004990057913861896, + "loss": 0.8874, + "step": 640 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004989693347689589, + "loss": 0.9698, + "step": 650 + }, + { + "epoch": 0.1, + "learning_rate": 0.000498932223099458, + "loss": 0.831, + "step": 660 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004988944564753295, + "loss": 0.8701, + "step": 670 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004988560349959396, + "loss": 0.8953, + "step": 680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004988169587623776, + "loss": 0.9621, + "step": 690 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004987772278774548, + "loss": 0.8948, + "step": 700 + }, + { + "epoch": 0.11, + "eval_bleu": 0.04255087666728896, + "eval_loss": 0.8361812233924866, + "eval_meteor": 0.099997885358938, + "eval_rouge1": 0.28420928207646434, + "eval_rouge2": 0.1653214913198871, + "eval_rougeL": 0.25267504887077563, + "eval_rougeLsum": 0.25262418214939675, + "eval_runtime": 754.6649, + "eval_samples_per_second": 1.932, + "eval_steps_per_second": 0.322, + "step": 700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004987368424457058, + "loss": 0.9126, + "step": 710 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004986958025733868, + "loss": 0.9425, + "step": 720 + }, + { + "epoch": 0.11, + "learning_rate": 0.000498654108368476, + "loss": 0.8944, + "step": 730 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004986117599406733, + "loss": 0.8977, + "step": 740 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004985687574013994, + "loss": 0.8851, + "step": 750 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004985251008637968, + "loss": 0.8542, + "step": 760 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004984807904427281, + "loss": 0.8809, + "step": 770 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004984358262547766, + "loss": 0.8334, + "step": 780 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004983902084182455, + "loss": 0.8488, + "step": 790 + }, + { + "epoch": 0.12, + "learning_rate": 0.000498343937053158, + "loss": 0.8409, + "step": 800 + }, + { + "epoch": 0.12, + "eval_bleu": 0.09329201888155486, + "eval_loss": 0.8163634538650513, + "eval_meteor": 0.15770696776351786, + "eval_rouge1": 0.3315883123187934, + "eval_rouge2": 0.1767418040045381, + "eval_rougeL": 0.27737567013849995, + "eval_rougeLsum": 0.27738229938080694, + "eval_runtime": 1063.2636, + "eval_samples_per_second": 1.371, + "eval_steps_per_second": 0.229, + "step": 800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004982970122812566, + "loss": 0.7996, + "step": 810 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004982494342260029, + "loss": 0.8564, + "step": 820 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004982012030125775, + "loss": 0.9214, + "step": 830 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004981523187678796, + "loss": 0.8301, + "step": 840 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004981027816205262, + "loss": 0.8948, + "step": 850 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004980525917008523, + "loss": 0.8467, + "step": 860 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004980017491409103, + "loss": 0.9733, + "step": 870 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004979502540744702, + "loss": 0.9432, + "step": 880 + }, + { + "epoch": 0.14, + "learning_rate": 0.000497898106637018, + "loss": 0.9256, + "step": 890 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004978453069657568, + "loss": 0.8505, + "step": 900 + }, + { + "epoch": 0.14, + "eval_bleu": 0.08288033858994784, + "eval_loss": 0.8515655398368835, + "eval_meteor": 0.13825343548644084, + "eval_rouge1": 0.31117223601763094, + "eval_rouge2": 0.17049205940823398, + "eval_rougeL": 0.26282109139798354, + "eval_rougeLsum": 0.26267587400999515, + "eval_runtime": 1043.0682, + "eval_samples_per_second": 1.398, + "eval_steps_per_second": 0.233, + "step": 900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004977918551996054, + "loss": 0.8471, + "step": 910 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004977377514791983, + "loss": 0.9285, + "step": 920 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004976829959468855, + "loss": 0.8993, + "step": 930 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004976275887467319, + "loss": 0.9278, + "step": 940 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004975715300245169, + "loss": 0.8716, + "step": 950 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004975148199277342, + "loss": 0.8836, + "step": 960 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004974574586055912, + "loss": 0.9705, + "step": 970 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004973994462090088, + "loss": 0.8766, + "step": 980 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004973407828906207, + "loss": 0.8644, + "step": 990 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004972814688047736, + "loss": 0.8516, + "step": 1000 + }, + { + "epoch": 0.15, + "eval_bleu": 0.11086631103897832, + "eval_loss": 0.8234091401100159, + "eval_meteor": 0.1702736355204243, + "eval_rouge1": 0.33339211687613324, + "eval_rouge2": 0.17689852127575206, + "eval_rougeL": 0.2749015362723488, + "eval_rougeLsum": 0.2749567892668668, + "eval_runtime": 1362.5888, + "eval_samples_per_second": 1.07, + "eval_steps_per_second": 0.178, + "step": 1000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004972215041075261, + "loss": 0.9137, + "step": 1010 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004971608889566486, + "loss": 0.9558, + "step": 1020 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004970996235116231, + "loss": 0.9403, + "step": 1030 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004970377079336422, + "loss": 0.9096, + "step": 1040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004969751423856095, + "loss": 0.9792, + "step": 1050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004969119270321383, + "loss": 0.8368, + "step": 1060 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004968480620395519, + "loss": 0.9117, + "step": 1070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004967835475758825, + "loss": 0.91, + "step": 1080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004967183838108713, + "loss": 0.897, + "step": 1090 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004966525709159679, + "loss": 0.876, + "step": 1100 + }, + { + "epoch": 0.17, + "eval_bleu": 0.08012098475489943, + "eval_loss": 0.810808539390564, + "eval_meteor": 0.13737464649721876, + "eval_rouge1": 0.31919458917681476, + "eval_rouge2": 0.17969717787411127, + "eval_rougeL": 0.2725775781741208, + "eval_rougeLsum": 0.27236890457104335, + "eval_runtime": 942.8581, + "eval_samples_per_second": 1.546, + "eval_steps_per_second": 0.258, + "step": 1100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004965861090643296, + "loss": 0.9041, + "step": 1110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004965189984308215, + "loss": 0.9139, + "step": 1120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004964512391920151, + "loss": 0.8988, + "step": 1130 + }, + { + "epoch": 0.18, + "learning_rate": 0.000496382831526189, + "loss": 0.8624, + "step": 1140 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004963137756133274, + "loss": 0.8518, + "step": 1150 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004962440716351205, + "loss": 0.9235, + "step": 1160 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004961737197749633, + "loss": 0.8071, + "step": 1170 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004961027202179554, + "loss": 0.894, + "step": 1180 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004960310731509007, + "loss": 0.9255, + "step": 1190 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004959587787623065, + "loss": 0.8109, + "step": 1200 + }, + { + "epoch": 0.18, + "eval_bleu": 0.09517651862521773, + "eval_loss": 0.8174175024032593, + "eval_meteor": 0.16180826590180308, + "eval_rouge1": 0.33582745913845957, + "eval_rouge2": 0.1811654909375417, + "eval_rougeL": 0.281565966212259, + "eval_rougeLsum": 0.2813838288792657, + "eval_runtime": 1002.5623, + "eval_samples_per_second": 1.454, + "eval_steps_per_second": 0.242, + "step": 1200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004958858372423837, + "loss": 0.8624, + "step": 1210 + }, + { + "epoch": 0.19, + "learning_rate": 0.000495812248783045, + "loss": 0.8696, + "step": 1220 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004957380135779064, + "loss": 0.859, + "step": 1230 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004956631318222846, + "loss": 0.8144, + "step": 1240 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004955876037131974, + "loss": 0.87, + "step": 1250 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004955114294493639, + "loss": 0.8819, + "step": 1260 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004954346092312026, + "loss": 0.8935, + "step": 1270 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004953571432608321, + "loss": 0.8716, + "step": 1280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004952790317420694, + "loss": 0.8911, + "step": 1290 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004952002748804304, + "loss": 0.8183, + "step": 1300 + }, + { + "epoch": 0.2, + "eval_bleu": 0.10596849766835054, + "eval_loss": 0.8043612837791443, + "eval_meteor": 0.1691677125903321, + "eval_rouge1": 0.3389925011791456, + "eval_rouge2": 0.18348093472072707, + "eval_rougeL": 0.28213366266002454, + "eval_rougeLsum": 0.28213090498762367, + "eval_runtime": 1141.1899, + "eval_samples_per_second": 1.278, + "eval_steps_per_second": 0.213, + "step": 1300 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004951208728831289, + "loss": 0.8433, + "step": 1310 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004950408259590757, + "loss": 0.8268, + "step": 1320 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004949601343188792, + "loss": 0.911, + "step": 1330 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004948787981748433, + "loss": 0.8638, + "step": 1340 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004947968177409681, + "loss": 0.8691, + "step": 1350 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004947141932329486, + "loss": 0.8499, + "step": 1360 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004946309248681745, + "loss": 0.8832, + "step": 1370 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004945470128657297, + "loss": 0.8744, + "step": 1380 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004944624574463913, + "loss": 0.8219, + "step": 1390 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004943772588326292, + "loss": 0.8407, + "step": 1400 + }, + { + "epoch": 0.22, + "eval_bleu": 0.11433808067218891, + "eval_loss": 0.8030869960784912, + "eval_meteor": 0.17630048064575726, + "eval_rouge1": 0.34423855201642795, + "eval_rouge2": 0.18361911351356364, + "eval_rougeL": 0.2837522876067602, + "eval_rougeLsum": 0.28364725151298015, + "eval_runtime": 1232.5857, + "eval_samples_per_second": 1.183, + "eval_steps_per_second": 0.197, + "step": 1400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004942914172486059, + "loss": 0.8932, + "step": 1410 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004942049329201754, + "loss": 0.8544, + "step": 1420 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004941178060748829, + "loss": 0.7995, + "step": 1430 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004940300369419637, + "loss": 0.8576, + "step": 1440 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004939416257523436, + "loss": 0.8654, + "step": 1450 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004938525727386373, + "loss": 0.8559, + "step": 1460 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004937628781351483, + "loss": 0.8738, + "step": 1470 + }, + { + "epoch": 0.23, + "learning_rate": 0.000493672542177868, + "loss": 0.9433, + "step": 1480 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004935815651044755, + "loss": 0.8125, + "step": 1490 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004934899471543366, + "loss": 0.886, + "step": 1500 + }, + { + "epoch": 0.23, + "eval_bleu": 0.12742687031814315, + "eval_loss": 0.805793821811676, + "eval_meteor": 0.19296014590447547, + "eval_rouge1": 0.35221171953589286, + "eval_rouge2": 0.1846768871884069, + "eval_rougeL": 0.2848280618153002, + "eval_rougeLsum": 0.28493716966830007, + "eval_runtime": 1452.5375, + "eval_samples_per_second": 1.004, + "eval_steps_per_second": 0.167, + "step": 1500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004933976885685031, + "loss": 0.8315, + "step": 1510 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004933047895897127, + "loss": 0.8242, + "step": 1520 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004932112504623876, + "loss": 0.8638, + "step": 1530 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004931170714326347, + "loss": 0.831, + "step": 1540 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004930222527482442, + "loss": 0.8754, + "step": 1550 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004929267946586894, + "loss": 0.817, + "step": 1560 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004928306974151258, + "loss": 0.906, + "step": 1570 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004927339612703908, + "loss": 0.8899, + "step": 1580 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004926365864790025, + "loss": 0.8845, + "step": 1590 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004925385732971595, + "loss": 0.8761, + "step": 1600 + }, + { + "epoch": 0.25, + "eval_bleu": 0.10770304213160696, + "eval_loss": 0.8112803101539612, + "eval_meteor": 0.17046581268631894, + "eval_rouge1": 0.3402358184654584, + "eval_rouge2": 0.1827631633093852, + "eval_rougeL": 0.28122066588095507, + "eval_rougeLsum": 0.28114478510696167, + "eval_runtime": 1067.7239, + "eval_samples_per_second": 1.366, + "eval_steps_per_second": 0.228, + "step": 1600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004924399219827398, + "loss": 0.9299, + "step": 1610 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004923406327953007, + "loss": 0.8943, + "step": 1620 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004922407059960776, + "loss": 0.9165, + "step": 1630 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004921401418479834, + "loss": 0.8499, + "step": 1640 + }, + { + "epoch": 0.25, + "learning_rate": 0.0004920389406156082, + "loss": 0.8766, + "step": 1650 + }, + { + "epoch": 0.26, + "learning_rate": 0.000491937102565218, + "loss": 0.8453, + "step": 1660 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004918346279647544, + "loss": 0.8943, + "step": 1670 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004917315170838339, + "loss": 0.8835, + "step": 1680 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004916277701937468, + "loss": 0.8386, + "step": 1690 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004915233875674572, + "loss": 0.872, + "step": 1700 + }, + { + "epoch": 0.26, + "eval_bleu": 0.11788189676776079, + "eval_loss": 0.802962601184845, + "eval_meteor": 0.18209545205592362, + "eval_rouge1": 0.3438678638236826, + "eval_rouge2": 0.18232469842121968, + "eval_rougeL": 0.28305359100970484, + "eval_rougeLsum": 0.2829156060905057, + "eval_runtime": 1203.8235, + "eval_samples_per_second": 1.211, + "eval_steps_per_second": 0.202, + "step": 1700 + }, + { + "epoch": 0.26, + "learning_rate": 0.0004914183694796016, + "loss": 0.8319, + "step": 1710 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004913127162064885, + "loss": 0.8564, + "step": 1720 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004912064280260974, + "loss": 0.8453, + "step": 1730 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004910995052180786, + "loss": 0.8255, + "step": 1740 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004909919480637519, + "loss": 0.8831, + "step": 1750 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004908837568461064, + "loss": 0.8273, + "step": 1760 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004907749318497991, + "loss": 0.8494, + "step": 1770 + }, + { + "epoch": 0.27, + "learning_rate": 0.0004906654733611547, + "loss": 0.8869, + "step": 1780 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004905553816681646, + "loss": 0.778, + "step": 1790 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004904446570604862, + "loss": 0.9082, + "step": 1800 + }, + { + "epoch": 0.28, + "eval_bleu": 0.11523676534243875, + "eval_loss": 0.8040370345115662, + "eval_meteor": 0.17806856469850704, + "eval_rouge1": 0.34321948648974265, + "eval_rouge2": 0.18492135732773402, + "eval_rougeL": 0.2827187054731721, + "eval_rougeLsum": 0.2825956342144159, + "eval_runtime": 1223.1433, + "eval_samples_per_second": 1.192, + "eval_steps_per_second": 0.199, + "step": 1800 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004903332998294422, + "loss": 0.8266, + "step": 1810 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004902213102680197, + "loss": 0.85, + "step": 1820 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004901086886708695, + "loss": 0.8447, + "step": 1830 + }, + { + "epoch": 0.28, + "learning_rate": 0.0004899954353343053, + "loss": 0.8803, + "step": 1840 + }, + { + "epoch": 0.29, + "learning_rate": 0.000489881550556303, + "loss": 0.7804, + "step": 1850 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004897670346364998, + "loss": 0.9184, + "step": 1860 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004896518878761937, + "loss": 0.8075, + "step": 1870 + }, + { + "epoch": 0.29, + "learning_rate": 0.000489536110578342, + "loss": 0.8086, + "step": 1880 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004894197030475614, + "loss": 0.8357, + "step": 1890 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004893026655901266, + "loss": 0.818, + "step": 1900 + }, + { + "epoch": 0.29, + "eval_bleu": 0.1152261234460638, + "eval_loss": 0.7954283356666565, + "eval_meteor": 0.18005815233416442, + "eval_rouge1": 0.3480841484526223, + "eval_rouge2": 0.1878809645185771, + "eval_rougeL": 0.2866495022876356, + "eval_rougeLsum": 0.2865319164852567, + "eval_runtime": 1150.8093, + "eval_samples_per_second": 1.267, + "eval_steps_per_second": 0.211, + "step": 1900 + }, + { + "epoch": 0.29, + "learning_rate": 0.0004891849985139697, + "loss": 0.919, + "step": 1910 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004890667021286794, + "loss": 0.8702, + "step": 1920 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004889477767455002, + "loss": 0.8223, + "step": 1930 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004888282226773313, + "loss": 0.8881, + "step": 1940 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004887080402387262, + "loss": 0.8326, + "step": 1950 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004885872297458915, + "loss": 0.7896, + "step": 1960 + }, + { + "epoch": 0.3, + "learning_rate": 0.0004884657915166867, + "loss": 0.8202, + "step": 1970 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004883437258706224, + "loss": 0.8457, + "step": 1980 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004882210331288601, + "loss": 0.8648, + "step": 1990 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004880977136142113, + "loss": 0.7916, + "step": 2000 + }, + { + "epoch": 0.31, + "eval_bleu": 0.102031392794918, + "eval_loss": 0.7973849773406982, + "eval_meteor": 0.1697416518292726, + "eval_rouge1": 0.3407404763360161, + "eval_rouge2": 0.18420618651745257, + "eval_rougeL": 0.2837111287248353, + "eval_rougeLsum": 0.28358533147000164, + "eval_runtime": 1060.7578, + "eval_samples_per_second": 1.374, + "eval_steps_per_second": 0.229, + "step": 2000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00048797376765113667, + "loss": 0.8344, + "step": 2010 + }, + { + "epoch": 0.31, + "learning_rate": 0.0004878491955657448, + "loss": 0.8599, + "step": 2020 + }, + { + "epoch": 0.31, + "learning_rate": 0.000487723997685792, + "loss": 0.8316, + "step": 2030 + }, + { + "epoch": 0.31, + "learning_rate": 0.00048759817434068084, + "loss": 0.7967, + "step": 2040 + }, + { + "epoch": 0.32, + "learning_rate": 0.00048747172586145954, + "loss": 0.8272, + "step": 2050 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004873446525808212, + "loss": 0.8879, + "step": 2060 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004872169548331027, + "loss": 0.8938, + "step": 2070 + }, + { + "epoch": 0.32, + "learning_rate": 0.0004870886329542841, + "loss": 0.9446, + "step": 2080 + }, + { + "epoch": 0.32, + "learning_rate": 0.00048695968728198726, + "loss": 0.8329, + "step": 2090 + }, + { + "epoch": 0.32, + "learning_rate": 0.00048683011815547553, + "loss": 0.8701, + "step": 2100 + }, + { + "epoch": 0.32, + "eval_bleu": 0.10951609270697767, + "eval_loss": 0.7971030473709106, + "eval_meteor": 0.1724449245874015, + "eval_rouge1": 0.34523527799298537, + "eval_rouge2": 0.18728792305872893, + "eval_rougeL": 0.2875415995221111, + "eval_rougeLsum": 0.2878305472355617, + "eval_runtime": 1081.3597, + "eval_samples_per_second": 1.348, + "eval_steps_per_second": 0.225, + "step": 2100 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004866999259156526, + "loss": 0.856, + "step": 2110 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004865691109050615, + "loss": 0.8573, + "step": 2120 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004864376734678839, + "loss": 0.813, + "step": 2130 + }, + { + "epoch": 0.33, + "learning_rate": 0.000486305613949939, + "loss": 0.9175, + "step": 2140 + }, + { + "epoch": 0.33, + "learning_rate": 0.00048617293269868277, + "loss": 0.8486, + "step": 2150 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004860396300632072, + "loss": 0.8752, + "step": 2160 + }, + { + "epoch": 0.33, + "learning_rate": 0.0004859057063942387, + "loss": 0.8291, + "step": 2170 + }, + { + "epoch": 0.34, + "learning_rate": 0.00048577116204413817, + "loss": 0.8263, + "step": 2180 + }, + { + "epoch": 0.34, + "learning_rate": 0.00048563599736689935, + "loss": 0.8634, + "step": 2190 + }, + { + "epoch": 0.34, + "learning_rate": 0.00048550021271814793, + "loss": 0.813, + "step": 2200 + }, + { + "epoch": 0.34, + "eval_bleu": 0.11357786165382955, + "eval_loss": 0.7968371510505676, + "eval_meteor": 0.17812534934300692, + "eval_rouge1": 0.34547989314882344, + "eval_rouge2": 0.18647144671781885, + "eval_rougeL": 0.28578813253772484, + "eval_rougeLsum": 0.28593828002638405, + "eval_runtime": 1224.0946, + "eval_samples_per_second": 1.191, + "eval_steps_per_second": 0.199, + "step": 2200 + }, + { + "epoch": 0.34, + "learning_rate": 0.000485363808455141, + "loss": 0.8431, + "step": 2210 + }, + { + "epoch": 0.34, + "learning_rate": 0.0004852267849367659, + "loss": 0.9313, + "step": 2220 + }, + { + "epoch": 0.34, + "learning_rate": 0.000485089142523539, + "loss": 0.8619, + "step": 2230 + }, + { + "epoch": 0.35, + "learning_rate": 0.00048495088157760535, + "loss": 0.8258, + "step": 2240 + }, + { + "epoch": 0.35, + "learning_rate": 0.00048481200246273715, + "loss": 0.8584, + "step": 2250 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004846725055443333, + "loss": 0.8279, + "step": 2260 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004845323911894178, + "loss": 0.847, + "step": 2270 + }, + { + "epoch": 0.35, + "learning_rate": 0.00048439165976663947, + "loss": 0.837, + "step": 2280 + }, + { + "epoch": 0.35, + "learning_rate": 0.00048425031164627056, + "loss": 0.8753, + "step": 2290 + }, + { + "epoch": 0.35, + "learning_rate": 0.0004841083472002059, + "loss": 0.8525, + "step": 2300 + }, + { + "epoch": 0.35, + "eval_bleu": 0.11550228990610252, + "eval_loss": 0.7897738218307495, + "eval_meteor": 0.17967220096769168, + "eval_rouge1": 0.34880707864872973, + "eval_rouge2": 0.18938272334418443, + "eval_rougeL": 0.28769647914513974, + "eval_rougeLsum": 0.2877285709755628, + "eval_runtime": 1181.2131, + "eval_samples_per_second": 1.234, + "eval_steps_per_second": 0.206, + "step": 2300 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004839657668019619, + "loss": 0.831, + "step": 2310 + }, + { + "epoch": 0.36, + "learning_rate": 0.00048382257082667566, + "loss": 0.8446, + "step": 2320 + }, + { + "epoch": 0.36, + "learning_rate": 0.00048367875965110366, + "loss": 0.8041, + "step": 2330 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004835343336536212, + "loss": 0.8464, + "step": 2340 + }, + { + "epoch": 0.36, + "learning_rate": 0.0004833892932142213, + "loss": 0.8198, + "step": 2350 + }, + { + "epoch": 0.36, + "learning_rate": 0.00048324363871451325, + "loss": 0.9116, + "step": 2360 + }, + { + "epoch": 0.37, + "learning_rate": 0.00048309737053772245, + "loss": 0.8471, + "step": 2370 + }, + { + "epoch": 0.37, + "learning_rate": 0.00048295048906868854, + "loss": 0.8676, + "step": 2380 + }, + { + "epoch": 0.37, + "learning_rate": 0.00048280299469386493, + "loss": 0.8034, + "step": 2390 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004826548878013175, + "loss": 0.8494, + "step": 2400 + }, + { + "epoch": 0.37, + "eval_bleu": 0.11484040536071449, + "eval_loss": 0.7928580641746521, + "eval_meteor": 0.17927183932418322, + "eval_rouge1": 0.353015756939868, + "eval_rouge2": 0.1937422671781578, + "eval_rougeL": 0.29412424979719143, + "eval_rougeLsum": 0.29416263018885086, + "eval_runtime": 1151.8853, + "eval_samples_per_second": 1.266, + "eval_steps_per_second": 0.211, + "step": 2400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00048250616878072383, + "loss": 0.8746, + "step": 2410 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004823568380233721, + "loss": 0.8028, + "step": 2420 + }, + { + "epoch": 0.37, + "learning_rate": 0.0004822068959221598, + "loss": 0.9032, + "step": 2430 + }, + { + "epoch": 0.38, + "learning_rate": 0.000482056342871593, + "loss": 0.8321, + "step": 2440 + }, + { + "epoch": 0.38, + "learning_rate": 0.0004819051792677852, + "loss": 0.854, + "step": 2450 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048175340550845637, + "loss": 0.9002, + "step": 2460 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048160102199293174, + "loss": 0.8416, + "step": 2470 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048144802912214094, + "loss": 0.879, + "step": 2480 + }, + { + "epoch": 0.38, + "learning_rate": 0.00048129442729861663, + "loss": 0.8377, + "step": 2490 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048114021692649404, + "loss": 0.8439, + "step": 2500 + }, + { + "epoch": 0.39, + "eval_bleu": 0.1110213869118398, + "eval_loss": 0.7995119094848633, + "eval_meteor": 0.17509915967969378, + "eval_rouge1": 0.34284195131985784, + "eval_rouge2": 0.1868890431147761, + "eval_rougeL": 0.2857996409683133, + "eval_rougeLsum": 0.28592281635680744, + "eval_runtime": 1147.2498, + "eval_samples_per_second": 1.271, + "eval_steps_per_second": 0.212, + "step": 2500 + }, + { + "epoch": 0.39, + "learning_rate": 0.000480985398411509, + "loss": 0.8648, + "step": 2510 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048082997216099797, + "loss": 0.8771, + "step": 2520 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004806739385838961, + "loss": 0.8275, + "step": 2530 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004805172980907363, + "loss": 0.8215, + "step": 2540 + }, + { + "epoch": 0.39, + "learning_rate": 0.00048036005109364856, + "loss": 0.8678, + "step": 2550 + }, + { + "epoch": 0.39, + "learning_rate": 0.0004802021980063586, + "loss": 0.8408, + "step": 2560 + }, + { + "epoch": 0.4, + "learning_rate": 0.00048004373924418674, + "loss": 0.8536, + "step": 2570 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004798846752240468, + "loss": 0.8302, + "step": 2580 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004797250063644452, + "loss": 0.8429, + "step": 2590 + }, + { + "epoch": 0.4, + "learning_rate": 0.0004795647330854795, + "loss": 0.8562, + "step": 2600 + }, + { + "epoch": 0.4, + "eval_bleu": 0.10131964675585854, + "eval_loss": 0.7919116616249084, + "eval_meteor": 0.1612163387444336, + "eval_rouge1": 0.3392932133690917, + "eval_rouge2": 0.18896904080765833, + "eval_rougeL": 0.2874520136930931, + "eval_rougeLsum": 0.287545512675921, + "eval_runtime": 998.4776, + "eval_samples_per_second": 1.46, + "eval_steps_per_second": 0.243, + "step": 2600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047940385580883785, + "loss": 0.8855, + "step": 2610 + }, + { + "epoch": 0.4, + "learning_rate": 0.00047924237495779734, + "loss": 0.845, + "step": 2620 + }, + { + "epoch": 0.41, + "learning_rate": 0.00047908029095722305, + "loss": 0.8403, + "step": 2630 + }, + { + "epoch": 0.41, + "learning_rate": 0.00047891760423356724, + "loss": 0.8222, + "step": 2640 + }, + { + "epoch": 0.41, + "learning_rate": 0.00047875431521486757, + "loss": 0.8677, + "step": 2650 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004785904243307468, + "loss": 0.8145, + "step": 2660 + }, + { + "epoch": 0.41, + "learning_rate": 0.0004784259320124109, + "loss": 0.8303, + "step": 2670 + }, + { + "epoch": 0.41, + "learning_rate": 0.00047826083869264847, + "loss": 0.8224, + "step": 2680 + }, + { + "epoch": 0.41, + "learning_rate": 0.00047809514480582916, + "loss": 0.816, + "step": 2690 + }, + { + "epoch": 0.42, + "learning_rate": 0.00047792885078790304, + "loss": 0.7636, + "step": 2700 + }, + { + "epoch": 0.42, + "eval_bleu": 0.10954714255898276, + "eval_loss": 0.7920675277709961, + "eval_meteor": 0.17267822313892012, + "eval_rouge1": 0.34506677355407445, + "eval_rouge2": 0.18817280144902515, + "eval_rougeL": 0.2890166522888482, + "eval_rougeLsum": 0.2890120291145137, + "eval_runtime": 1093.769, + "eval_samples_per_second": 1.333, + "eval_steps_per_second": 0.222, + "step": 2700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004777619570763988, + "loss": 0.8926, + "step": 2710 + }, + { + "epoch": 0.42, + "learning_rate": 0.00047759446411042335, + "loss": 0.934, + "step": 2720 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004774263723306599, + "loss": 0.8923, + "step": 2730 + }, + { + "epoch": 0.42, + "learning_rate": 0.0004772576821793674, + "loss": 0.8999, + "step": 2740 + }, + { + "epoch": 0.42, + "learning_rate": 0.00047708839410037914, + "loss": 0.8344, + "step": 2750 + }, + { + "epoch": 0.43, + "learning_rate": 0.00047691850853910146, + "loss": 0.8274, + "step": 2760 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004767480259425128, + "loss": 0.7697, + "step": 2770 + }, + { + "epoch": 0.43, + "learning_rate": 0.00047657694675916254, + "loss": 0.8455, + "step": 2780 + }, + { + "epoch": 0.43, + "learning_rate": 0.00047640527143916943, + "loss": 0.8216, + "step": 2790 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004762330004342209, + "loss": 0.8509, + "step": 2800 + }, + { + "epoch": 0.43, + "eval_bleu": 0.11026373004464625, + "eval_loss": 0.7961094379425049, + "eval_meteor": 0.17107804416084108, + "eval_rouge1": 0.34419235974247625, + "eval_rouge2": 0.1879446304753386, + "eval_rougeL": 0.2879382324644244, + "eval_rougeLsum": 0.28804849261741966, + "eval_runtime": 1114.3068, + "eval_samples_per_second": 1.308, + "eval_steps_per_second": 0.218, + "step": 2800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004760601341975718, + "loss": 0.832, + "step": 2810 + }, + { + "epoch": 0.43, + "learning_rate": 0.0004758866731840426, + "loss": 0.8718, + "step": 2820 + }, + { + "epoch": 0.44, + "learning_rate": 0.00047571261785001913, + "loss": 0.8075, + "step": 2830 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004755379686534507, + "loss": 0.8044, + "step": 2840 + }, + { + "epoch": 0.44, + "learning_rate": 0.00047536272605384905, + "loss": 0.8582, + "step": 2850 + }, + { + "epoch": 0.44, + "learning_rate": 0.00047518689051228734, + "loss": 0.7933, + "step": 2860 + }, + { + "epoch": 0.44, + "learning_rate": 0.00047501046249139885, + "loss": 0.8387, + "step": 2870 + }, + { + "epoch": 0.44, + "learning_rate": 0.0004748334424553754, + "loss": 0.8743, + "step": 2880 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004746558308699667, + "loss": 0.8453, + "step": 2890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00047447762820247876, + "loss": 0.7834, + "step": 2900 + }, + { + "epoch": 0.45, + "eval_bleu": 0.10144013679758371, + "eval_loss": 0.7860347628593445, + "eval_meteor": 0.16863044336346464, + "eval_rouge1": 0.3483945653742756, + "eval_rouge2": 0.1906512981948328, + "eval_rougeL": 0.2928307931237276, + "eval_rougeLsum": 0.292886082360032, + "eval_runtime": 1008.318, + "eval_samples_per_second": 1.446, + "eval_steps_per_second": 0.241, + "step": 2900 + }, + { + "epoch": 0.45, + "learning_rate": 0.00047429883492177284, + "loss": 0.8794, + "step": 2910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00047411945149826397, + "loss": 0.8534, + "step": 2920 + }, + { + "epoch": 0.45, + "learning_rate": 0.00047393947840392015, + "loss": 0.8286, + "step": 2930 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004737589161122605, + "loss": 0.9363, + "step": 2940 + }, + { + "epoch": 0.45, + "learning_rate": 0.0004735777650983547, + "loss": 0.8218, + "step": 2950 + }, + { + "epoch": 0.46, + "learning_rate": 0.00047339602583882105, + "loss": 0.7604, + "step": 2960 + }, + { + "epoch": 0.46, + "learning_rate": 0.00047321369881182584, + "loss": 0.9392, + "step": 2970 + }, + { + "epoch": 0.46, + "learning_rate": 0.0004730307844970817, + "loss": 0.8586, + "step": 2980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00047284728337584637, + "loss": 0.8175, + "step": 2990 + }, + { + "epoch": 0.46, + "learning_rate": 0.00047266319593092167, + "loss": 0.8156, + "step": 3000 + }, + { + "epoch": 0.46, + "eval_bleu": 0.11205014855700891, + "eval_loss": 0.7846682071685791, + "eval_meteor": 0.1751311712892055, + "eval_rouge1": 0.35172070697143076, + "eval_rouge2": 0.1931095449214266, + "eval_rougeL": 0.29234436972165456, + "eval_rougeLsum": 0.29205838030588194, + "eval_runtime": 1104.847, + "eval_samples_per_second": 1.32, + "eval_steps_per_second": 0.22, + "step": 3000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00047247852264665184, + "loss": 0.821, + "step": 3010 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004722932640089228, + "loss": 0.8089, + "step": 3020 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004721074205051603, + "loss": 0.8205, + "step": 3030 + }, + { + "epoch": 0.47, + "learning_rate": 0.000471920992624329, + "loss": 0.8564, + "step": 3040 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004717339808569312, + "loss": 0.8746, + "step": 3050 + }, + { + "epoch": 0.47, + "learning_rate": 0.00047154638569500527, + "loss": 0.7676, + "step": 3060 + }, + { + "epoch": 0.47, + "learning_rate": 0.00047135820763212466, + "loss": 0.7534, + "step": 3070 + }, + { + "epoch": 0.47, + "learning_rate": 0.0004711694471633963, + "loss": 0.8063, + "step": 3080 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004709801047854596, + "loss": 0.8258, + "step": 3090 + }, + { + "epoch": 0.48, + "learning_rate": 0.00047079018099648495, + "loss": 0.8006, + "step": 3100 + }, + { + "epoch": 0.48, + "eval_bleu": 0.10544844410608596, + "eval_loss": 0.779005765914917, + "eval_meteor": 0.16928172352068147, + "eval_rouge1": 0.34521811226055105, + "eval_rouge2": 0.188414735386506, + "eval_rougeL": 0.2883152848683356, + "eval_rougeLsum": 0.28845113954285684, + "eval_runtime": 1093.1651, + "eval_samples_per_second": 1.334, + "eval_steps_per_second": 0.222, + "step": 3100 + }, + { + "epoch": 0.48, + "learning_rate": 0.00047059967629617253, + "loss": 0.8696, + "step": 3110 + }, + { + "epoch": 0.48, + "learning_rate": 0.00047040859118575087, + "loss": 0.8342, + "step": 3120 + }, + { + "epoch": 0.48, + "learning_rate": 0.0004702169261679755, + "loss": 0.8407, + "step": 3130 + }, + { + "epoch": 0.48, + "learning_rate": 0.000470024681747128, + "loss": 0.8277, + "step": 3140 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004698318584290141, + "loss": 0.861, + "step": 3150 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004696384567209628, + "loss": 0.8629, + "step": 3160 + }, + { + "epoch": 0.49, + "learning_rate": 0.00046944447713182473, + "loss": 0.7462, + "step": 3170 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004692499201719712, + "loss": 0.8569, + "step": 3180 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004690547863532924, + "loss": 0.7479, + "step": 3190 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004688590761891963, + "loss": 0.7449, + "step": 3200 + }, + { + "epoch": 0.49, + "eval_bleu": 0.11320699643880565, + "eval_loss": 0.7808765769004822, + "eval_meteor": 0.17517461553603783, + "eval_rouge1": 0.34801418474464885, + "eval_rouge2": 0.1926526264127003, + "eval_rougeL": 0.2935871923613115, + "eval_rougeLsum": 0.29356108231725353, + "eval_runtime": 1147.8505, + "eval_samples_per_second": 1.27, + "eval_steps_per_second": 0.212, + "step": 3200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0004686627901946074, + "loss": 0.8372, + "step": 3210 + }, + { + "epoch": 0.5, + "learning_rate": 0.00046846592888596505, + "loss": 0.8033, + "step": 3220 + }, + { + "epoch": 0.5, + "learning_rate": 0.0004682684927812225, + "loss": 0.8315, + "step": 3230 + }, + { + "epoch": 0.5, + "learning_rate": 0.0004680704823998452, + "loss": 0.7874, + "step": 3240 + }, + { + "epoch": 0.5, + "learning_rate": 0.0004678718982628094, + "loss": 0.8616, + "step": 3250 + }, + { + "epoch": 0.5, + "learning_rate": 0.0004676727408926012, + "loss": 0.8039, + "step": 3260 + }, + { + "epoch": 0.5, + "learning_rate": 0.0004674730108132148, + "loss": 0.8342, + "step": 3270 + }, + { + "epoch": 0.51, + "learning_rate": 0.00046727270855015124, + "loss": 0.8528, + "step": 3280 + }, + { + "epoch": 0.51, + "learning_rate": 0.000467071834630417, + "loss": 0.8195, + "step": 3290 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004668703895825226, + "loss": 0.8123, + "step": 3300 + }, + { + "epoch": 0.51, + "eval_bleu": 0.12139040928235188, + "eval_loss": 0.7802536487579346, + "eval_meteor": 0.18649658941096503, + "eval_rouge1": 0.35447872684757437, + "eval_rouge2": 0.19104999366552095, + "eval_rougeL": 0.29335305776164255, + "eval_rougeLsum": 0.29324750486448675, + "eval_runtime": 1188.3453, + "eval_samples_per_second": 1.227, + "eval_steps_per_second": 0.204, + "step": 3300 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004666683739364812, + "loss": 0.8181, + "step": 3310 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004664657882238074, + "loss": 0.8842, + "step": 3320 + }, + { + "epoch": 0.51, + "learning_rate": 0.00046626263297751546, + "loss": 0.8528, + "step": 3330 + }, + { + "epoch": 0.51, + "learning_rate": 0.0004660589087321183, + "loss": 0.7764, + "step": 3340 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004658546160236257, + "loss": 0.8313, + "step": 3350 + }, + { + "epoch": 0.52, + "learning_rate": 0.00046564975538954334, + "loss": 0.8438, + "step": 3360 + }, + { + "epoch": 0.52, + "learning_rate": 0.00046544432736887097, + "loss": 0.8519, + "step": 3370 + }, + { + "epoch": 0.52, + "learning_rate": 0.00046523833250210135, + "loss": 0.809, + "step": 3380 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004650317713312183, + "loss": 0.8335, + "step": 3390 + }, + { + "epoch": 0.52, + "learning_rate": 0.00046482464439969595, + "loss": 0.8428, + "step": 3400 + }, + { + "epoch": 0.52, + "eval_bleu": 0.1250886172570181, + "eval_loss": 0.7814038991928101, + "eval_meteor": 0.19055488234703907, + "eval_rouge1": 0.3612439239393179, + "eval_rouge2": 0.1958006760878207, + "eval_rougeL": 0.2964134143411231, + "eval_rougeLsum": 0.296116576418878, + "eval_runtime": 1216.6365, + "eval_samples_per_second": 1.198, + "eval_steps_per_second": 0.2, + "step": 3400 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004646169522524969, + "loss": 0.8177, + "step": 3410 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004644086954360708, + "loss": 0.8482, + "step": 3420 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004641998744983529, + "loss": 0.86, + "step": 3430 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004639904899887629, + "loss": 0.8508, + "step": 3440 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004637805424582032, + "loss": 0.9085, + "step": 3450 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004635700324590574, + "loss": 0.8397, + "step": 3460 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004633589605451892, + "loss": 0.8734, + "step": 3470 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046314732727194063, + "loss": 0.7649, + "step": 3480 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046293513319613065, + "loss": 0.8471, + "step": 3490 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046272237887605384, + "loss": 0.832, + "step": 3500 + }, + { + "epoch": 0.54, + "eval_bleu": 0.10486572244619506, + "eval_loss": 0.7807884812355042, + "eval_meteor": 0.16818865401790847, + "eval_rouge1": 0.34705793027937726, + "eval_rouge2": 0.19113964280370677, + "eval_rougeL": 0.29299029802431953, + "eval_rougeLsum": 0.29317886290919454, + "eval_runtime": 1105.6756, + "eval_samples_per_second": 1.319, + "eval_steps_per_second": 0.22, + "step": 3500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004625090648714786, + "loss": 0.8337, + "step": 3510 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046229519174364607, + "loss": 0.8794, + "step": 3520 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004620807600552686, + "loss": 0.7694, + "step": 3530 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004618657703705277, + "loss": 0.8027, + "step": 3540 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004616502232550734, + "loss": 0.8519, + "step": 3550 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004614341192760224, + "loss": 0.8001, + "step": 3560 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004612174590019562, + "loss": 0.8368, + "step": 3570 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004610002430029201, + "loss": 0.8555, + "step": 3580 + }, + { + "epoch": 0.55, + "learning_rate": 0.00046078247185042177, + "loss": 0.7932, + "step": 3590 + }, + { + "epoch": 0.55, + "learning_rate": 0.00046056414611742903, + "loss": 0.7795, + "step": 3600 + }, + { + "epoch": 0.55, + "eval_bleu": 0.12987137116221253, + "eval_loss": 0.7785532474517822, + "eval_meteor": 0.1977638908170833, + "eval_rouge1": 0.3563200854587399, + "eval_rouge2": 0.19216790450914428, + "eval_rougeL": 0.2927497382733434, + "eval_rougeLsum": 0.2929236969907393, + "eval_runtime": 1319.027, + "eval_samples_per_second": 1.105, + "eval_steps_per_second": 0.184, + "step": 3600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00046034526637836926, + "loss": 0.7853, + "step": 3610 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004601258332091274, + "loss": 0.7442, + "step": 3620 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004599058471870443, + "loss": 0.8214, + "step": 3630 + }, + { + "epoch": 0.56, + "learning_rate": 0.00045968530889091555, + "loss": 0.7751, + "step": 3640 + }, + { + "epoch": 0.56, + "learning_rate": 0.00045946421890098965, + "loss": 0.8645, + "step": 3650 + }, + { + "epoch": 0.56, + "learning_rate": 0.00045924257779896693, + "loss": 0.8341, + "step": 3660 + }, + { + "epoch": 0.57, + "learning_rate": 0.00045902038616799746, + "loss": 0.8099, + "step": 3670 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004587976445926799, + "loss": 0.8532, + "step": 3680 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004585743536590599, + "loss": 0.851, + "step": 3690 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004583505139546281, + "loss": 0.8155, + "step": 3700 + }, + { + "epoch": 0.57, + "eval_bleu": 0.1275949150703291, + "eval_loss": 0.7744527459144592, + "eval_meteor": 0.19542313704697203, + "eval_rouge1": 0.36221871637002456, + "eval_rouge2": 0.19723981570527915, + "eval_rougeL": 0.29817828224087256, + "eval_rougeLsum": 0.29819760162358966, + "eval_runtime": 1204.787, + "eval_samples_per_second": 1.21, + "eval_steps_per_second": 0.202, + "step": 3700 + }, + { + "epoch": 0.57, + "learning_rate": 0.00045812612606831974, + "loss": 0.7528, + "step": 3710 + }, + { + "epoch": 0.57, + "learning_rate": 0.00045790119059051156, + "loss": 0.8188, + "step": 3720 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004576757081130216, + "loss": 0.8529, + "step": 3730 + }, + { + "epoch": 0.58, + "learning_rate": 0.00045744967922910684, + "loss": 0.7864, + "step": 3740 + }, + { + "epoch": 0.58, + "learning_rate": 0.00045722310453346195, + "loss": 0.78, + "step": 3750 + }, + { + "epoch": 0.58, + "learning_rate": 0.00045699598462221766, + "loss": 0.813, + "step": 3760 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004567683200929391, + "loss": 0.8402, + "step": 3770 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004565401115446246, + "loss": 0.8541, + "step": 3780 + }, + { + "epoch": 0.58, + "learning_rate": 0.00045631135957770343, + "loss": 0.7645, + "step": 3790 + }, + { + "epoch": 0.59, + "learning_rate": 0.00045608206479403484, + "loss": 0.8419, + "step": 3800 + }, + { + "epoch": 0.59, + "eval_bleu": 0.11288874484370615, + "eval_loss": 0.7737380862236023, + "eval_meteor": 0.1795823568139638, + "eval_rouge1": 0.3517171303500152, + "eval_rouge2": 0.19480929623517923, + "eval_rougeL": 0.29489583256807006, + "eval_rougeLsum": 0.2948638738211926, + "eval_runtime": 1083.0127, + "eval_samples_per_second": 1.346, + "eval_steps_per_second": 0.224, + "step": 3800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00045585222779690636, + "loss": 0.7908, + "step": 3810 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004556218491910321, + "loss": 0.7799, + "step": 3820 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004553909295825508, + "loss": 0.7822, + "step": 3830 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004551594695790251, + "loss": 0.817, + "step": 3840 + }, + { + "epoch": 0.59, + "learning_rate": 0.0004549274697894392, + "loss": 0.7824, + "step": 3850 + }, + { + "epoch": 0.6, + "learning_rate": 0.00045469493082419757, + "loss": 0.8274, + "step": 3860 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004544618532951231, + "loss": 0.7928, + "step": 3870 + }, + { + "epoch": 0.6, + "learning_rate": 0.00045422823781545596, + "loss": 0.8542, + "step": 3880 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004539940849998516, + "loss": 0.8367, + "step": 3890 + }, + { + "epoch": 0.6, + "learning_rate": 0.00045375939546437916, + "loss": 0.8581, + "step": 3900 + }, + { + "epoch": 0.6, + "eval_bleu": 0.11781251984515774, + "eval_loss": 0.777377724647522, + "eval_meteor": 0.1829209829854384, + "eval_rouge1": 0.35563054870017097, + "eval_rouge2": 0.195963399617126, + "eval_rougeL": 0.2979095627621663, + "eval_rougeLsum": 0.2980344526869577, + "eval_runtime": 1135.666, + "eval_samples_per_second": 1.284, + "eval_steps_per_second": 0.214, + "step": 3900 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004535241698265199, + "loss": 0.8475, + "step": 3910 + }, + { + "epoch": 0.6, + "learning_rate": 0.0004532884087051657, + "loss": 0.8985, + "step": 3920 + }, + { + "epoch": 0.61, + "learning_rate": 0.0004530521127206173, + "loss": 0.8487, + "step": 3930 + }, + { + "epoch": 0.61, + "learning_rate": 0.0004528152824945827, + "loss": 0.7998, + "step": 3940 + }, + { + "epoch": 0.61, + "learning_rate": 0.00045257791865017537, + "loss": 0.7846, + "step": 3950 + }, + { + "epoch": 0.61, + "learning_rate": 0.00045234002181191303, + "loss": 0.7838, + "step": 3960 + }, + { + "epoch": 0.61, + "learning_rate": 0.00045210159260571553, + "loss": 0.8362, + "step": 3970 + }, + { + "epoch": 0.61, + "learning_rate": 0.00045186263165890344, + "loss": 0.8134, + "step": 3980 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004516231396001965, + "loss": 0.7644, + "step": 3990 + }, + { + "epoch": 0.62, + "learning_rate": 0.00045138311705971156, + "loss": 0.8646, + "step": 4000 + }, + { + "epoch": 0.62, + "eval_bleu": 0.13005061015510616, + "eval_loss": 0.7740051746368408, + "eval_meteor": 0.19733944533403236, + "eval_rouge1": 0.3588018183491992, + "eval_rouge2": 0.192617974264134, + "eval_rougeL": 0.2937254663710055, + "eval_rougeLsum": 0.2938043972565847, + "eval_runtime": 1241.046, + "eval_samples_per_second": 1.175, + "eval_steps_per_second": 0.196, + "step": 4000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004511425646689615, + "loss": 0.7807, + "step": 4010 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004509014830608532, + "loss": 0.8442, + "step": 4020 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004506598728696858, + "loss": 0.8019, + "step": 4030 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004504177347311492, + "loss": 0.7976, + "step": 4040 + }, + { + "epoch": 0.62, + "learning_rate": 0.0004501750692823224, + "loss": 0.9046, + "step": 4050 + }, + { + "epoch": 0.63, + "learning_rate": 0.00044993187716167195, + "loss": 0.7559, + "step": 4060 + }, + { + "epoch": 0.63, + "learning_rate": 0.0004496881590090498, + "loss": 0.8358, + "step": 4070 + }, + { + "epoch": 0.63, + "learning_rate": 0.00044944391546569213, + "loss": 0.791, + "step": 4080 + }, + { + "epoch": 0.63, + "learning_rate": 0.00044919914717421737, + "loss": 0.8007, + "step": 4090 + }, + { + "epoch": 0.63, + "learning_rate": 0.0004489538547786246, + "loss": 0.7515, + "step": 4100 + }, + { + "epoch": 0.63, + "eval_bleu": 0.10412061373178255, + "eval_loss": 0.7685180902481079, + "eval_meteor": 0.1663022168419246, + "eval_rouge1": 0.34510339415285696, + "eval_rouge2": 0.19279130187913826, + "eval_rougeL": 0.2909396669204617, + "eval_rougeLsum": 0.29102359815063095, + "eval_runtime": 1022.5977, + "eval_samples_per_second": 1.426, + "eval_steps_per_second": 0.238, + "step": 4100 + }, + { + "epoch": 0.63, + "learning_rate": 0.00044870803892429193, + "loss": 0.8091, + "step": 4110 + }, + { + "epoch": 0.64, + "learning_rate": 0.0004484617002579745, + "loss": 0.827, + "step": 4120 + }, + { + "epoch": 0.64, + "learning_rate": 0.0004482148394278033, + "loss": 0.8435, + "step": 4130 + }, + { + "epoch": 0.64, + "learning_rate": 0.00044796745708328297, + "loss": 0.7423, + "step": 4140 + }, + { + "epoch": 0.64, + "learning_rate": 0.0004477195538752902, + "loss": 0.8248, + "step": 4150 + }, + { + "epoch": 0.64, + "learning_rate": 0.00044747113045607234, + "loss": 0.8593, + "step": 4160 + }, + { + "epoch": 0.64, + "learning_rate": 0.0004472221874792454, + "loss": 0.8262, + "step": 4170 + }, + { + "epoch": 0.64, + "learning_rate": 0.00044697272559979207, + "loss": 0.7762, + "step": 4180 + }, + { + "epoch": 0.65, + "learning_rate": 0.00044672274547406067, + "loss": 0.8237, + "step": 4190 + }, + { + "epoch": 0.65, + "learning_rate": 0.0004464722477597629, + "loss": 0.8205, + "step": 4200 + }, + { + "epoch": 0.65, + "eval_bleu": 0.1236377868366298, + "eval_loss": 0.769066572189331, + "eval_meteor": 0.1904404203843731, + "eval_rouge1": 0.36051125596648215, + "eval_rouge2": 0.19601074427606005, + "eval_rougeL": 0.2983201969348075, + "eval_rougeLsum": 0.2983845195227759, + "eval_runtime": 1142.7885, + "eval_samples_per_second": 1.276, + "eval_steps_per_second": 0.213, + "step": 4200 + }, + { + "epoch": 0.65, + "learning_rate": 0.0004462212331159724, + "loss": 0.8109, + "step": 4210 + }, + { + "epoch": 0.65, + "learning_rate": 0.0004459697022031225, + "loss": 0.7642, + "step": 4220 + }, + { + "epoch": 0.65, + "learning_rate": 0.0004457176556830054, + "loss": 0.7603, + "step": 4230 + }, + { + "epoch": 0.65, + "learning_rate": 0.0004454650942187695, + "loss": 0.8168, + "step": 4240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0004452120184749181, + "loss": 0.8137, + "step": 4250 + }, + { + "epoch": 0.66, + "learning_rate": 0.00044495842911730773, + "loss": 0.8485, + "step": 4260 + }, + { + "epoch": 0.66, + "learning_rate": 0.0004447043268131462, + "loss": 0.8846, + "step": 4270 + }, + { + "epoch": 0.66, + "learning_rate": 0.0004444497122309909, + "loss": 0.7891, + "step": 4280 + }, + { + "epoch": 0.66, + "learning_rate": 0.0004441945860407471, + "loss": 0.8096, + "step": 4290 + }, + { + "epoch": 0.66, + "learning_rate": 0.000443938948913666, + "loss": 0.7932, + "step": 4300 + }, + { + "epoch": 0.66, + "eval_bleu": 0.11741021582498118, + "eval_loss": 0.7680177688598633, + "eval_meteor": 0.18724966148417066, + "eval_rouge1": 0.3538289045097152, + "eval_rouge2": 0.1926089993689462, + "eval_rougeL": 0.2952244077253912, + "eval_rougeLsum": 0.2950938047080252, + "eval_runtime": 1113.7012, + "eval_samples_per_second": 1.309, + "eval_steps_per_second": 0.218, + "step": 4300 + }, + { + "epoch": 0.66, + "learning_rate": 0.00044368280152234333, + "loss": 0.7672, + "step": 4310 + }, + { + "epoch": 0.67, + "learning_rate": 0.00044342614454071714, + "loss": 0.7621, + "step": 4320 + }, + { + "epoch": 0.67, + "learning_rate": 0.0004431689786440664, + "loss": 0.8101, + "step": 4330 + }, + { + "epoch": 0.67, + "learning_rate": 0.000442911304509009, + "loss": 0.8431, + "step": 4340 + }, + { + "epoch": 0.67, + "learning_rate": 0.0004426531228134999, + "loss": 0.8133, + "step": 4350 + }, + { + "epoch": 0.67, + "learning_rate": 0.0004423944342368297, + "loss": 0.8458, + "step": 4360 + }, + { + "epoch": 0.67, + "learning_rate": 0.0004421352394596225, + "loss": 0.8306, + "step": 4370 + }, + { + "epoch": 0.68, + "learning_rate": 0.00044187553916383445, + "loss": 0.8032, + "step": 4380 + }, + { + "epoch": 0.68, + "learning_rate": 0.00044161533403275135, + "loss": 0.8051, + "step": 4390 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004413546247509875, + "loss": 0.8578, + "step": 4400 + }, + { + "epoch": 0.68, + "eval_bleu": 0.12595726943541374, + "eval_loss": 0.7692683339118958, + "eval_meteor": 0.19222266255963855, + "eval_rouge1": 0.3581310742460724, + "eval_rouge2": 0.19531037225008183, + "eval_rougeL": 0.2956186541319774, + "eval_rougeLsum": 0.2956367500630852, + "eval_runtime": 1160.1163, + "eval_samples_per_second": 1.257, + "eval_steps_per_second": 0.209, + "step": 4400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00044109341200448385, + "loss": 0.7249, + "step": 4410 + }, + { + "epoch": 0.68, + "learning_rate": 0.0004408316964805056, + "loss": 0.8155, + "step": 4420 + }, + { + "epoch": 0.68, + "learning_rate": 0.000440569478867641, + "loss": 0.8433, + "step": 4430 + }, + { + "epoch": 0.68, + "learning_rate": 0.00044030675985579917, + "loss": 0.7484, + "step": 4440 + }, + { + "epoch": 0.69, + "learning_rate": 0.00044004354013620875, + "loss": 0.8086, + "step": 4450 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004397798204014154, + "loss": 0.8796, + "step": 4460 + }, + { + "epoch": 0.69, + "learning_rate": 0.00043951560134528056, + "loss": 0.8485, + "step": 4470 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004392508836629795, + "loss": 0.7362, + "step": 4480 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004389856680509991, + "loss": 0.8347, + "step": 4490 + }, + { + "epoch": 0.69, + "learning_rate": 0.0004387199552071366, + "loss": 0.8119, + "step": 4500 + }, + { + "epoch": 0.69, + "eval_bleu": 0.12373981502065873, + "eval_loss": 0.7634089589118958, + "eval_meteor": 0.18792913443871737, + "eval_rouge1": 0.3586570378567951, + "eval_rouge2": 0.1957026657950927, + "eval_rougeL": 0.29818979034251414, + "eval_rougeLsum": 0.2982401703305406, + "eval_runtime": 1151.9743, + "eval_samples_per_second": 1.266, + "eval_steps_per_second": 0.211, + "step": 4500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043845374583049735, + "loss": 0.7577, + "step": 4510 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004381870406214932, + "loss": 0.7928, + "step": 4520 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004379198402818403, + "loss": 0.7664, + "step": 4530 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043765214551455794, + "loss": 0.7189, + "step": 4540 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043738395702396594, + "loss": 0.8276, + "step": 4550 + }, + { + "epoch": 0.7, + "learning_rate": 0.0004371152755156833, + "loss": 0.7872, + "step": 4560 + }, + { + "epoch": 0.7, + "learning_rate": 0.00043684610169662607, + "loss": 0.8111, + "step": 4570 + }, + { + "epoch": 0.71, + "learning_rate": 0.00043657643627500575, + "loss": 0.8056, + "step": 4580 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004363062799603271, + "loss": 0.7623, + "step": 4590 + }, + { + "epoch": 0.71, + "learning_rate": 0.00043603563346338644, + "loss": 0.8661, + "step": 4600 + }, + { + "epoch": 0.71, + "eval_bleu": 0.11089234547528978, + "eval_loss": 0.7632281184196472, + "eval_meteor": 0.17341941602705138, + "eval_rouge1": 0.34942191982099435, + "eval_rouge2": 0.19591049653677217, + "eval_rougeL": 0.29526297170998683, + "eval_rougeLsum": 0.2952619744332252, + "eval_runtime": 1071.5418, + "eval_samples_per_second": 1.361, + "eval_steps_per_second": 0.227, + "step": 4600 + }, + { + "epoch": 0.71, + "learning_rate": 0.00043576449749627, + "loss": 0.7433, + "step": 4610 + }, + { + "epoch": 0.71, + "learning_rate": 0.0004354928727723516, + "loss": 0.7855, + "step": 4620 + }, + { + "epoch": 0.71, + "learning_rate": 0.00043522076000629124, + "loss": 0.7527, + "step": 4630 + }, + { + "epoch": 0.72, + "learning_rate": 0.00043494815991403275, + "loss": 0.8015, + "step": 4640 + }, + { + "epoch": 0.72, + "learning_rate": 0.0004346750732128023, + "loss": 0.7345, + "step": 4650 + }, + { + "epoch": 0.72, + "learning_rate": 0.0004344015006211062, + "loss": 0.7952, + "step": 4660 + }, + { + "epoch": 0.72, + "learning_rate": 0.0004341274428587294, + "loss": 0.8057, + "step": 4670 + }, + { + "epoch": 0.72, + "learning_rate": 0.00043385290064673317, + "loss": 0.8136, + "step": 4680 + }, + { + "epoch": 0.72, + "learning_rate": 0.0004335778747074535, + "loss": 0.8069, + "step": 4690 + }, + { + "epoch": 0.72, + "learning_rate": 0.00043330236576449887, + "loss": 0.8397, + "step": 4700 + }, + { + "epoch": 0.72, + "eval_bleu": 0.11830285193176951, + "eval_loss": 0.7616626620292664, + "eval_meteor": 0.1821488344842372, + "eval_rouge1": 0.3558580945132578, + "eval_rouge2": 0.197915707595695, + "eval_rougeL": 0.2981723775850291, + "eval_rougeLsum": 0.29833413809671927, + "eval_runtime": 1132.5137, + "eval_samples_per_second": 1.287, + "eval_steps_per_second": 0.215, + "step": 4700 + }, + { + "epoch": 0.73, + "learning_rate": 0.000433026374542749, + "loss": 0.7386, + "step": 4710 + }, + { + "epoch": 0.73, + "learning_rate": 0.00043274990176835217, + "loss": 0.7961, + "step": 4720 + }, + { + "epoch": 0.73, + "learning_rate": 0.00043247294816872365, + "loss": 0.8104, + "step": 4730 + }, + { + "epoch": 0.73, + "learning_rate": 0.0004321955144725439, + "loss": 0.8091, + "step": 4740 + }, + { + "epoch": 0.73, + "learning_rate": 0.00043191760140975666, + "loss": 0.7693, + "step": 4750 + }, + { + "epoch": 0.73, + "learning_rate": 0.0004316392097115666, + "loss": 0.8092, + "step": 4760 + }, + { + "epoch": 0.74, + "learning_rate": 0.000431360340110438, + "loss": 0.8053, + "step": 4770 + }, + { + "epoch": 0.74, + "learning_rate": 0.00043108099334009234, + "loss": 0.7646, + "step": 4780 + }, + { + "epoch": 0.74, + "learning_rate": 0.0004308011701355066, + "loss": 0.8395, + "step": 4790 + }, + { + "epoch": 0.74, + "learning_rate": 0.0004305208712329114, + "loss": 0.7852, + "step": 4800 + }, + { + "epoch": 0.74, + "eval_bleu": 0.12483561089578614, + "eval_loss": 0.7647390365600586, + "eval_meteor": 0.19032935944350426, + "eval_rouge1": 0.35835454697825203, + "eval_rouge2": 0.19542840978745862, + "eval_rougeL": 0.29801385574610495, + "eval_rougeLsum": 0.29793876372769, + "eval_runtime": 1120.9038, + "eval_samples_per_second": 1.301, + "eval_steps_per_second": 0.217, + "step": 4800 + }, + { + "epoch": 0.74, + "learning_rate": 0.0004302400973697888, + "loss": 0.7485, + "step": 4810 + }, + { + "epoch": 0.74, + "learning_rate": 0.00042995884928487054, + "loss": 0.7812, + "step": 4820 + }, + { + "epoch": 0.74, + "learning_rate": 0.00042967712771813614, + "loss": 0.7857, + "step": 4830 + }, + { + "epoch": 0.75, + "learning_rate": 0.00042939493341081087, + "loss": 0.8019, + "step": 4840 + }, + { + "epoch": 0.75, + "learning_rate": 0.00042911226710536365, + "loss": 0.8257, + "step": 4850 + }, + { + "epoch": 0.75, + "learning_rate": 0.00042882912954550544, + "loss": 0.7601, + "step": 4860 + }, + { + "epoch": 0.75, + "learning_rate": 0.00042854552147618706, + "loss": 0.7856, + "step": 4870 + }, + { + "epoch": 0.75, + "learning_rate": 0.0004282614436435972, + "loss": 0.8138, + "step": 4880 + }, + { + "epoch": 0.75, + "learning_rate": 0.0004279768967951605, + "loss": 0.7765, + "step": 4890 + }, + { + "epoch": 0.76, + "learning_rate": 0.00042769188167953565, + "loss": 0.767, + "step": 4900 + }, + { + "epoch": 0.76, + "eval_bleu": 0.1302305276945029, + "eval_loss": 0.7597366571426392, + "eval_meteor": 0.19752698525972517, + "eval_rouge1": 0.36296173703809864, + "eval_rouge2": 0.19816465507239917, + "eval_rougeL": 0.3000226808734052, + "eval_rougeLsum": 0.3000833989034842, + "eval_runtime": 1189.0731, + "eval_samples_per_second": 1.226, + "eval_steps_per_second": 0.204, + "step": 4900 + }, + { + "epoch": 0.76, + "learning_rate": 0.0004274063990466135, + "loss": 0.8156, + "step": 4910 + }, + { + "epoch": 0.76, + "learning_rate": 0.0004271204496475148, + "loss": 0.7648, + "step": 4920 + }, + { + "epoch": 0.76, + "learning_rate": 0.00042683403423458843, + "loss": 0.7364, + "step": 4930 + }, + { + "epoch": 0.76, + "learning_rate": 0.00042654715356140946, + "loss": 0.8329, + "step": 4940 + }, + { + "epoch": 0.76, + "learning_rate": 0.0004262598083827769, + "loss": 0.8443, + "step": 4950 + }, + { + "epoch": 0.76, + "learning_rate": 0.000425971999454712, + "loss": 0.8809, + "step": 4960 + }, + { + "epoch": 0.77, + "learning_rate": 0.0004256837275344564, + "loss": 0.7959, + "step": 4970 + }, + { + "epoch": 0.77, + "learning_rate": 0.0004253949933804694, + "loss": 0.82, + "step": 4980 + }, + { + "epoch": 0.77, + "learning_rate": 0.00042510579775242684, + "loss": 0.8249, + "step": 4990 + }, + { + "epoch": 0.77, + "learning_rate": 0.00042481614141121873, + "loss": 0.8284, + "step": 5000 + }, + { + "epoch": 0.77, + "eval_bleu": 0.13696974043564947, + "eval_loss": 0.7628008127212524, + "eval_meteor": 0.20833444182082805, + "eval_rouge1": 0.367375191425503, + "eval_rouge2": 0.1978131466130248, + "eval_rougeL": 0.29990090210288556, + "eval_rougeLsum": 0.3001498394981842, + "eval_runtime": 1199.5655, + "eval_samples_per_second": 1.215, + "eval_steps_per_second": 0.203, + "step": 5000 + }, + { + "epoch": 0.77, + "learning_rate": 0.000424526025118947, + "loss": 0.7842, + "step": 5010 + }, + { + "epoch": 0.77, + "learning_rate": 0.00042423544963892393, + "loss": 0.8718, + "step": 5020 + }, + { + "epoch": 0.78, + "learning_rate": 0.0004239444157356699, + "loss": 0.8612, + "step": 5030 + }, + { + "epoch": 0.78, + "learning_rate": 0.00042365292417491135, + "loss": 0.7878, + "step": 5040 + }, + { + "epoch": 0.78, + "learning_rate": 0.000423360975723579, + "loss": 0.8274, + "step": 5050 + }, + { + "epoch": 0.78, + "learning_rate": 0.0004230685711498055, + "loss": 0.8017, + "step": 5060 + }, + { + "epoch": 0.78, + "learning_rate": 0.0004227757112229237, + "loss": 0.8154, + "step": 5070 + }, + { + "epoch": 0.78, + "learning_rate": 0.00042248239671346455, + "loss": 0.7849, + "step": 5080 + }, + { + "epoch": 0.78, + "learning_rate": 0.0004221886283931549, + "loss": 0.8234, + "step": 5090 + }, + { + "epoch": 0.79, + "learning_rate": 0.00042189440703491556, + "loss": 0.7984, + "step": 5100 + }, + { + "epoch": 0.79, + "eval_bleu": 0.11530197031936106, + "eval_loss": 0.755507230758667, + "eval_meteor": 0.1806513827098456, + "eval_rouge1": 0.3555621488323981, + "eval_rouge2": 0.19846639016470374, + "eval_rougeL": 0.29831273382603013, + "eval_rougeLsum": 0.2980805463936066, + "eval_runtime": 1059.4931, + "eval_samples_per_second": 1.376, + "eval_steps_per_second": 0.229, + "step": 5100 + }, + { + "epoch": 0.79, + "learning_rate": 0.0004215997334128595, + "loss": 0.8037, + "step": 5110 + }, + { + "epoch": 0.79, + "learning_rate": 0.0004213046083022896, + "loss": 0.7687, + "step": 5120 + }, + { + "epoch": 0.79, + "learning_rate": 0.00042100903247969647, + "loss": 0.7573, + "step": 5130 + }, + { + "epoch": 0.79, + "learning_rate": 0.00042071300672275676, + "loss": 0.8173, + "step": 5140 + }, + { + "epoch": 0.79, + "learning_rate": 0.0004204165318103307, + "loss": 0.8508, + "step": 5150 + }, + { + "epoch": 0.8, + "learning_rate": 0.00042011960852246044, + "loss": 0.8763, + "step": 5160 + }, + { + "epoch": 0.8, + "learning_rate": 0.0004198222376403678, + "loss": 0.8561, + "step": 5170 + }, + { + "epoch": 0.8, + "learning_rate": 0.00041952441994645224, + "loss": 0.8034, + "step": 5180 + }, + { + "epoch": 0.8, + "learning_rate": 0.00041922615622428885, + "loss": 0.7624, + "step": 5190 + }, + { + "epoch": 0.8, + "learning_rate": 0.000418927447258626, + "loss": 0.8129, + "step": 5200 + }, + { + "epoch": 0.8, + "eval_bleu": 0.1280088834881739, + "eval_loss": 0.7529436945915222, + "eval_meteor": 0.19464046676396524, + "eval_rouge1": 0.3620965211772262, + "eval_rouge2": 0.19866324113960265, + "eval_rougeL": 0.29920122666998356, + "eval_rougeLsum": 0.2993664973235719, + "eval_runtime": 1160.682, + "eval_samples_per_second": 1.256, + "eval_steps_per_second": 0.209, + "step": 5200 + }, + { + "epoch": 0.8, + "learning_rate": 0.00041862829383538397, + "loss": 0.8095, + "step": 5210 + }, + { + "epoch": 0.8, + "learning_rate": 0.00041832869674165204, + "loss": 0.7788, + "step": 5220 + }, + { + "epoch": 0.81, + "learning_rate": 0.00041802865676568695, + "loss": 0.8048, + "step": 5230 + }, + { + "epoch": 0.81, + "learning_rate": 0.0004177281746969107, + "loss": 0.8296, + "step": 5240 + }, + { + "epoch": 0.81, + "learning_rate": 0.00041742725132590854, + "loss": 0.7797, + "step": 5250 + }, + { + "epoch": 0.81, + "learning_rate": 0.0004171258874444266, + "loss": 0.8777, + "step": 5260 + }, + { + "epoch": 0.81, + "learning_rate": 0.0004168240838453702, + "loss": 0.7669, + "step": 5270 + }, + { + "epoch": 0.81, + "learning_rate": 0.00041652184132280146, + "loss": 0.831, + "step": 5280 + }, + { + "epoch": 0.82, + "learning_rate": 0.00041621916067193746, + "loss": 0.7852, + "step": 5290 + }, + { + "epoch": 0.82, + "learning_rate": 0.00041591604268914796, + "loss": 0.7811, + "step": 5300 + }, + { + "epoch": 0.82, + "eval_bleu": 0.12320084852539886, + "eval_loss": 0.7549387216567993, + "eval_meteor": 0.18792325044373648, + "eval_rouge1": 0.35864728570941573, + "eval_rouge2": 0.19706396904795415, + "eval_rougeL": 0.29758291424649863, + "eval_rougeLsum": 0.29778392714680746, + "eval_runtime": 1106.6121, + "eval_samples_per_second": 1.318, + "eval_steps_per_second": 0.22, + "step": 5300 + }, + { + "epoch": 0.82, + "learning_rate": 0.0004156124881719533, + "loss": 0.7769, + "step": 5310 + }, + { + "epoch": 0.82, + "learning_rate": 0.0004153084979190224, + "loss": 0.7397, + "step": 5320 + }, + { + "epoch": 0.82, + "learning_rate": 0.00041500407273017075, + "loss": 0.7779, + "step": 5330 + }, + { + "epoch": 0.82, + "learning_rate": 0.0004146992134063581, + "loss": 0.7955, + "step": 5340 + }, + { + "epoch": 0.82, + "learning_rate": 0.00041439392074968617, + "loss": 0.7659, + "step": 5350 + }, + { + "epoch": 0.83, + "learning_rate": 0.00041408819556339735, + "loss": 0.8533, + "step": 5360 + }, + { + "epoch": 0.83, + "learning_rate": 0.00041378203865187154, + "loss": 0.7967, + "step": 5370 + }, + { + "epoch": 0.83, + "learning_rate": 0.00041347545082062476, + "loss": 0.7941, + "step": 5380 + }, + { + "epoch": 0.83, + "learning_rate": 0.0004131684328763069, + "loss": 0.849, + "step": 5390 + }, + { + "epoch": 0.83, + "learning_rate": 0.00041286098562669926, + "loss": 0.836, + "step": 5400 + }, + { + "epoch": 0.83, + "eval_bleu": 0.12588415215553295, + "eval_loss": 0.75471031665802, + "eval_meteor": 0.1968519512568269, + "eval_rouge1": 0.36628943428680916, + "eval_rouge2": 0.2000519092857415, + "eval_rougeL": 0.30313942317590103, + "eval_rougeLsum": 0.3031091247198662, + "eval_runtime": 1092.5631, + "eval_samples_per_second": 1.334, + "eval_steps_per_second": 0.222, + "step": 5400 + }, + { + "epoch": 0.83, + "learning_rate": 0.00041255310988071284, + "loss": 0.7849, + "step": 5410 + }, + { + "epoch": 0.84, + "learning_rate": 0.00041224480644838586, + "loss": 0.7259, + "step": 5420 + }, + { + "epoch": 0.84, + "learning_rate": 0.000411936076140882, + "loss": 0.8354, + "step": 5430 + }, + { + "epoch": 0.84, + "learning_rate": 0.0004116269197704881, + "loss": 0.7819, + "step": 5440 + }, + { + "epoch": 0.84, + "learning_rate": 0.0004113173381506117, + "loss": 0.8633, + "step": 5450 + }, + { + "epoch": 0.84, + "learning_rate": 0.0004110073320957795, + "loss": 0.8141, + "step": 5460 + }, + { + "epoch": 0.84, + "learning_rate": 0.0004106969024216348, + "loss": 0.7929, + "step": 5470 + }, + { + "epoch": 0.84, + "learning_rate": 0.0004103860499449355, + "loss": 0.7972, + "step": 5480 + }, + { + "epoch": 0.85, + "learning_rate": 0.0004100747754835518, + "loss": 0.8356, + "step": 5490 + }, + { + "epoch": 0.85, + "learning_rate": 0.0004097630798564643, + "loss": 0.8168, + "step": 5500 + }, + { + "epoch": 0.85, + "eval_bleu": 0.12183344025510169, + "eval_loss": 0.7511031627655029, + "eval_meteor": 0.18681450779014622, + "eval_rouge1": 0.35671979001980275, + "eval_rouge2": 0.1960218610645066, + "eval_rougeL": 0.29562632322337584, + "eval_rougeLsum": 0.2957310907035756, + "eval_runtime": 1123.9062, + "eval_samples_per_second": 1.297, + "eval_steps_per_second": 0.216, + "step": 5500 + }, + { + "epoch": 0.85, + "learning_rate": 0.0004094509638837617, + "loss": 0.7949, + "step": 5510 + }, + { + "epoch": 0.85, + "learning_rate": 0.0004091384283866385, + "loss": 0.8108, + "step": 5520 + }, + { + "epoch": 0.85, + "learning_rate": 0.00040882547418739316, + "loss": 0.6972, + "step": 5530 + }, + { + "epoch": 0.85, + "learning_rate": 0.00040851210210942577, + "loss": 0.7515, + "step": 5540 + }, + { + "epoch": 0.86, + "learning_rate": 0.00040819831297723573, + "loss": 0.7821, + "step": 5550 + }, + { + "epoch": 0.86, + "learning_rate": 0.0004078841076164199, + "loss": 0.7728, + "step": 5560 + }, + { + "epoch": 0.86, + "learning_rate": 0.0004075694868536701, + "loss": 0.7493, + "step": 5570 + }, + { + "epoch": 0.86, + "learning_rate": 0.00040725445151677136, + "loss": 0.8138, + "step": 5580 + }, + { + "epoch": 0.86, + "learning_rate": 0.0004069390024345991, + "loss": 0.8215, + "step": 5590 + }, + { + "epoch": 0.86, + "learning_rate": 0.0004066231404371177, + "loss": 0.8057, + "step": 5600 + }, + { + "epoch": 0.86, + "eval_bleu": 0.12330222084393866, + "eval_loss": 0.7514679431915283, + "eval_meteor": 0.19033581377995815, + "eval_rouge1": 0.3581105465101981, + "eval_rouge2": 0.19665944172196212, + "eval_rougeL": 0.2981881930811607, + "eval_rougeLsum": 0.2979884824891669, + "eval_runtime": 1170.8391, + "eval_samples_per_second": 1.245, + "eval_steps_per_second": 0.208, + "step": 5600 + }, + { + "epoch": 0.86, + "learning_rate": 0.00040630686635537773, + "loss": 0.7275, + "step": 5610 + }, + { + "epoch": 0.87, + "learning_rate": 0.000405990181021514, + "loss": 0.7879, + "step": 5620 + }, + { + "epoch": 0.87, + "learning_rate": 0.00040567308526874324, + "loss": 0.7324, + "step": 5630 + }, + { + "epoch": 0.87, + "learning_rate": 0.00040535557993136236, + "loss": 0.7797, + "step": 5640 + }, + { + "epoch": 0.87, + "learning_rate": 0.0004050376658447456, + "loss": 0.7792, + "step": 5650 + }, + { + "epoch": 0.87, + "learning_rate": 0.0004047193438453427, + "loss": 0.7735, + "step": 5660 + }, + { + "epoch": 0.87, + "learning_rate": 0.0004044006147706767, + "loss": 0.7758, + "step": 5670 + }, + { + "epoch": 0.88, + "learning_rate": 0.00040408147945934173, + "loss": 0.8429, + "step": 5680 + }, + { + "epoch": 0.88, + "learning_rate": 0.00040376193875100053, + "loss": 0.7891, + "step": 5690 + }, + { + "epoch": 0.88, + "learning_rate": 0.0004034419934863828, + "loss": 0.8045, + "step": 5700 + }, + { + "epoch": 0.88, + "eval_bleu": 0.12057087168942168, + "eval_loss": 0.7541698217391968, + "eval_meteor": 0.1864159375566591, + "eval_rouge1": 0.3593783361406444, + "eval_rouge2": 0.19683759603742187, + "eval_rougeL": 0.29803496391685336, + "eval_rougeLsum": 0.29788976506923015, + "eval_runtime": 1127.7837, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.215, + "step": 5700 + }, + { + "epoch": 0.88, + "learning_rate": 0.0004031216445072822, + "loss": 0.7893, + "step": 5710 + }, + { + "epoch": 0.88, + "learning_rate": 0.0004028008926565551, + "loss": 0.8821, + "step": 5720 + }, + { + "epoch": 0.88, + "learning_rate": 0.0004024797387781175, + "loss": 0.8032, + "step": 5730 + }, + { + "epoch": 0.88, + "learning_rate": 0.0004021581837169432, + "loss": 0.7978, + "step": 5740 + }, + { + "epoch": 0.89, + "learning_rate": 0.00040183622831906166, + "loss": 0.8345, + "step": 5750 + }, + { + "epoch": 0.89, + "learning_rate": 0.0004015138734315554, + "loss": 0.7948, + "step": 5760 + }, + { + "epoch": 0.89, + "learning_rate": 0.0004011911199025584, + "loss": 0.7712, + "step": 5770 + }, + { + "epoch": 0.89, + "learning_rate": 0.00040086796858125324, + "loss": 0.8137, + "step": 5780 + }, + { + "epoch": 0.89, + "learning_rate": 0.00040054442031786907, + "loss": 0.7523, + "step": 5790 + }, + { + "epoch": 0.89, + "learning_rate": 0.0004002204759636796, + "loss": 0.7927, + "step": 5800 + }, + { + "epoch": 0.89, + "eval_bleu": 0.12228391385106198, + "eval_loss": 0.7471486926078796, + "eval_meteor": 0.19151605381653838, + "eval_rouge1": 0.3620636405755351, + "eval_rouge2": 0.19861702778304668, + "eval_rougeL": 0.30207238821110516, + "eval_rougeLsum": 0.3019676001231871, + "eval_runtime": 1147.7894, + "eval_samples_per_second": 1.27, + "eval_steps_per_second": 0.212, + "step": 5800 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039989613637100055, + "loss": 0.7512, + "step": 5810 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039957140239318744, + "loss": 0.7385, + "step": 5820 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039924627488463374, + "loss": 0.8469, + "step": 5830 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039892075470076795, + "loss": 0.72, + "step": 5840 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003985948426980521, + "loss": 0.797, + "step": 5850 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003982685397339789, + "loss": 0.7778, + "step": 5860 + }, + { + "epoch": 0.9, + "learning_rate": 0.00039794184666706964, + "loss": 0.7285, + "step": 5870 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003976147643568721, + "loss": 0.7779, + "step": 5880 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039728729366395824, + "loss": 0.7841, + "step": 5890 + }, + { + "epoch": 0.91, + "learning_rate": 0.00039695943544992173, + "loss": 0.8402, + "step": 5900 + }, + { + "epoch": 0.91, + "eval_bleu": 0.11653429141819567, + "eval_loss": 0.7500145435333252, + "eval_meteor": 0.18259693460048834, + "eval_rouge1": 0.35693896022311644, + "eval_rouge2": 0.19481212920926488, + "eval_rougeL": 0.2974158389948098, + "eval_rougeLsum": 0.2972789083405306, + "eval_runtime": 1127.326, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.216, + "step": 5900 + }, + { + "epoch": 0.91, + "learning_rate": 0.000396631190577376, + "loss": 0.8434, + "step": 5910 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003963025599099516, + "loss": 0.8225, + "step": 5920 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003959735443122943, + "loss": 0.7828, + "step": 5930 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039564414465006244, + "loss": 0.7987, + "step": 5940 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039531436178992513, + "loss": 0.7857, + "step": 5950 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003949841965995595, + "loss": 0.7992, + "step": 5960 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003946536499476487, + "loss": 0.788, + "step": 5970 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039432272270387955, + "loss": 0.769, + "step": 5980 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039399141573893997, + "loss": 0.8262, + "step": 5990 + }, + { + "epoch": 0.92, + "learning_rate": 0.00039365972992451735, + "loss": 0.7963, + "step": 6000 + }, + { + "epoch": 0.92, + "eval_bleu": 0.12318553450668913, + "eval_loss": 0.7483591437339783, + "eval_meteor": 0.1913410867293855, + "eval_rouge1": 0.3654868855873549, + "eval_rouge2": 0.20050423202844517, + "eval_rougeL": 0.30447787352072553, + "eval_rougeLsum": 0.30429425068099136, + "eval_runtime": 1124.7101, + "eval_samples_per_second": 1.296, + "eval_steps_per_second": 0.216, + "step": 6000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003933276661332955, + "loss": 0.7798, + "step": 6010 + }, + { + "epoch": 0.93, + "learning_rate": 0.00039299522523895296, + "loss": 0.8611, + "step": 6020 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003926624081161604, + "loss": 0.8131, + "step": 6030 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003923292156405781, + "loss": 0.7202, + "step": 6040 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003919956486888544, + "loss": 0.7797, + "step": 6050 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003916617081386225, + "loss": 0.7561, + "step": 6060 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003913273948684987, + "loss": 0.71, + "step": 6070 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039099270975808, + "loss": 0.7608, + "step": 6080 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003906576536879416, + "loss": 0.8031, + "step": 6090 + }, + { + "epoch": 0.94, + "learning_rate": 0.00039032222753963483, + "loss": 0.8034, + "step": 6100 + }, + { + "epoch": 0.94, + "eval_bleu": 0.11720116971140243, + "eval_loss": 0.7478321194648743, + "eval_meteor": 0.1819934943700329, + "eval_rouge1": 0.35727692353329465, + "eval_rouge2": 0.19816847975598717, + "eval_rougeL": 0.29895230165351805, + "eval_rougeLsum": 0.29907502151518195, + "eval_runtime": 1070.5188, + "eval_samples_per_second": 1.362, + "eval_steps_per_second": 0.227, + "step": 6100 + }, + { + "epoch": 0.94, + "learning_rate": 0.00038998643219568467, + "loss": 0.7886, + "step": 6110 + }, + { + "epoch": 0.94, + "learning_rate": 0.00038965026853958755, + "loss": 0.7854, + "step": 6120 + }, + { + "epoch": 0.95, + "learning_rate": 0.00038931373745580884, + "loss": 0.7956, + "step": 6130 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003889768398297807, + "loss": 0.7957, + "step": 6140 + }, + { + "epoch": 0.95, + "learning_rate": 0.00038863957654789957, + "loss": 0.7563, + "step": 6150 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003883019484975241, + "loss": 0.7558, + "step": 6160 + }, + { + "epoch": 0.95, + "learning_rate": 0.00038796395656697267, + "loss": 0.797, + "step": 6170 + }, + { + "epoch": 0.95, + "learning_rate": 0.00038762560164552095, + "loss": 0.7864, + "step": 6180 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003872868846233997, + "loss": 0.7932, + "step": 6190 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003869478063917924, + "loss": 0.7569, + "step": 6200 + }, + { + "epoch": 0.96, + "eval_bleu": 0.12021270355030027, + "eval_loss": 0.7468777298927307, + "eval_meteor": 0.18865042542151908, + "eval_rouge1": 0.36340810125388445, + "eval_rouge2": 0.20321855929268942, + "eval_rougeL": 0.3042800348780287, + "eval_rougeLsum": 0.3041477067076571, + "eval_runtime": 1060.9151, + "eval_samples_per_second": 1.374, + "eval_steps_per_second": 0.229, + "step": 6200 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003866083678428328, + "loss": 0.7893, + "step": 6210 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003862685698696028, + "loss": 0.7841, + "step": 6220 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003859284133661299, + "loss": 0.7696, + "step": 6230 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003855878992273849, + "loss": 0.7964, + "step": 6240 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003852470283492796, + "loss": 0.7731, + "step": 6250 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003849058016286644, + "loss": 0.7562, + "step": 6260 + }, + { + "epoch": 0.97, + "learning_rate": 0.00038456421996332593, + "loss": 0.7756, + "step": 6270 + }, + { + "epoch": 0.97, + "learning_rate": 0.00038422228425198456, + "loss": 0.7327, + "step": 6280 + }, + { + "epoch": 0.97, + "learning_rate": 0.00038387999539429255, + "loss": 0.7831, + "step": 6290 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003835373542908308, + "loss": 0.7728, + "step": 6300 + }, + { + "epoch": 0.97, + "eval_bleu": 0.13571042313085763, + "eval_loss": 0.7441371083259583, + "eval_meteor": 0.20429787752537404, + "eval_rouge1": 0.36910530156190763, + "eval_rouge2": 0.20076171169403834, + "eval_rougeL": 0.3028160316079058, + "eval_rougeLsum": 0.3028887886618019, + "eval_runtime": 1240.9106, + "eval_samples_per_second": 1.175, + "eval_steps_per_second": 0.196, + "step": 6300 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003831943618431074, + "loss": 0.8109, + "step": 6310 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003828510189535548, + "loss": 0.7687, + "step": 6320 + }, + { + "epoch": 0.98, + "learning_rate": 0.00038250732652552713, + "loss": 0.7796, + "step": 6330 + }, + { + "epoch": 0.98, + "learning_rate": 0.00038216328546329854, + "loss": 0.7713, + "step": 6340 + }, + { + "epoch": 0.98, + "learning_rate": 0.00038181889667206036, + "loss": 0.8039, + "step": 6350 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003814741610579189, + "loss": 0.7761, + "step": 6360 + }, + { + "epoch": 0.98, + "learning_rate": 0.00038112907952789264, + "loss": 0.7536, + "step": 6370 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003807836529899106, + "loss": 0.7478, + "step": 6380 + }, + { + "epoch": 0.99, + "learning_rate": 0.00038043788235280927, + "loss": 0.7639, + "step": 6390 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003800917685263307, + "loss": 0.7624, + "step": 6400 + }, + { + "epoch": 0.99, + "eval_bleu": 0.13360665201533722, + "eval_loss": 0.743972659111023, + "eval_meteor": 0.19919552001100382, + "eval_rouge1": 0.3659102912435709, + "eval_rouge2": 0.19789641111146775, + "eval_rougeL": 0.3016512273674288, + "eval_rougeLsum": 0.3015437367125981, + "eval_runtime": 1272.3138, + "eval_samples_per_second": 1.146, + "eval_steps_per_second": 0.191, + "step": 6400 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003797453124211196, + "loss": 0.7455, + "step": 6410 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003793985149487215, + "loss": 0.7817, + "step": 6420 + }, + { + "epoch": 0.99, + "learning_rate": 0.00037905137702158, + "loss": 0.7936, + "step": 6430 + }, + { + "epoch": 0.99, + "learning_rate": 0.00037870389955303426, + "loss": 0.7884, + "step": 6440 + }, + { + "epoch": 0.99, + "learning_rate": 0.00037835608345731717, + "loss": 0.7477, + "step": 6450 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003780079296495523, + "loss": 0.7333, + "step": 6460 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003776594390457517, + "loss": 0.7712, + "step": 6470 + }, + { + "epoch": 1.0, + "learning_rate": 0.00037731061256281395, + "loss": 0.8028, + "step": 6480 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003769614511185209, + "loss": 0.836, + "step": 6490 + }, + { + "epoch": 1.0, + "learning_rate": 0.00037661195563153577, + "loss": 0.7102, + "step": 6500 + }, + { + "epoch": 1.0, + "eval_bleu": 0.13683765315402233, + "eval_loss": 0.7432180643081665, + "eval_meteor": 0.20768677295384516, + "eval_rouge1": 0.3735959078332925, + "eval_rouge2": 0.20419374346780084, + "eval_rougeL": 0.30712118478863093, + "eval_rougeLsum": 0.30707788341285575, + "eval_runtime": 1270.0874, + "eval_samples_per_second": 1.148, + "eval_steps_per_second": 0.191, + "step": 6500 + }, + { + "epoch": 1.0, + "learning_rate": 0.000376262127021401, + "loss": 0.7216, + "step": 6510 + }, + { + "epoch": 1.01, + "learning_rate": 0.00037591196620853515, + "loss": 0.7167, + "step": 6520 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003755614741142309, + "loss": 0.7174, + "step": 6530 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003752106516606526, + "loss": 0.7206, + "step": 6540 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003748594997708339, + "loss": 0.7271, + "step": 6550 + }, + { + "epoch": 1.01, + "learning_rate": 0.00037450801936867497, + "loss": 0.7166, + "step": 6560 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003741562113789405, + "loss": 0.6894, + "step": 6570 + }, + { + "epoch": 1.01, + "learning_rate": 0.000373804076727257, + "loss": 0.7399, + "step": 6580 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003734516163401105, + "loss": 0.7341, + "step": 6590 + }, + { + "epoch": 1.02, + "learning_rate": 0.00037309883114484407, + "loss": 0.6979, + "step": 6600 + }, + { + "epoch": 1.02, + "eval_bleu": 0.11959061229637678, + "eval_loss": 0.7399081587791443, + "eval_meteor": 0.18578293382867828, + "eval_rouge1": 0.35998311194622934, + "eval_rouge2": 0.2008245839204704, + "eval_rougeL": 0.30212159744533995, + "eval_rougeLsum": 0.30208186381396035, + "eval_runtime": 1117.6606, + "eval_samples_per_second": 1.305, + "eval_steps_per_second": 0.217, + "step": 6600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00037274572206965516, + "loss": 0.695, + "step": 6610 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003723922900435937, + "loss": 0.7373, + "step": 6620 + }, + { + "epoch": 1.02, + "learning_rate": 0.00037203853599655914, + "loss": 0.7002, + "step": 6630 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003716844608592981, + "loss": 0.7566, + "step": 6640 + }, + { + "epoch": 1.03, + "learning_rate": 0.00037133006556340216, + "loss": 0.7111, + "step": 6650 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003709753510413052, + "loss": 0.745, + "step": 6660 + }, + { + "epoch": 1.03, + "learning_rate": 0.00037062031822628094, + "loss": 0.6765, + "step": 6670 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003702649680524408, + "loss": 0.7619, + "step": 6680 + }, + { + "epoch": 1.03, + "learning_rate": 0.00036990930145473083, + "loss": 0.6821, + "step": 6690 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003695533193689298, + "loss": 0.7149, + "step": 6700 + }, + { + "epoch": 1.03, + "eval_bleu": 0.12635236721625973, + "eval_loss": 0.739450216293335, + "eval_meteor": 0.19553725175716402, + "eval_rouge1": 0.365661266915583, + "eval_rouge2": 0.20178360342416046, + "eval_rougeL": 0.3026326239453274, + "eval_rougeLsum": 0.30259399461990677, + "eval_runtime": 1155.7274, + "eval_samples_per_second": 1.262, + "eval_steps_per_second": 0.21, + "step": 6700 + }, + { + "epoch": 1.03, + "learning_rate": 0.00036919702273164657, + "loss": 0.7377, + "step": 6710 + }, + { + "epoch": 1.04, + "learning_rate": 0.00036884041248031753, + "loss": 0.7444, + "step": 6720 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003684834895532042, + "loss": 0.7286, + "step": 6730 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003681262548893909, + "loss": 0.7449, + "step": 6740 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003677687094287819, + "loss": 0.6915, + "step": 6750 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003674108541120995, + "loss": 0.7031, + "step": 6760 + }, + { + "epoch": 1.04, + "learning_rate": 0.00036705268988088103, + "loss": 0.7142, + "step": 6770 + }, + { + "epoch": 1.05, + "learning_rate": 0.00036669421767747656, + "loss": 0.7086, + "step": 6780 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003663354384450467, + "loss": 0.7481, + "step": 6790 + }, + { + "epoch": 1.05, + "learning_rate": 0.00036597635312755954, + "loss": 0.6722, + "step": 6800 + }, + { + "epoch": 1.05, + "eval_bleu": 0.11414956111209436, + "eval_loss": 0.7422959804534912, + "eval_meteor": 0.18158717314624995, + "eval_rouge1": 0.35938872641078123, + "eval_rouge2": 0.20238339161949742, + "eval_rougeL": 0.3036388613445834, + "eval_rougeLsum": 0.30368724785496093, + "eval_runtime": 1059.4123, + "eval_samples_per_second": 1.376, + "eval_steps_per_second": 0.229, + "step": 6800 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003656169626697889, + "loss": 0.6965, + "step": 6810 + }, + { + "epoch": 1.05, + "learning_rate": 0.000365257268017311, + "loss": 0.7239, + "step": 6820 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003648972701165027, + "loss": 0.7147, + "step": 6830 + }, + { + "epoch": 1.05, + "learning_rate": 0.00036453696991453865, + "loss": 0.6588, + "step": 6840 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003641763683593889, + "loss": 0.6452, + "step": 6850 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003638154663998163, + "loss": 0.7578, + "step": 6860 + }, + { + "epoch": 1.06, + "learning_rate": 0.00036345426498537417, + "loss": 0.6807, + "step": 6870 + }, + { + "epoch": 1.06, + "learning_rate": 0.00036309276506640365, + "loss": 0.7922, + "step": 6880 + }, + { + "epoch": 1.06, + "learning_rate": 0.00036273096759403123, + "loss": 0.6959, + "step": 6890 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003623688735201664, + "loss": 0.7319, + "step": 6900 + }, + { + "epoch": 1.06, + "eval_bleu": 0.13311801380318097, + "eval_loss": 0.739512026309967, + "eval_meteor": 0.20301601278830728, + "eval_rouge1": 0.3697671294885042, + "eval_rouge2": 0.2040980609334162, + "eval_rougeL": 0.30591621894549137, + "eval_rougeLsum": 0.30575850009870087, + "eval_runtime": 1168.6213, + "eval_samples_per_second": 1.248, + "eval_steps_per_second": 0.208, + "step": 6900 + }, + { + "epoch": 1.07, + "learning_rate": 0.00036200648379749903, + "loss": 0.7169, + "step": 6910 + }, + { + "epoch": 1.07, + "learning_rate": 0.00036164379937949666, + "loss": 0.7035, + "step": 6920 + }, + { + "epoch": 1.07, + "learning_rate": 0.00036128082122040224, + "loss": 0.6929, + "step": 6930 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003609175502752319, + "loss": 0.7502, + "step": 6940 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003605539874997716, + "loss": 0.729, + "step": 6950 + }, + { + "epoch": 1.07, + "learning_rate": 0.00036019013385057557, + "loss": 0.6907, + "step": 6960 + }, + { + "epoch": 1.07, + "learning_rate": 0.00035982599028496306, + "loss": 0.6899, + "step": 6970 + }, + { + "epoch": 1.08, + "learning_rate": 0.00035946155776101613, + "loss": 0.7194, + "step": 6980 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003590968372375774, + "loss": 0.6805, + "step": 6990 + }, + { + "epoch": 1.08, + "learning_rate": 0.00035873182967424667, + "loss": 0.6992, + "step": 7000 + }, + { + "epoch": 1.08, + "eval_bleu": 0.11900569290924122, + "eval_loss": 0.7383832335472107, + "eval_meteor": 0.18448493712506533, + "eval_rouge1": 0.35725738552943453, + "eval_rouge2": 0.19755022515559825, + "eval_rougeL": 0.2990729972948073, + "eval_rougeLsum": 0.2989527020663407, + "eval_runtime": 1155.0098, + "eval_samples_per_second": 1.262, + "eval_steps_per_second": 0.21, + "step": 7000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00035836653603137954, + "loss": 0.6816, + "step": 7010 + }, + { + "epoch": 1.08, + "learning_rate": 0.000358000957270084, + "loss": 0.707, + "step": 7020 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003576350943522182, + "loss": 0.6911, + "step": 7030 + }, + { + "epoch": 1.09, + "learning_rate": 0.000357268948240388, + "loss": 0.6851, + "step": 7040 + }, + { + "epoch": 1.09, + "learning_rate": 0.00035690251989794444, + "loss": 0.742, + "step": 7050 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003565358102889809, + "loss": 0.7222, + "step": 7060 + }, + { + "epoch": 1.09, + "learning_rate": 0.00035616882037833083, + "loss": 0.6707, + "step": 7070 + }, + { + "epoch": 1.09, + "learning_rate": 0.00035580155113156545, + "loss": 0.717, + "step": 7080 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003554340035149906, + "loss": 0.6809, + "step": 7090 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003550661784956447, + "loss": 0.699, + "step": 7100 + }, + { + "epoch": 1.09, + "eval_bleu": 0.13124721878731666, + "eval_loss": 0.7341772317886353, + "eval_meteor": 0.20086043758202302, + "eval_rouge1": 0.3665359469102716, + "eval_rouge2": 0.20045162880972417, + "eval_rougeL": 0.30219876970116155, + "eval_rougeLsum": 0.30216060698118885, + "eval_runtime": 1217.1151, + "eval_samples_per_second": 1.198, + "eval_steps_per_second": 0.2, + "step": 7100 + }, + { + "epoch": 1.1, + "learning_rate": 0.00035469807704129595, + "loss": 0.7358, + "step": 7110 + }, + { + "epoch": 1.1, + "learning_rate": 0.00035432970012044005, + "loss": 0.7044, + "step": 7120 + }, + { + "epoch": 1.1, + "learning_rate": 0.00035396104870229705, + "loss": 0.7466, + "step": 7130 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003535921237568097, + "loss": 0.7178, + "step": 7140 + }, + { + "epoch": 1.1, + "learning_rate": 0.00035322292625464014, + "loss": 0.7379, + "step": 7150 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003528534571671677, + "loss": 0.6904, + "step": 7160 + }, + { + "epoch": 1.11, + "learning_rate": 0.00035248371746648624, + "loss": 0.7317, + "step": 7170 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003521137081254016, + "loss": 0.7052, + "step": 7180 + }, + { + "epoch": 1.11, + "learning_rate": 0.00035174343011742915, + "loss": 0.756, + "step": 7190 + }, + { + "epoch": 1.11, + "learning_rate": 0.000351372884416791, + "loss": 0.7159, + "step": 7200 + }, + { + "epoch": 1.11, + "eval_bleu": 0.13246869374366876, + "eval_loss": 0.7347835898399353, + "eval_meteor": 0.20244538384279492, + "eval_rouge1": 0.37153124241895075, + "eval_rouge2": 0.20420649618044395, + "eval_rougeL": 0.3075002644877919, + "eval_rougeLsum": 0.3076795818578708, + "eval_runtime": 1225.5167, + "eval_samples_per_second": 1.19, + "eval_steps_per_second": 0.198, + "step": 7200 + }, + { + "epoch": 1.11, + "learning_rate": 0.00035100207199841374, + "loss": 0.6935, + "step": 7210 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003506309938379255, + "loss": 0.7689, + "step": 7220 + }, + { + "epoch": 1.11, + "learning_rate": 0.00035025965091165385, + "loss": 0.7423, + "step": 7230 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003498880441966228, + "loss": 0.6649, + "step": 7240 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003495161746705503, + "loss": 0.7144, + "step": 7250 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003491440433118462, + "loss": 0.6854, + "step": 7260 + }, + { + "epoch": 1.12, + "learning_rate": 0.00034877165109960863, + "loss": 0.739, + "step": 7270 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003483989990136226, + "loss": 0.6962, + "step": 7280 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003480260880343565, + "loss": 0.7414, + "step": 7290 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003476529191429601, + "loss": 0.7418, + "step": 7300 + }, + { + "epoch": 1.13, + "eval_bleu": 0.12629642603617014, + "eval_loss": 0.7300452589988708, + "eval_meteor": 0.19607433639658048, + "eval_rouge1": 0.36940732143709704, + "eval_rouge2": 0.20324909716054756, + "eval_rougeL": 0.30713128476276175, + "eval_rougeLsum": 0.3072824822901492, + "eval_runtime": 1148.1498, + "eval_samples_per_second": 1.27, + "eval_steps_per_second": 0.212, + "step": 7300 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003472794933212616, + "loss": 0.7181, + "step": 7310 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003469058115517652, + "loss": 0.7546, + "step": 7320 + }, + { + "epoch": 1.13, + "learning_rate": 0.00034653187481764873, + "loss": 0.736, + "step": 7330 + }, + { + "epoch": 1.13, + "learning_rate": 0.00034615768410276065, + "loss": 0.744, + "step": 7340 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003457832403916177, + "loss": 0.7563, + "step": 7350 + }, + { + "epoch": 1.13, + "learning_rate": 0.00034540854466940215, + "loss": 0.6738, + "step": 7360 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003450335979219595, + "loss": 0.7102, + "step": 7370 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003446584011357957, + "loss": 0.7419, + "step": 7380 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003442829552980746, + "loss": 0.7521, + "step": 7390 + }, + { + "epoch": 1.14, + "learning_rate": 0.000343907261396615, + "loss": 0.6713, + "step": 7400 + }, + { + "epoch": 1.14, + "eval_bleu": 0.12662344706646492, + "eval_loss": 0.7302644848823547, + "eval_meteor": 0.19680753344212212, + "eval_rouge1": 0.370672782958349, + "eval_rouge2": 0.2051175852415017, + "eval_rougeL": 0.30716206152120107, + "eval_rougeLsum": 0.30713312439209517, + "eval_runtime": 1127.3029, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.216, + "step": 7400 + }, + { + "epoch": 1.14, + "learning_rate": 0.00034353132041988876, + "loss": 0.7622, + "step": 7410 + }, + { + "epoch": 1.14, + "learning_rate": 0.00034315513335701764, + "loss": 0.6964, + "step": 7420 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003427787011977709, + "loss": 0.7532, + "step": 7430 + }, + { + "epoch": 1.15, + "learning_rate": 0.00034240202493256264, + "loss": 0.6931, + "step": 7440 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003420251055524491, + "loss": 0.7325, + "step": 7450 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003416479440491264, + "loss": 0.6884, + "step": 7460 + }, + { + "epoch": 1.15, + "learning_rate": 0.00034127054141492756, + "loss": 0.7377, + "step": 7470 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003408928986428202, + "loss": 0.7091, + "step": 7480 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003405150167264034, + "loss": 0.7379, + "step": 7490 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003401368966599057, + "loss": 0.704, + "step": 7500 + }, + { + "epoch": 1.16, + "eval_bleu": 0.1258064652767695, + "eval_loss": 0.7285297513008118, + "eval_meteor": 0.19691865175723794, + "eval_rouge1": 0.36778390805748723, + "eval_rouge2": 0.20311221027278986, + "eval_rougeL": 0.3054394126025268, + "eval_rougeLsum": 0.3054229081295555, + "eval_runtime": 1127.1968, + "eval_samples_per_second": 1.293, + "eval_steps_per_second": 0.216, + "step": 7500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00033975853943818223, + "loss": 0.7004, + "step": 7510 + }, + { + "epoch": 1.16, + "learning_rate": 0.00033937994605671214, + "loss": 0.7505, + "step": 7520 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003390011175115956, + "loss": 0.7212, + "step": 7530 + }, + { + "epoch": 1.16, + "learning_rate": 0.0003386220547995519, + "loss": 0.7163, + "step": 7540 + }, + { + "epoch": 1.16, + "learning_rate": 0.00033824275891791624, + "loss": 0.7683, + "step": 7550 + }, + { + "epoch": 1.17, + "learning_rate": 0.00033786323086463734, + "loss": 0.6846, + "step": 7560 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003374834716382748, + "loss": 0.7276, + "step": 7570 + }, + { + "epoch": 1.17, + "learning_rate": 0.00033710348223799634, + "loss": 0.7359, + "step": 7580 + }, + { + "epoch": 1.17, + "learning_rate": 0.00033672326366357544, + "loss": 0.7125, + "step": 7590 + }, + { + "epoch": 1.17, + "learning_rate": 0.00033634281691538847, + "loss": 0.7155, + "step": 7600 + }, + { + "epoch": 1.17, + "eval_bleu": 0.12742902408441137, + "eval_loss": 0.7300394773483276, + "eval_meteor": 0.19972564219243125, + "eval_rouge1": 0.36676091746300093, + "eval_rouge2": 0.20014907900892553, + "eval_rougeL": 0.302702557140773, + "eval_rougeLsum": 0.30281701017902063, + "eval_runtime": 1175.8896, + "eval_samples_per_second": 1.24, + "eval_steps_per_second": 0.207, + "step": 7600 + }, + { + "epoch": 1.17, + "learning_rate": 0.00033596214299441213, + "loss": 0.6816, + "step": 7610 + }, + { + "epoch": 1.17, + "learning_rate": 0.0003355812429022208, + "loss": 0.656, + "step": 7620 + }, + { + "epoch": 1.18, + "learning_rate": 0.000335200117640984, + "loss": 0.7309, + "step": 7630 + }, + { + "epoch": 1.18, + "learning_rate": 0.00033481876821346367, + "loss": 0.7137, + "step": 7640 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003344371956230114, + "loss": 0.7229, + "step": 7650 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003340554008735663, + "loss": 0.7312, + "step": 7660 + }, + { + "epoch": 1.18, + "learning_rate": 0.0003336733849696516, + "loss": 0.6824, + "step": 7670 + }, + { + "epoch": 1.18, + "learning_rate": 0.00033329114891637244, + "loss": 0.7157, + "step": 7680 + }, + { + "epoch": 1.19, + "learning_rate": 0.00033290869371941343, + "loss": 0.7378, + "step": 7690 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003325260203850357, + "loss": 0.7284, + "step": 7700 + }, + { + "epoch": 1.19, + "eval_bleu": 0.1279752717045045, + "eval_loss": 0.7327857613563538, + "eval_meteor": 0.1978933380981099, + "eval_rouge1": 0.3660675412873057, + "eval_rouge2": 0.2007737061001636, + "eval_rougeL": 0.3036859494669802, + "eval_rougeLsum": 0.3035956514223758, + "eval_runtime": 1203.7754, + "eval_samples_per_second": 1.211, + "eval_steps_per_second": 0.202, + "step": 7700 + }, + { + "epoch": 1.19, + "learning_rate": 0.000332143129920074, + "loss": 0.7286, + "step": 7710 + }, + { + "epoch": 1.19, + "learning_rate": 0.00033176002333193475, + "loss": 0.7142, + "step": 7720 + }, + { + "epoch": 1.19, + "learning_rate": 0.0003313767016285929, + "loss": 0.7226, + "step": 7730 + }, + { + "epoch": 1.19, + "learning_rate": 0.00033099316581858924, + "loss": 0.6984, + "step": 7740 + }, + { + "epoch": 1.19, + "learning_rate": 0.000330609416911028, + "loss": 0.7486, + "step": 7750 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003302254559155741, + "loss": 0.6951, + "step": 7760 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003298412838424503, + "loss": 0.6734, + "step": 7770 + }, + { + "epoch": 1.2, + "learning_rate": 0.00032945690170243494, + "loss": 0.7295, + "step": 7780 + }, + { + "epoch": 1.2, + "learning_rate": 0.000329072310506859, + "loss": 0.686, + "step": 7790 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003286875112676035, + "loss": 0.6969, + "step": 7800 + }, + { + "epoch": 1.2, + "eval_bleu": 0.12667328365779612, + "eval_loss": 0.730004072189331, + "eval_meteor": 0.19859259473423635, + "eval_rouge1": 0.36608475297722565, + "eval_rouge2": 0.20181171212849097, + "eval_rougeL": 0.3048158401257285, + "eval_rougeLsum": 0.30473273024993836, + "eval_runtime": 1121.2619, + "eval_samples_per_second": 1.3, + "eval_steps_per_second": 0.217, + "step": 7800 + }, + { + "epoch": 1.2, + "learning_rate": 0.0003283025049970967, + "loss": 0.7053, + "step": 7810 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003279172927083117, + "loss": 0.7112, + "step": 7820 + }, + { + "epoch": 1.21, + "learning_rate": 0.00032753187541476357, + "loss": 0.7294, + "step": 7830 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003271462541305069, + "loss": 0.7703, + "step": 7840 + }, + { + "epoch": 1.21, + "learning_rate": 0.00032676042987013287, + "loss": 0.7219, + "step": 7850 + }, + { + "epoch": 1.21, + "learning_rate": 0.0003263744036487667, + "loss": 0.7527, + "step": 7860 + }, + { + "epoch": 1.21, + "learning_rate": 0.000325988176482065, + "loss": 0.7469, + "step": 7870 + }, + { + "epoch": 1.21, + "learning_rate": 0.00032560174938621326, + "loss": 0.7235, + "step": 7880 + }, + { + "epoch": 1.22, + "learning_rate": 0.00032521512337792247, + "loss": 0.7821, + "step": 7890 + }, + { + "epoch": 1.22, + "learning_rate": 0.0003248282994744276, + "loss": 0.7279, + "step": 7900 + }, + { + "epoch": 1.22, + "eval_bleu": 0.13693040140777551, + "eval_loss": 0.728911280632019, + "eval_meteor": 0.20888724574067633, + "eval_rouge1": 0.3680729526895363, + "eval_rouge2": 0.2008207536043628, + "eval_rougeL": 0.3017495392967735, + "eval_rougeLsum": 0.30193972403551483, + "eval_runtime": 1263.8659, + "eval_samples_per_second": 1.154, + "eval_steps_per_second": 0.192, + "step": 7900 + } + ], + "max_steps": 19458, + "num_train_epochs": 3, + "total_flos": 2.3091196043722752e+17, + "trial_name": null, + "trial_params": null +}