diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,14343 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.999985976918007, + "global_step": 106965, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.9985976721357454e-05, + "loss": 3.2989, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.9971953442714907e-05, + "loss": 2.6398, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 2.995793016407236e-05, + "loss": 2.3782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 2.9943906885429814e-05, + "loss": 2.2384, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.9929883606787268e-05, + "loss": 2.1279, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 2.991586032814472e-05, + "loss": 1.9667, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 2.9901837049502174e-05, + "loss": 1.9143, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 2.9887813770859628e-05, + "loss": 1.8277, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 2.987379049221708e-05, + "loss": 1.6988, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 2.9859767213574534e-05, + "loss": 1.637, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 2.9845743934931988e-05, + "loss": 1.5378, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 2.983172065628944e-05, + "loss": 1.4824, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 2.9817697377646894e-05, + "loss": 1.4092, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 2.9803674099004348e-05, + "loss": 1.294, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 2.97896508203618e-05, + "loss": 1.2394, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 2.9775627541719254e-05, + "loss": 1.1931, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 2.9761604263076708e-05, + "loss": 1.1169, + "step": 850 + }, + { + "epoch": 0.03, + "learning_rate": 2.974758098443416e-05, + "loss": 1.0487, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 2.9733557705791615e-05, + "loss": 0.9585, + "step": 950 + }, + { + "epoch": 0.03, + "learning_rate": 2.9719534427149068e-05, + "loss": 0.9143, + "step": 1000 + }, + { + "epoch": 0.03, + "eval_bleu": 79.3692, + "eval_gen_len": 61.919, + "eval_loss": 0.5720869302749634, + "eval_rouge1": 87.2325, + "eval_rouge2": 78.8924, + "eval_rougeL": 85.959, + "eval_rougeLsum": 86.2188, + "eval_runtime": 3277.2108, + "eval_samples_per_second": 0.915, + "eval_steps_per_second": 0.229, + "step": 1000 + }, + { + "epoch": 0.03, + "learning_rate": 2.970551114850652e-05, + "loss": 0.8543, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 2.9691487869863975e-05, + "loss": 0.813, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 2.9677464591221428e-05, + "loss": 0.7565, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 2.966344131257888e-05, + "loss": 0.6924, + "step": 1200 + }, + { + "epoch": 0.04, + "learning_rate": 2.9649418033936335e-05, + "loss": 0.6407, + "step": 1250 + }, + { + "epoch": 0.04, + "learning_rate": 2.9635394755293788e-05, + "loss": 0.6138, + "step": 1300 + }, + { + "epoch": 0.04, + "learning_rate": 2.962137147665124e-05, + "loss": 0.5659, + "step": 1350 + }, + { + "epoch": 0.04, + "learning_rate": 2.9607348198008695e-05, + "loss": 0.5688, + "step": 1400 + }, + { + "epoch": 0.04, + "learning_rate": 2.9593324919366148e-05, + "loss": 0.516, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 2.95793016407236e-05, + "loss": 0.4945, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 2.9565278362081055e-05, + "loss": 0.4414, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 2.9551255083438508e-05, + "loss": 0.4591, + "step": 1600 + }, + { + "epoch": 0.05, + "learning_rate": 2.9537231804795965e-05, + "loss": 0.4378, + "step": 1650 + }, + { + "epoch": 0.05, + "learning_rate": 2.9523208526153415e-05, + "loss": 0.4287, + "step": 1700 + }, + { + "epoch": 0.05, + "learning_rate": 2.9509185247510868e-05, + "loss": 0.4494, + "step": 1750 + }, + { + "epoch": 0.05, + "learning_rate": 2.949516196886832e-05, + "loss": 0.4241, + "step": 1800 + }, + { + "epoch": 0.05, + "learning_rate": 2.9481138690225775e-05, + "loss": 0.4092, + "step": 1850 + }, + { + "epoch": 0.05, + "learning_rate": 2.946711541158323e-05, + "loss": 0.4149, + "step": 1900 + }, + { + "epoch": 0.05, + "learning_rate": 2.9453092132940682e-05, + "loss": 0.3603, + "step": 1950 + }, + { + "epoch": 0.06, + "learning_rate": 2.9439068854298135e-05, + "loss": 0.3801, + "step": 2000 + }, + { + "epoch": 0.06, + "eval_bleu": 93.6507, + "eval_gen_len": 64.828, + "eval_loss": 0.4033011794090271, + "eval_rouge1": 91.7405, + "eval_rouge2": 86.6147, + "eval_rougeL": 91.4332, + "eval_rougeLsum": 91.4208, + "eval_runtime": 3199.5288, + "eval_samples_per_second": 0.938, + "eval_steps_per_second": 0.234, + "step": 2000 + }, + { + "epoch": 0.06, + "learning_rate": 2.9425045575655592e-05, + "loss": 0.3768, + "step": 2050 + }, + { + "epoch": 0.06, + "learning_rate": 2.9411022297013042e-05, + "loss": 0.3616, + "step": 2100 + }, + { + "epoch": 0.06, + "learning_rate": 2.9396999018370495e-05, + "loss": 0.3987, + "step": 2150 + }, + { + "epoch": 0.06, + "learning_rate": 2.938297573972795e-05, + "loss": 0.3601, + "step": 2200 + }, + { + "epoch": 0.06, + "learning_rate": 2.9368952461085402e-05, + "loss": 0.3416, + "step": 2250 + }, + { + "epoch": 0.06, + "learning_rate": 2.9354929182442855e-05, + "loss": 0.3517, + "step": 2300 + }, + { + "epoch": 0.07, + "learning_rate": 2.934090590380031e-05, + "loss": 0.3315, + "step": 2350 + }, + { + "epoch": 0.07, + "learning_rate": 2.9326882625157762e-05, + "loss": 0.3501, + "step": 2400 + }, + { + "epoch": 0.07, + "learning_rate": 2.931285934651522e-05, + "loss": 0.3275, + "step": 2450 + }, + { + "epoch": 0.07, + "learning_rate": 2.929883606787267e-05, + "loss": 0.3098, + "step": 2500 + }, + { + "epoch": 0.07, + "learning_rate": 2.9284812789230122e-05, + "loss": 0.324, + "step": 2550 + }, + { + "epoch": 0.07, + "learning_rate": 2.9270789510587575e-05, + "loss": 0.3129, + "step": 2600 + }, + { + "epoch": 0.07, + "learning_rate": 2.925676623194503e-05, + "loss": 0.3394, + "step": 2650 + }, + { + "epoch": 0.08, + "learning_rate": 2.9242742953302482e-05, + "loss": 0.2813, + "step": 2700 + }, + { + "epoch": 0.08, + "learning_rate": 2.9228719674659935e-05, + "loss": 0.3168, + "step": 2750 + }, + { + "epoch": 0.08, + "learning_rate": 2.921469639601739e-05, + "loss": 0.3266, + "step": 2800 + }, + { + "epoch": 0.08, + "learning_rate": 2.9200673117374846e-05, + "loss": 0.2833, + "step": 2850 + }, + { + "epoch": 0.08, + "learning_rate": 2.9186649838732296e-05, + "loss": 0.3025, + "step": 2900 + }, + { + "epoch": 0.08, + "learning_rate": 2.917262656008975e-05, + "loss": 0.3095, + "step": 2950 + }, + { + "epoch": 0.08, + "learning_rate": 2.9158603281447202e-05, + "loss": 0.2844, + "step": 3000 + }, + { + "epoch": 0.08, + "eval_bleu": 94.365, + "eval_gen_len": 64.905, + "eval_loss": 0.3663557171821594, + "eval_rouge1": 91.9207, + "eval_rouge2": 87.0399, + "eval_rougeL": 91.6573, + "eval_rougeLsum": 91.6307, + "eval_runtime": 3129.011, + "eval_samples_per_second": 0.959, + "eval_steps_per_second": 0.24, + "step": 3000 + }, + { + "epoch": 0.09, + "learning_rate": 2.9144580002804656e-05, + "loss": 0.3001, + "step": 3050 + }, + { + "epoch": 0.09, + "learning_rate": 2.9130556724162112e-05, + "loss": 0.2903, + "step": 3100 + }, + { + "epoch": 0.09, + "learning_rate": 2.9116533445519562e-05, + "loss": 0.2751, + "step": 3150 + }, + { + "epoch": 0.09, + "learning_rate": 2.9102510166877016e-05, + "loss": 0.2725, + "step": 3200 + }, + { + "epoch": 0.09, + "learning_rate": 2.9088486888234472e-05, + "loss": 0.2897, + "step": 3250 + }, + { + "epoch": 0.09, + "learning_rate": 2.9074463609591922e-05, + "loss": 0.2878, + "step": 3300 + }, + { + "epoch": 0.09, + "learning_rate": 2.9060440330949376e-05, + "loss": 0.2966, + "step": 3350 + }, + { + "epoch": 0.1, + "learning_rate": 2.904641705230683e-05, + "loss": 0.3079, + "step": 3400 + }, + { + "epoch": 0.1, + "learning_rate": 2.9032393773664282e-05, + "loss": 0.2598, + "step": 3450 + }, + { + "epoch": 0.1, + "learning_rate": 2.901837049502174e-05, + "loss": 0.2982, + "step": 3500 + }, + { + "epoch": 0.1, + "learning_rate": 2.900434721637919e-05, + "loss": 0.2634, + "step": 3550 + }, + { + "epoch": 0.1, + "learning_rate": 2.8990323937736643e-05, + "loss": 0.2896, + "step": 3600 + }, + { + "epoch": 0.1, + "learning_rate": 2.89763006590941e-05, + "loss": 0.2822, + "step": 3650 + }, + { + "epoch": 0.1, + "learning_rate": 2.896227738045155e-05, + "loss": 0.278, + "step": 3700 + }, + { + "epoch": 0.11, + "learning_rate": 2.8948254101809003e-05, + "loss": 0.2818, + "step": 3750 + }, + { + "epoch": 0.11, + "learning_rate": 2.8934230823166456e-05, + "loss": 0.2788, + "step": 3800 + }, + { + "epoch": 0.11, + "learning_rate": 2.892020754452391e-05, + "loss": 0.2989, + "step": 3850 + }, + { + "epoch": 0.11, + "learning_rate": 2.8906184265881366e-05, + "loss": 0.3021, + "step": 3900 + }, + { + "epoch": 0.11, + "learning_rate": 2.8892160987238816e-05, + "loss": 0.281, + "step": 3950 + }, + { + "epoch": 0.11, + "learning_rate": 2.887813770859627e-05, + "loss": 0.2612, + "step": 4000 + }, + { + "epoch": 0.11, + "eval_bleu": 94.6072, + "eval_gen_len": 64.9013, + "eval_loss": 0.3458440899848938, + "eval_rouge1": 92.2699, + "eval_rouge2": 87.5817, + "eval_rougeL": 92.003, + "eval_rougeLsum": 91.9759, + "eval_runtime": 3105.6443, + "eval_samples_per_second": 0.966, + "eval_steps_per_second": 0.241, + "step": 4000 + }, + { + "epoch": 0.11, + "learning_rate": 2.8864114429953726e-05, + "loss": 0.2749, + "step": 4050 + }, + { + "epoch": 0.11, + "learning_rate": 2.8850091151311176e-05, + "loss": 0.2948, + "step": 4100 + }, + { + "epoch": 0.12, + "learning_rate": 2.883606787266863e-05, + "loss": 0.2429, + "step": 4150 + }, + { + "epoch": 0.12, + "learning_rate": 2.8822044594026083e-05, + "loss": 0.2793, + "step": 4200 + }, + { + "epoch": 0.12, + "learning_rate": 2.8808021315383536e-05, + "loss": 0.2899, + "step": 4250 + }, + { + "epoch": 0.12, + "learning_rate": 2.8793998036740993e-05, + "loss": 0.2384, + "step": 4300 + }, + { + "epoch": 0.12, + "learning_rate": 2.8779974758098443e-05, + "loss": 0.2783, + "step": 4350 + }, + { + "epoch": 0.12, + "learning_rate": 2.8765951479455896e-05, + "loss": 0.2709, + "step": 4400 + }, + { + "epoch": 0.12, + "learning_rate": 2.8751928200813353e-05, + "loss": 0.2453, + "step": 4450 + }, + { + "epoch": 0.13, + "learning_rate": 2.8737904922170803e-05, + "loss": 0.2626, + "step": 4500 + }, + { + "epoch": 0.13, + "learning_rate": 2.872388164352826e-05, + "loss": 0.2476, + "step": 4550 + }, + { + "epoch": 0.13, + "learning_rate": 2.870985836488571e-05, + "loss": 0.2207, + "step": 4600 + }, + { + "epoch": 0.13, + "learning_rate": 2.8695835086243163e-05, + "loss": 0.2553, + "step": 4650 + }, + { + "epoch": 0.13, + "learning_rate": 2.868181180760062e-05, + "loss": 0.2624, + "step": 4700 + }, + { + "epoch": 0.13, + "learning_rate": 2.866778852895807e-05, + "loss": 0.2505, + "step": 4750 + }, + { + "epoch": 0.13, + "learning_rate": 2.8653765250315523e-05, + "loss": 0.2356, + "step": 4800 + }, + { + "epoch": 0.14, + "learning_rate": 2.863974197167298e-05, + "loss": 0.2529, + "step": 4850 + }, + { + "epoch": 0.14, + "learning_rate": 2.862571869303043e-05, + "loss": 0.2331, + "step": 4900 + }, + { + "epoch": 0.14, + "learning_rate": 2.8611695414387887e-05, + "loss": 0.2487, + "step": 4950 + }, + { + "epoch": 0.14, + "learning_rate": 2.8597672135745337e-05, + "loss": 0.2609, + "step": 5000 + }, + { + "epoch": 0.14, + "eval_bleu": 94.6745, + "eval_gen_len": 64.809, + "eval_loss": 0.33044180274009705, + "eval_rouge1": 92.3236, + "eval_rouge2": 87.7132, + "eval_rougeL": 92.0699, + "eval_rougeLsum": 92.0344, + "eval_runtime": 3150.3076, + "eval_samples_per_second": 0.952, + "eval_steps_per_second": 0.238, + "step": 5000 + }, + { + "epoch": 0.14, + "learning_rate": 2.858364885710279e-05, + "loss": 0.2444, + "step": 5050 + }, + { + "epoch": 0.14, + "learning_rate": 2.8569625578460247e-05, + "loss": 0.2717, + "step": 5100 + }, + { + "epoch": 0.14, + "learning_rate": 2.8555602299817697e-05, + "loss": 0.2372, + "step": 5150 + }, + { + "epoch": 0.15, + "learning_rate": 2.854157902117515e-05, + "loss": 0.2448, + "step": 5200 + }, + { + "epoch": 0.15, + "learning_rate": 2.8527555742532607e-05, + "loss": 0.2752, + "step": 5250 + }, + { + "epoch": 0.15, + "learning_rate": 2.8513532463890057e-05, + "loss": 0.2762, + "step": 5300 + }, + { + "epoch": 0.15, + "learning_rate": 2.8499509185247513e-05, + "loss": 0.2355, + "step": 5350 + }, + { + "epoch": 0.15, + "learning_rate": 2.8485485906604963e-05, + "loss": 0.2565, + "step": 5400 + }, + { + "epoch": 0.15, + "learning_rate": 2.8471462627962417e-05, + "loss": 0.2795, + "step": 5450 + }, + { + "epoch": 0.15, + "learning_rate": 2.8457439349319873e-05, + "loss": 0.2305, + "step": 5500 + }, + { + "epoch": 0.16, + "learning_rate": 2.8443416070677323e-05, + "loss": 0.2515, + "step": 5550 + }, + { + "epoch": 0.16, + "learning_rate": 2.8429392792034777e-05, + "loss": 0.2473, + "step": 5600 + }, + { + "epoch": 0.16, + "learning_rate": 2.8415369513392234e-05, + "loss": 0.2531, + "step": 5650 + }, + { + "epoch": 0.16, + "learning_rate": 2.8401346234749684e-05, + "loss": 0.2243, + "step": 5700 + }, + { + "epoch": 0.16, + "learning_rate": 2.838732295610714e-05, + "loss": 0.2272, + "step": 5750 + }, + { + "epoch": 0.16, + "learning_rate": 2.837329967746459e-05, + "loss": 0.2601, + "step": 5800 + }, + { + "epoch": 0.16, + "learning_rate": 2.8359276398822044e-05, + "loss": 0.2539, + "step": 5850 + }, + { + "epoch": 0.17, + "learning_rate": 2.83452531201795e-05, + "loss": 0.2445, + "step": 5900 + }, + { + "epoch": 0.17, + "learning_rate": 2.833122984153695e-05, + "loss": 0.213, + "step": 5950 + }, + { + "epoch": 0.17, + "learning_rate": 2.8317206562894407e-05, + "loss": 0.2173, + "step": 6000 + }, + { + "epoch": 0.17, + "eval_bleu": 94.886, + "eval_gen_len": 64.737, + "eval_loss": 0.32196304202079773, + "eval_rouge1": 92.4893, + "eval_rouge2": 87.9435, + "eval_rougeL": 92.23, + "eval_rougeLsum": 92.1972, + "eval_runtime": 3194.0894, + "eval_samples_per_second": 0.939, + "eval_steps_per_second": 0.235, + "step": 6000 + }, + { + "epoch": 0.17, + "learning_rate": 2.830318328425186e-05, + "loss": 0.2123, + "step": 6050 + }, + { + "epoch": 0.17, + "learning_rate": 2.828916000560931e-05, + "loss": 0.2351, + "step": 6100 + }, + { + "epoch": 0.17, + "learning_rate": 2.8275136726966767e-05, + "loss": 0.2546, + "step": 6150 + }, + { + "epoch": 0.17, + "learning_rate": 2.8261113448324217e-05, + "loss": 0.2388, + "step": 6200 + }, + { + "epoch": 0.18, + "learning_rate": 2.824709016968167e-05, + "loss": 0.2309, + "step": 6250 + }, + { + "epoch": 0.18, + "learning_rate": 2.8233066891039127e-05, + "loss": 0.2379, + "step": 6300 + }, + { + "epoch": 0.18, + "learning_rate": 2.8219043612396577e-05, + "loss": 0.2269, + "step": 6350 + }, + { + "epoch": 0.18, + "learning_rate": 2.8205020333754034e-05, + "loss": 0.2362, + "step": 6400 + }, + { + "epoch": 0.18, + "learning_rate": 2.8190997055111487e-05, + "loss": 0.2325, + "step": 6450 + }, + { + "epoch": 0.18, + "learning_rate": 2.8176973776468937e-05, + "loss": 0.2519, + "step": 6500 + }, + { + "epoch": 0.18, + "learning_rate": 2.8162950497826394e-05, + "loss": 0.215, + "step": 6550 + }, + { + "epoch": 0.19, + "learning_rate": 2.8148927219183844e-05, + "loss": 0.2301, + "step": 6600 + }, + { + "epoch": 0.19, + "learning_rate": 2.8134903940541297e-05, + "loss": 0.247, + "step": 6650 + }, + { + "epoch": 0.19, + "learning_rate": 2.8120880661898754e-05, + "loss": 0.2261, + "step": 6700 + }, + { + "epoch": 0.19, + "learning_rate": 2.8106857383256204e-05, + "loss": 0.2349, + "step": 6750 + }, + { + "epoch": 0.19, + "learning_rate": 2.809283410461366e-05, + "loss": 0.2253, + "step": 6800 + }, + { + "epoch": 0.19, + "learning_rate": 2.8078810825971114e-05, + "loss": 0.2186, + "step": 6850 + }, + { + "epoch": 0.19, + "learning_rate": 2.8064787547328564e-05, + "loss": 0.238, + "step": 6900 + }, + { + "epoch": 0.19, + "learning_rate": 2.805076426868602e-05, + "loss": 0.2308, + "step": 6950 + }, + { + "epoch": 0.2, + "learning_rate": 2.803674099004347e-05, + "loss": 0.2252, + "step": 7000 + }, + { + "epoch": 0.2, + "eval_bleu": 94.9526, + "eval_gen_len": 64.719, + "eval_loss": 0.3162732422351837, + "eval_rouge1": 92.5629, + "eval_rouge2": 88.0962, + "eval_rougeL": 92.3033, + "eval_rougeLsum": 92.2707, + "eval_runtime": 3111.1893, + "eval_samples_per_second": 0.964, + "eval_steps_per_second": 0.241, + "step": 7000 + }, + { + "epoch": 0.2, + "learning_rate": 2.8022717711400924e-05, + "loss": 0.2267, + "step": 7050 + }, + { + "epoch": 0.2, + "learning_rate": 2.800869443275838e-05, + "loss": 0.2197, + "step": 7100 + }, + { + "epoch": 0.2, + "learning_rate": 2.799467115411583e-05, + "loss": 0.195, + "step": 7150 + }, + { + "epoch": 0.2, + "learning_rate": 2.7980647875473288e-05, + "loss": 0.1975, + "step": 7200 + }, + { + "epoch": 0.2, + "learning_rate": 2.796662459683074e-05, + "loss": 0.2261, + "step": 7250 + }, + { + "epoch": 0.2, + "learning_rate": 2.795260131818819e-05, + "loss": 0.2276, + "step": 7300 + }, + { + "epoch": 0.21, + "learning_rate": 2.7938578039545648e-05, + "loss": 0.2236, + "step": 7350 + }, + { + "epoch": 0.21, + "learning_rate": 2.7924554760903098e-05, + "loss": 0.2362, + "step": 7400 + }, + { + "epoch": 0.21, + "learning_rate": 2.7910531482260554e-05, + "loss": 0.1933, + "step": 7450 + }, + { + "epoch": 0.21, + "learning_rate": 2.7896508203618008e-05, + "loss": 0.256, + "step": 7500 + }, + { + "epoch": 0.21, + "learning_rate": 2.7882484924975458e-05, + "loss": 0.22, + "step": 7550 + }, + { + "epoch": 0.21, + "learning_rate": 2.7868461646332915e-05, + "loss": 0.2285, + "step": 7600 + }, + { + "epoch": 0.21, + "learning_rate": 2.7854438367690368e-05, + "loss": 0.2297, + "step": 7650 + }, + { + "epoch": 0.22, + "learning_rate": 2.7840415089047818e-05, + "loss": 0.2198, + "step": 7700 + }, + { + "epoch": 0.22, + "learning_rate": 2.7826391810405275e-05, + "loss": 0.219, + "step": 7750 + }, + { + "epoch": 0.22, + "learning_rate": 2.7812368531762725e-05, + "loss": 0.264, + "step": 7800 + }, + { + "epoch": 0.22, + "learning_rate": 2.779834525312018e-05, + "loss": 0.2218, + "step": 7850 + }, + { + "epoch": 0.22, + "learning_rate": 2.7784321974477635e-05, + "loss": 0.2343, + "step": 7900 + }, + { + "epoch": 0.22, + "learning_rate": 2.7770298695835085e-05, + "loss": 0.223, + "step": 7950 + }, + { + "epoch": 0.22, + "learning_rate": 2.775627541719254e-05, + "loss": 0.2111, + "step": 8000 + }, + { + "epoch": 0.22, + "eval_bleu": 95.0762, + "eval_gen_len": 64.6663, + "eval_loss": 0.31111225485801697, + "eval_rouge1": 92.7158, + "eval_rouge2": 88.3325, + "eval_rougeL": 92.4682, + "eval_rougeLsum": 92.4352, + "eval_runtime": 3140.711, + "eval_samples_per_second": 0.955, + "eval_steps_per_second": 0.239, + "step": 8000 + }, + { + "epoch": 0.23, + "learning_rate": 2.7742252138549995e-05, + "loss": 0.2171, + "step": 8050 + }, + { + "epoch": 0.23, + "learning_rate": 2.7728228859907445e-05, + "loss": 0.2263, + "step": 8100 + }, + { + "epoch": 0.23, + "learning_rate": 2.77142055812649e-05, + "loss": 0.2359, + "step": 8150 + }, + { + "epoch": 0.23, + "learning_rate": 2.770018230262235e-05, + "loss": 0.2058, + "step": 8200 + }, + { + "epoch": 0.23, + "learning_rate": 2.7686159023979808e-05, + "loss": 0.2044, + "step": 8250 + }, + { + "epoch": 0.23, + "learning_rate": 2.767213574533726e-05, + "loss": 0.2322, + "step": 8300 + }, + { + "epoch": 0.23, + "learning_rate": 2.765811246669471e-05, + "loss": 0.2289, + "step": 8350 + }, + { + "epoch": 0.24, + "learning_rate": 2.7644089188052168e-05, + "loss": 0.2228, + "step": 8400 + }, + { + "epoch": 0.24, + "learning_rate": 2.763006590940962e-05, + "loss": 0.2211, + "step": 8450 + }, + { + "epoch": 0.24, + "learning_rate": 2.761604263076707e-05, + "loss": 0.2297, + "step": 8500 + }, + { + "epoch": 0.24, + "learning_rate": 2.760201935212453e-05, + "loss": 0.2087, + "step": 8550 + }, + { + "epoch": 0.24, + "learning_rate": 2.7587996073481978e-05, + "loss": 0.2266, + "step": 8600 + }, + { + "epoch": 0.24, + "learning_rate": 2.7573972794839435e-05, + "loss": 0.2259, + "step": 8650 + }, + { + "epoch": 0.24, + "learning_rate": 2.755994951619689e-05, + "loss": 0.1985, + "step": 8700 + }, + { + "epoch": 0.25, + "learning_rate": 2.754592623755434e-05, + "loss": 0.2023, + "step": 8750 + }, + { + "epoch": 0.25, + "learning_rate": 2.7531902958911795e-05, + "loss": 0.2297, + "step": 8800 + }, + { + "epoch": 0.25, + "learning_rate": 2.751787968026925e-05, + "loss": 0.2179, + "step": 8850 + }, + { + "epoch": 0.25, + "learning_rate": 2.7503856401626702e-05, + "loss": 0.1993, + "step": 8900 + }, + { + "epoch": 0.25, + "learning_rate": 2.7489833122984155e-05, + "loss": 0.1946, + "step": 8950 + }, + { + "epoch": 0.25, + "learning_rate": 2.7475809844341605e-05, + "loss": 0.195, + "step": 9000 + }, + { + "epoch": 0.25, + "eval_bleu": 95.1941, + "eval_gen_len": 64.634, + "eval_loss": 0.3037899434566498, + "eval_rouge1": 92.7974, + "eval_rouge2": 88.5146, + "eval_rougeL": 92.5398, + "eval_rougeLsum": 92.5031, + "eval_runtime": 3164.9424, + "eval_samples_per_second": 0.948, + "eval_steps_per_second": 0.237, + "step": 9000 + }, + { + "epoch": 0.25, + "learning_rate": 2.7461786565699062e-05, + "loss": 0.2171, + "step": 9050 + }, + { + "epoch": 0.26, + "learning_rate": 2.7447763287056515e-05, + "loss": 0.2282, + "step": 9100 + }, + { + "epoch": 0.26, + "learning_rate": 2.7433740008413965e-05, + "loss": 0.2159, + "step": 9150 + }, + { + "epoch": 0.26, + "learning_rate": 2.7419716729771422e-05, + "loss": 0.221, + "step": 9200 + }, + { + "epoch": 0.26, + "learning_rate": 2.7405693451128875e-05, + "loss": 0.2357, + "step": 9250 + }, + { + "epoch": 0.26, + "learning_rate": 2.739167017248633e-05, + "loss": 0.221, + "step": 9300 + }, + { + "epoch": 0.26, + "learning_rate": 2.7377646893843782e-05, + "loss": 0.2064, + "step": 9350 + }, + { + "epoch": 0.26, + "learning_rate": 2.7363623615201232e-05, + "loss": 0.2112, + "step": 9400 + }, + { + "epoch": 0.27, + "learning_rate": 2.734960033655869e-05, + "loss": 0.2489, + "step": 9450 + }, + { + "epoch": 0.27, + "learning_rate": 2.7335577057916142e-05, + "loss": 0.2209, + "step": 9500 + }, + { + "epoch": 0.27, + "learning_rate": 2.7321553779273592e-05, + "loss": 0.2435, + "step": 9550 + }, + { + "epoch": 0.27, + "learning_rate": 2.730753050063105e-05, + "loss": 0.2135, + "step": 9600 + }, + { + "epoch": 0.27, + "learning_rate": 2.7293507221988502e-05, + "loss": 0.2398, + "step": 9650 + }, + { + "epoch": 0.27, + "learning_rate": 2.7279483943345956e-05, + "loss": 0.2005, + "step": 9700 + }, + { + "epoch": 0.27, + "learning_rate": 2.726546066470341e-05, + "loss": 0.2122, + "step": 9750 + }, + { + "epoch": 0.27, + "learning_rate": 2.725143738606086e-05, + "loss": 0.2027, + "step": 9800 + }, + { + "epoch": 0.28, + "learning_rate": 2.7237414107418316e-05, + "loss": 0.199, + "step": 9850 + }, + { + "epoch": 0.28, + "learning_rate": 2.722339082877577e-05, + "loss": 0.222, + "step": 9900 + }, + { + "epoch": 0.28, + "learning_rate": 2.7209367550133222e-05, + "loss": 0.2185, + "step": 9950 + }, + { + "epoch": 0.28, + "learning_rate": 2.7195344271490676e-05, + "loss": 0.1982, + "step": 10000 + }, + { + "epoch": 0.28, + "eval_bleu": 95.2522, + "eval_gen_len": 64.626, + "eval_loss": 0.3007320463657379, + "eval_rouge1": 92.7899, + "eval_rouge2": 88.4907, + "eval_rougeL": 92.5338, + "eval_rougeLsum": 92.4971, + "eval_runtime": 3103.217, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 10000 + }, + { + "epoch": 0.28, + "learning_rate": 2.718132099284813e-05, + "loss": 0.2071, + "step": 10050 + }, + { + "epoch": 0.28, + "learning_rate": 2.7167297714205582e-05, + "loss": 0.204, + "step": 10100 + }, + { + "epoch": 0.28, + "learning_rate": 2.7153274435563036e-05, + "loss": 0.2138, + "step": 10150 + }, + { + "epoch": 0.29, + "learning_rate": 2.7139251156920486e-05, + "loss": 0.2045, + "step": 10200 + }, + { + "epoch": 0.29, + "learning_rate": 2.7125227878277942e-05, + "loss": 0.2005, + "step": 10250 + }, + { + "epoch": 0.29, + "learning_rate": 2.7111204599635396e-05, + "loss": 0.2392, + "step": 10300 + }, + { + "epoch": 0.29, + "learning_rate": 2.709718132099285e-05, + "loss": 0.2126, + "step": 10350 + }, + { + "epoch": 0.29, + "learning_rate": 2.7083158042350303e-05, + "loss": 0.2097, + "step": 10400 + }, + { + "epoch": 0.29, + "learning_rate": 2.7069134763707756e-05, + "loss": 0.2197, + "step": 10450 + }, + { + "epoch": 0.29, + "learning_rate": 2.705511148506521e-05, + "loss": 0.2013, + "step": 10500 + }, + { + "epoch": 0.3, + "learning_rate": 2.7041088206422663e-05, + "loss": 0.2208, + "step": 10550 + }, + { + "epoch": 0.3, + "learning_rate": 2.7027064927780113e-05, + "loss": 0.1938, + "step": 10600 + }, + { + "epoch": 0.3, + "learning_rate": 2.701304164913757e-05, + "loss": 0.2048, + "step": 10650 + }, + { + "epoch": 0.3, + "learning_rate": 2.6999018370495023e-05, + "loss": 0.2065, + "step": 10700 + }, + { + "epoch": 0.3, + "learning_rate": 2.6984995091852476e-05, + "loss": 0.2075, + "step": 10750 + }, + { + "epoch": 0.3, + "learning_rate": 2.697097181320993e-05, + "loss": 0.2279, + "step": 10800 + }, + { + "epoch": 0.3, + "learning_rate": 2.6956948534567383e-05, + "loss": 0.2072, + "step": 10850 + }, + { + "epoch": 0.31, + "learning_rate": 2.6942925255924836e-05, + "loss": 0.1998, + "step": 10900 + }, + { + "epoch": 0.31, + "learning_rate": 2.692890197728229e-05, + "loss": 0.1904, + "step": 10950 + }, + { + "epoch": 0.31, + "learning_rate": 2.691487869863974e-05, + "loss": 0.1762, + "step": 11000 + }, + { + "epoch": 0.31, + "eval_bleu": 95.3107, + "eval_gen_len": 64.546, + "eval_loss": 0.29688259959220886, + "eval_rouge1": 92.939, + "eval_rouge2": 88.7461, + "eval_rougeL": 92.6735, + "eval_rougeLsum": 92.6483, + "eval_runtime": 3120.8781, + "eval_samples_per_second": 0.961, + "eval_steps_per_second": 0.24, + "step": 11000 + }, + { + "epoch": 0.31, + "learning_rate": 2.6900855419997196e-05, + "loss": 0.2241, + "step": 11050 + }, + { + "epoch": 0.31, + "learning_rate": 2.688683214135465e-05, + "loss": 0.2087, + "step": 11100 + }, + { + "epoch": 0.31, + "learning_rate": 2.6872808862712103e-05, + "loss": 0.1918, + "step": 11150 + }, + { + "epoch": 0.31, + "learning_rate": 2.6858785584069556e-05, + "loss": 0.2117, + "step": 11200 + }, + { + "epoch": 0.32, + "learning_rate": 2.684476230542701e-05, + "loss": 0.213, + "step": 11250 + }, + { + "epoch": 0.32, + "learning_rate": 2.6830739026784463e-05, + "loss": 0.2338, + "step": 11300 + }, + { + "epoch": 0.32, + "learning_rate": 2.6816715748141916e-05, + "loss": 0.2056, + "step": 11350 + }, + { + "epoch": 0.32, + "learning_rate": 2.680269246949937e-05, + "loss": 0.2087, + "step": 11400 + }, + { + "epoch": 0.32, + "learning_rate": 2.6788669190856823e-05, + "loss": 0.2014, + "step": 11450 + }, + { + "epoch": 0.32, + "learning_rate": 2.6774645912214276e-05, + "loss": 0.2464, + "step": 11500 + }, + { + "epoch": 0.32, + "learning_rate": 2.676062263357173e-05, + "loss": 0.2132, + "step": 11550 + }, + { + "epoch": 0.33, + "learning_rate": 2.6746599354929183e-05, + "loss": 0.2199, + "step": 11600 + }, + { + "epoch": 0.33, + "learning_rate": 2.6732576076286636e-05, + "loss": 0.187, + "step": 11650 + }, + { + "epoch": 0.33, + "learning_rate": 2.671855279764409e-05, + "loss": 0.2053, + "step": 11700 + }, + { + "epoch": 0.33, + "learning_rate": 2.6704529519001543e-05, + "loss": 0.1936, + "step": 11750 + }, + { + "epoch": 0.33, + "learning_rate": 2.6690506240358997e-05, + "loss": 0.2214, + "step": 11800 + }, + { + "epoch": 0.33, + "learning_rate": 2.667648296171645e-05, + "loss": 0.2056, + "step": 11850 + }, + { + "epoch": 0.33, + "learning_rate": 2.6662459683073903e-05, + "loss": 0.181, + "step": 11900 + }, + { + "epoch": 0.34, + "learning_rate": 2.6648436404431357e-05, + "loss": 0.2127, + "step": 11950 + }, + { + "epoch": 0.34, + "learning_rate": 2.663441312578881e-05, + "loss": 0.1798, + "step": 12000 + }, + { + "epoch": 0.34, + "eval_bleu": 95.3546, + "eval_gen_len": 64.4707, + "eval_loss": 0.2918665111064911, + "eval_rouge1": 92.9828, + "eval_rouge2": 88.8459, + "eval_rougeL": 92.7418, + "eval_rougeLsum": 92.6996, + "eval_runtime": 3099.2674, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.242, + "step": 12000 + }, + { + "epoch": 0.34, + "learning_rate": 2.6620389847146263e-05, + "loss": 0.207, + "step": 12050 + }, + { + "epoch": 0.34, + "learning_rate": 2.6606366568503717e-05, + "loss": 0.1874, + "step": 12100 + }, + { + "epoch": 0.34, + "learning_rate": 2.659234328986117e-05, + "loss": 0.1933, + "step": 12150 + }, + { + "epoch": 0.34, + "learning_rate": 2.6578320011218623e-05, + "loss": 0.2117, + "step": 12200 + }, + { + "epoch": 0.34, + "learning_rate": 2.6564296732576077e-05, + "loss": 0.2147, + "step": 12250 + }, + { + "epoch": 0.34, + "learning_rate": 2.655027345393353e-05, + "loss": 0.2206, + "step": 12300 + }, + { + "epoch": 0.35, + "learning_rate": 2.6536250175290983e-05, + "loss": 0.2034, + "step": 12350 + }, + { + "epoch": 0.35, + "learning_rate": 2.6522226896648437e-05, + "loss": 0.2019, + "step": 12400 + }, + { + "epoch": 0.35, + "learning_rate": 2.650820361800589e-05, + "loss": 0.1982, + "step": 12450 + }, + { + "epoch": 0.35, + "learning_rate": 2.6494180339363344e-05, + "loss": 0.1851, + "step": 12500 + }, + { + "epoch": 0.35, + "learning_rate": 2.6480157060720797e-05, + "loss": 0.2089, + "step": 12550 + }, + { + "epoch": 0.35, + "learning_rate": 2.646613378207825e-05, + "loss": 0.1985, + "step": 12600 + }, + { + "epoch": 0.35, + "learning_rate": 2.6452110503435704e-05, + "loss": 0.1879, + "step": 12650 + }, + { + "epoch": 0.36, + "learning_rate": 2.6438087224793157e-05, + "loss": 0.2285, + "step": 12700 + }, + { + "epoch": 0.36, + "learning_rate": 2.642406394615061e-05, + "loss": 0.2068, + "step": 12750 + }, + { + "epoch": 0.36, + "learning_rate": 2.6410040667508064e-05, + "loss": 0.2248, + "step": 12800 + }, + { + "epoch": 0.36, + "learning_rate": 2.6396017388865517e-05, + "loss": 0.2017, + "step": 12850 + }, + { + "epoch": 0.36, + "learning_rate": 2.638199411022297e-05, + "loss": 0.2064, + "step": 12900 + }, + { + "epoch": 0.36, + "learning_rate": 2.6367970831580424e-05, + "loss": 0.1967, + "step": 12950 + }, + { + "epoch": 0.36, + "learning_rate": 2.6353947552937877e-05, + "loss": 0.2037, + "step": 13000 + }, + { + "epoch": 0.36, + "eval_bleu": 95.472, + "eval_gen_len": 64.5277, + "eval_loss": 0.28798067569732666, + "eval_rouge1": 93.1329, + "eval_rouge2": 89.0529, + "eval_rougeL": 92.8733, + "eval_rougeLsum": 92.8293, + "eval_runtime": 3148.5331, + "eval_samples_per_second": 0.953, + "eval_steps_per_second": 0.238, + "step": 13000 + }, + { + "epoch": 0.37, + "learning_rate": 2.633992427429533e-05, + "loss": 0.1902, + "step": 13050 + }, + { + "epoch": 0.37, + "learning_rate": 2.6325900995652784e-05, + "loss": 0.1787, + "step": 13100 + }, + { + "epoch": 0.37, + "learning_rate": 2.6311877717010237e-05, + "loss": 0.1822, + "step": 13150 + }, + { + "epoch": 0.37, + "learning_rate": 2.629785443836769e-05, + "loss": 0.2024, + "step": 13200 + }, + { + "epoch": 0.37, + "learning_rate": 2.6283831159725147e-05, + "loss": 0.2068, + "step": 13250 + }, + { + "epoch": 0.37, + "learning_rate": 2.6269807881082597e-05, + "loss": 0.2084, + "step": 13300 + }, + { + "epoch": 0.37, + "learning_rate": 2.625578460244005e-05, + "loss": 0.1967, + "step": 13350 + }, + { + "epoch": 0.38, + "learning_rate": 2.6241761323797504e-05, + "loss": 0.1902, + "step": 13400 + }, + { + "epoch": 0.38, + "learning_rate": 2.6227738045154957e-05, + "loss": 0.1975, + "step": 13450 + }, + { + "epoch": 0.38, + "learning_rate": 2.621371476651241e-05, + "loss": 0.2069, + "step": 13500 + }, + { + "epoch": 0.38, + "learning_rate": 2.6199691487869864e-05, + "loss": 0.2188, + "step": 13550 + }, + { + "epoch": 0.38, + "learning_rate": 2.6185668209227317e-05, + "loss": 0.2088, + "step": 13600 + }, + { + "epoch": 0.38, + "learning_rate": 2.6171644930584774e-05, + "loss": 0.1875, + "step": 13650 + }, + { + "epoch": 0.38, + "learning_rate": 2.6157621651942224e-05, + "loss": 0.1881, + "step": 13700 + }, + { + "epoch": 0.39, + "learning_rate": 2.6143598373299678e-05, + "loss": 0.1907, + "step": 13750 + }, + { + "epoch": 0.39, + "learning_rate": 2.612957509465713e-05, + "loss": 0.1968, + "step": 13800 + }, + { + "epoch": 0.39, + "learning_rate": 2.6115551816014584e-05, + "loss": 0.2087, + "step": 13850 + }, + { + "epoch": 0.39, + "learning_rate": 2.6101528537372038e-05, + "loss": 0.1805, + "step": 13900 + }, + { + "epoch": 0.39, + "learning_rate": 2.608750525872949e-05, + "loss": 0.1897, + "step": 13950 + }, + { + "epoch": 0.39, + "learning_rate": 2.6073481980086944e-05, + "loss": 0.2091, + "step": 14000 + }, + { + "epoch": 0.39, + "eval_bleu": 95.499, + "eval_gen_len": 64.423, + "eval_loss": 0.2838546335697174, + "eval_rouge1": 93.0724, + "eval_rouge2": 89.0302, + "eval_rougeL": 92.8261, + "eval_rougeLsum": 92.7827, + "eval_runtime": 3139.4968, + "eval_samples_per_second": 0.956, + "eval_steps_per_second": 0.239, + "step": 14000 + }, + { + "epoch": 0.39, + "learning_rate": 2.60594587014444e-05, + "loss": 0.1831, + "step": 14050 + }, + { + "epoch": 0.4, + "learning_rate": 2.604543542280185e-05, + "loss": 0.1918, + "step": 14100 + }, + { + "epoch": 0.4, + "learning_rate": 2.6031412144159304e-05, + "loss": 0.1973, + "step": 14150 + }, + { + "epoch": 0.4, + "learning_rate": 2.6017388865516758e-05, + "loss": 0.1948, + "step": 14200 + }, + { + "epoch": 0.4, + "learning_rate": 2.600336558687421e-05, + "loss": 0.202, + "step": 14250 + }, + { + "epoch": 0.4, + "learning_rate": 2.5989342308231664e-05, + "loss": 0.2092, + "step": 14300 + }, + { + "epoch": 0.4, + "learning_rate": 2.5975319029589118e-05, + "loss": 0.189, + "step": 14350 + }, + { + "epoch": 0.4, + "learning_rate": 2.596129575094657e-05, + "loss": 0.2014, + "step": 14400 + }, + { + "epoch": 0.41, + "learning_rate": 2.5947272472304028e-05, + "loss": 0.1835, + "step": 14450 + }, + { + "epoch": 0.41, + "learning_rate": 2.5933249193661478e-05, + "loss": 0.1941, + "step": 14500 + }, + { + "epoch": 0.41, + "learning_rate": 2.591922591501893e-05, + "loss": 0.2027, + "step": 14550 + }, + { + "epoch": 0.41, + "learning_rate": 2.5905202636376385e-05, + "loss": 0.1794, + "step": 14600 + }, + { + "epoch": 0.41, + "learning_rate": 2.5891179357733838e-05, + "loss": 0.2, + "step": 14650 + }, + { + "epoch": 0.41, + "learning_rate": 2.5877156079091295e-05, + "loss": 0.2091, + "step": 14700 + }, + { + "epoch": 0.41, + "learning_rate": 2.5863132800448745e-05, + "loss": 0.1813, + "step": 14750 + }, + { + "epoch": 0.42, + "learning_rate": 2.5849109521806198e-05, + "loss": 0.1793, + "step": 14800 + }, + { + "epoch": 0.42, + "learning_rate": 2.5835086243163655e-05, + "loss": 0.1852, + "step": 14850 + }, + { + "epoch": 0.42, + "learning_rate": 2.5821062964521105e-05, + "loss": 0.2122, + "step": 14900 + }, + { + "epoch": 0.42, + "learning_rate": 2.5807039685878558e-05, + "loss": 0.1758, + "step": 14950 + }, + { + "epoch": 0.42, + "learning_rate": 2.579301640723601e-05, + "loss": 0.1998, + "step": 15000 + }, + { + "epoch": 0.42, + "eval_bleu": 95.5491, + "eval_gen_len": 64.419, + "eval_loss": 0.28141552209854126, + "eval_rouge1": 93.1634, + "eval_rouge2": 89.1339, + "eval_rougeL": 92.9186, + "eval_rougeLsum": 92.904, + "eval_runtime": 3158.2947, + "eval_samples_per_second": 0.95, + "eval_steps_per_second": 0.237, + "step": 15000 + }, + { + "epoch": 0.42, + "learning_rate": 2.5778993128593465e-05, + "loss": 0.1902, + "step": 15050 + }, + { + "epoch": 0.42, + "learning_rate": 2.576496984995092e-05, + "loss": 0.1829, + "step": 15100 + }, + { + "epoch": 0.42, + "learning_rate": 2.575094657130837e-05, + "loss": 0.2212, + "step": 15150 + }, + { + "epoch": 0.43, + "learning_rate": 2.5736923292665825e-05, + "loss": 0.1878, + "step": 15200 + }, + { + "epoch": 0.43, + "learning_rate": 2.572290001402328e-05, + "loss": 0.2088, + "step": 15250 + }, + { + "epoch": 0.43, + "learning_rate": 2.570887673538073e-05, + "loss": 0.217, + "step": 15300 + }, + { + "epoch": 0.43, + "learning_rate": 2.5694853456738185e-05, + "loss": 0.1859, + "step": 15350 + }, + { + "epoch": 0.43, + "learning_rate": 2.568083017809564e-05, + "loss": 0.1975, + "step": 15400 + }, + { + "epoch": 0.43, + "learning_rate": 2.566680689945309e-05, + "loss": 0.1848, + "step": 15450 + }, + { + "epoch": 0.43, + "learning_rate": 2.565278362081055e-05, + "loss": 0.2097, + "step": 15500 + }, + { + "epoch": 0.44, + "learning_rate": 2.5638760342168e-05, + "loss": 0.1984, + "step": 15550 + }, + { + "epoch": 0.44, + "learning_rate": 2.5624737063525452e-05, + "loss": 0.1905, + "step": 15600 + }, + { + "epoch": 0.44, + "learning_rate": 2.561071378488291e-05, + "loss": 0.2155, + "step": 15650 + }, + { + "epoch": 0.44, + "learning_rate": 2.559669050624036e-05, + "loss": 0.1933, + "step": 15700 + }, + { + "epoch": 0.44, + "learning_rate": 2.5582667227597815e-05, + "loss": 0.1853, + "step": 15750 + }, + { + "epoch": 0.44, + "learning_rate": 2.5568643948955265e-05, + "loss": 0.1849, + "step": 15800 + }, + { + "epoch": 0.44, + "learning_rate": 2.555462067031272e-05, + "loss": 0.179, + "step": 15850 + }, + { + "epoch": 0.45, + "learning_rate": 2.5540597391670175e-05, + "loss": 0.1907, + "step": 15900 + }, + { + "epoch": 0.45, + "learning_rate": 2.5526574113027625e-05, + "loss": 0.2032, + "step": 15950 + }, + { + "epoch": 0.45, + "learning_rate": 2.551255083438508e-05, + "loss": 0.1855, + "step": 16000 + }, + { + "epoch": 0.45, + "eval_bleu": 95.5711, + "eval_gen_len": 64.4307, + "eval_loss": 0.27981552481651306, + "eval_rouge1": 93.2066, + "eval_rouge2": 89.2588, + "eval_rougeL": 92.9732, + "eval_rougeLsum": 92.9506, + "eval_runtime": 3083.9167, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 16000 + }, + { + "epoch": 0.45, + "learning_rate": 2.5498527555742535e-05, + "loss": 0.19, + "step": 16050 + }, + { + "epoch": 0.45, + "learning_rate": 2.5484504277099985e-05, + "loss": 0.1877, + "step": 16100 + }, + { + "epoch": 0.45, + "learning_rate": 2.5470480998457442e-05, + "loss": 0.1868, + "step": 16150 + }, + { + "epoch": 0.45, + "learning_rate": 2.5456457719814892e-05, + "loss": 0.179, + "step": 16200 + }, + { + "epoch": 0.46, + "learning_rate": 2.5442434441172345e-05, + "loss": 0.2247, + "step": 16250 + }, + { + "epoch": 0.46, + "learning_rate": 2.5428411162529802e-05, + "loss": 0.1745, + "step": 16300 + }, + { + "epoch": 0.46, + "learning_rate": 2.5414387883887252e-05, + "loss": 0.2015, + "step": 16350 + }, + { + "epoch": 0.46, + "learning_rate": 2.5400364605244705e-05, + "loss": 0.212, + "step": 16400 + }, + { + "epoch": 0.46, + "learning_rate": 2.5386341326602162e-05, + "loss": 0.2018, + "step": 16450 + }, + { + "epoch": 0.46, + "learning_rate": 2.5372318047959612e-05, + "loss": 0.1888, + "step": 16500 + }, + { + "epoch": 0.46, + "learning_rate": 2.535829476931707e-05, + "loss": 0.1806, + "step": 16550 + }, + { + "epoch": 0.47, + "learning_rate": 2.534427149067452e-05, + "loss": 0.1791, + "step": 16600 + }, + { + "epoch": 0.47, + "learning_rate": 2.5330248212031972e-05, + "loss": 0.1949, + "step": 16650 + }, + { + "epoch": 0.47, + "learning_rate": 2.531622493338943e-05, + "loss": 0.1858, + "step": 16700 + }, + { + "epoch": 0.47, + "learning_rate": 2.530220165474688e-05, + "loss": 0.1879, + "step": 16750 + }, + { + "epoch": 0.47, + "learning_rate": 2.5288178376104332e-05, + "loss": 0.1936, + "step": 16800 + }, + { + "epoch": 0.47, + "learning_rate": 2.527415509746179e-05, + "loss": 0.1692, + "step": 16850 + }, + { + "epoch": 0.47, + "learning_rate": 2.526013181881924e-05, + "loss": 0.1852, + "step": 16900 + }, + { + "epoch": 0.48, + "learning_rate": 2.5246108540176696e-05, + "loss": 0.2128, + "step": 16950 + }, + { + "epoch": 0.48, + "learning_rate": 2.5232085261534146e-05, + "loss": 0.184, + "step": 17000 + }, + { + "epoch": 0.48, + "eval_bleu": 95.6325, + "eval_gen_len": 64.3623, + "eval_loss": 0.2761005759239197, + "eval_rouge1": 93.269, + "eval_rouge2": 89.3661, + "eval_rougeL": 93.0637, + "eval_rougeLsum": 93.0433, + "eval_runtime": 3126.3939, + "eval_samples_per_second": 0.96, + "eval_steps_per_second": 0.24, + "step": 17000 + }, + { + "epoch": 0.48, + "learning_rate": 2.52180619828916e-05, + "loss": 0.2114, + "step": 17050 + }, + { + "epoch": 0.48, + "learning_rate": 2.5204038704249056e-05, + "loss": 0.1933, + "step": 17100 + }, + { + "epoch": 0.48, + "learning_rate": 2.5190015425606506e-05, + "loss": 0.1978, + "step": 17150 + }, + { + "epoch": 0.48, + "learning_rate": 2.5175992146963963e-05, + "loss": 0.2033, + "step": 17200 + }, + { + "epoch": 0.48, + "learning_rate": 2.5161968868321416e-05, + "loss": 0.1854, + "step": 17250 + }, + { + "epoch": 0.49, + "learning_rate": 2.5147945589678866e-05, + "loss": 0.1792, + "step": 17300 + }, + { + "epoch": 0.49, + "learning_rate": 2.5133922311036323e-05, + "loss": 0.1804, + "step": 17350 + }, + { + "epoch": 0.49, + "learning_rate": 2.5119899032393773e-05, + "loss": 0.1901, + "step": 17400 + }, + { + "epoch": 0.49, + "learning_rate": 2.5105875753751226e-05, + "loss": 0.2045, + "step": 17450 + }, + { + "epoch": 0.49, + "learning_rate": 2.5091852475108683e-05, + "loss": 0.1796, + "step": 17500 + }, + { + "epoch": 0.49, + "learning_rate": 2.5077829196466133e-05, + "loss": 0.1763, + "step": 17550 + }, + { + "epoch": 0.49, + "learning_rate": 2.506380591782359e-05, + "loss": 0.1882, + "step": 17600 + }, + { + "epoch": 0.5, + "learning_rate": 2.5049782639181043e-05, + "loss": 0.191, + "step": 17650 + }, + { + "epoch": 0.5, + "learning_rate": 2.5035759360538493e-05, + "loss": 0.1646, + "step": 17700 + }, + { + "epoch": 0.5, + "learning_rate": 2.502173608189595e-05, + "loss": 0.1898, + "step": 17750 + }, + { + "epoch": 0.5, + "learning_rate": 2.50077128032534e-05, + "loss": 0.1717, + "step": 17800 + }, + { + "epoch": 0.5, + "learning_rate": 2.4993689524610853e-05, + "loss": 0.1676, + "step": 17850 + }, + { + "epoch": 0.5, + "learning_rate": 2.497966624596831e-05, + "loss": 0.1978, + "step": 17900 + }, + { + "epoch": 0.5, + "learning_rate": 2.496564296732576e-05, + "loss": 0.216, + "step": 17950 + }, + { + "epoch": 0.5, + "learning_rate": 2.4951619688683216e-05, + "loss": 0.2025, + "step": 18000 + }, + { + "epoch": 0.5, + "eval_bleu": 95.6412, + "eval_gen_len": 64.4067, + "eval_loss": 0.27226048707962036, + "eval_rouge1": 93.2987, + "eval_rouge2": 89.3773, + "eval_rougeL": 93.084, + "eval_rougeLsum": 93.0595, + "eval_runtime": 3091.5775, + "eval_samples_per_second": 0.97, + "eval_steps_per_second": 0.243, + "step": 18000 + }, + { + "epoch": 0.51, + "learning_rate": 2.493759641004067e-05, + "loss": 0.1948, + "step": 18050 + }, + { + "epoch": 0.51, + "learning_rate": 2.492357313139812e-05, + "loss": 0.1896, + "step": 18100 + }, + { + "epoch": 0.51, + "learning_rate": 2.4909549852755576e-05, + "loss": 0.1767, + "step": 18150 + }, + { + "epoch": 0.51, + "learning_rate": 2.4895526574113026e-05, + "loss": 0.2107, + "step": 18200 + }, + { + "epoch": 0.51, + "learning_rate": 2.488150329547048e-05, + "loss": 0.1878, + "step": 18250 + }, + { + "epoch": 0.51, + "learning_rate": 2.4867480016827936e-05, + "loss": 0.1735, + "step": 18300 + }, + { + "epoch": 0.51, + "learning_rate": 2.4853456738185386e-05, + "loss": 0.1607, + "step": 18350 + }, + { + "epoch": 0.52, + "learning_rate": 2.4839433459542843e-05, + "loss": 0.1611, + "step": 18400 + }, + { + "epoch": 0.52, + "learning_rate": 2.4825410180900297e-05, + "loss": 0.1889, + "step": 18450 + }, + { + "epoch": 0.52, + "learning_rate": 2.4811386902257746e-05, + "loss": 0.1962, + "step": 18500 + }, + { + "epoch": 0.52, + "learning_rate": 2.4797363623615203e-05, + "loss": 0.1767, + "step": 18550 + }, + { + "epoch": 0.52, + "learning_rate": 2.4783340344972653e-05, + "loss": 0.2027, + "step": 18600 + }, + { + "epoch": 0.52, + "learning_rate": 2.476931706633011e-05, + "loss": 0.1994, + "step": 18650 + }, + { + "epoch": 0.52, + "learning_rate": 2.4755293787687563e-05, + "loss": 0.1725, + "step": 18700 + }, + { + "epoch": 0.53, + "learning_rate": 2.4741270509045013e-05, + "loss": 0.1931, + "step": 18750 + }, + { + "epoch": 0.53, + "learning_rate": 2.472724723040247e-05, + "loss": 0.1897, + "step": 18800 + }, + { + "epoch": 0.53, + "learning_rate": 2.4713223951759923e-05, + "loss": 0.191, + "step": 18850 + }, + { + "epoch": 0.53, + "learning_rate": 2.4699200673117373e-05, + "loss": 0.1868, + "step": 18900 + }, + { + "epoch": 0.53, + "learning_rate": 2.468517739447483e-05, + "loss": 0.1738, + "step": 18950 + }, + { + "epoch": 0.53, + "learning_rate": 2.467115411583228e-05, + "loss": 0.1808, + "step": 19000 + }, + { + "epoch": 0.53, + "eval_bleu": 95.6749, + "eval_gen_len": 64.317, + "eval_loss": 0.2700382173061371, + "eval_rouge1": 93.4064, + "eval_rouge2": 89.5527, + "eval_rougeL": 93.2047, + "eval_rougeLsum": 93.1846, + "eval_runtime": 3097.9008, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.242, + "step": 19000 + }, + { + "epoch": 0.53, + "learning_rate": 2.4657130837189737e-05, + "loss": 0.1747, + "step": 19050 + }, + { + "epoch": 0.54, + "learning_rate": 2.464310755854719e-05, + "loss": 0.189, + "step": 19100 + }, + { + "epoch": 0.54, + "learning_rate": 2.462908427990464e-05, + "loss": 0.172, + "step": 19150 + }, + { + "epoch": 0.54, + "learning_rate": 2.4615061001262097e-05, + "loss": 0.1793, + "step": 19200 + }, + { + "epoch": 0.54, + "learning_rate": 2.460103772261955e-05, + "loss": 0.1855, + "step": 19250 + }, + { + "epoch": 0.54, + "learning_rate": 2.4587014443977e-05, + "loss": 0.1845, + "step": 19300 + }, + { + "epoch": 0.54, + "learning_rate": 2.4572991165334457e-05, + "loss": 0.1907, + "step": 19350 + }, + { + "epoch": 0.54, + "learning_rate": 2.4558967886691907e-05, + "loss": 0.1713, + "step": 19400 + }, + { + "epoch": 0.55, + "learning_rate": 2.4544944608049364e-05, + "loss": 0.1983, + "step": 19450 + }, + { + "epoch": 0.55, + "learning_rate": 2.4530921329406817e-05, + "loss": 0.1654, + "step": 19500 + }, + { + "epoch": 0.55, + "learning_rate": 2.4516898050764267e-05, + "loss": 0.2049, + "step": 19550 + }, + { + "epoch": 0.55, + "learning_rate": 2.4502874772121724e-05, + "loss": 0.1826, + "step": 19600 + }, + { + "epoch": 0.55, + "learning_rate": 2.4488851493479177e-05, + "loss": 0.1622, + "step": 19650 + }, + { + "epoch": 0.55, + "learning_rate": 2.4474828214836627e-05, + "loss": 0.196, + "step": 19700 + }, + { + "epoch": 0.55, + "learning_rate": 2.4460804936194084e-05, + "loss": 0.1805, + "step": 19750 + }, + { + "epoch": 0.56, + "learning_rate": 2.4446781657551534e-05, + "loss": 0.176, + "step": 19800 + }, + { + "epoch": 0.56, + "learning_rate": 2.443275837890899e-05, + "loss": 0.1756, + "step": 19850 + }, + { + "epoch": 0.56, + "learning_rate": 2.4418735100266444e-05, + "loss": 0.2069, + "step": 19900 + }, + { + "epoch": 0.56, + "learning_rate": 2.4404711821623894e-05, + "loss": 0.2099, + "step": 19950 + }, + { + "epoch": 0.56, + "learning_rate": 2.439068854298135e-05, + "loss": 0.1844, + "step": 20000 + }, + { + "epoch": 0.56, + "eval_bleu": 95.7094, + "eval_gen_len": 64.34, + "eval_loss": 0.2679564356803894, + "eval_rouge1": 93.3936, + "eval_rouge2": 89.5638, + "eval_rougeL": 93.2067, + "eval_rougeLsum": 93.1818, + "eval_runtime": 3170.5236, + "eval_samples_per_second": 0.946, + "eval_steps_per_second": 0.237, + "step": 20000 + }, + { + "epoch": 0.56, + "learning_rate": 2.4376665264338804e-05, + "loss": 0.1718, + "step": 20050 + }, + { + "epoch": 0.56, + "learning_rate": 2.4362641985696257e-05, + "loss": 0.1938, + "step": 20100 + }, + { + "epoch": 0.57, + "learning_rate": 2.434861870705371e-05, + "loss": 0.1808, + "step": 20150 + }, + { + "epoch": 0.57, + "learning_rate": 2.433459542841116e-05, + "loss": 0.1805, + "step": 20200 + }, + { + "epoch": 0.57, + "learning_rate": 2.4320572149768617e-05, + "loss": 0.1761, + "step": 20250 + }, + { + "epoch": 0.57, + "learning_rate": 2.430654887112607e-05, + "loss": 0.1826, + "step": 20300 + }, + { + "epoch": 0.57, + "learning_rate": 2.429252559248352e-05, + "loss": 0.1822, + "step": 20350 + }, + { + "epoch": 0.57, + "learning_rate": 2.4278502313840977e-05, + "loss": 0.1838, + "step": 20400 + }, + { + "epoch": 0.57, + "learning_rate": 2.426447903519843e-05, + "loss": 0.1723, + "step": 20450 + }, + { + "epoch": 0.57, + "learning_rate": 2.4250455756555884e-05, + "loss": 0.1816, + "step": 20500 + }, + { + "epoch": 0.58, + "learning_rate": 2.4236432477913338e-05, + "loss": 0.1973, + "step": 20550 + }, + { + "epoch": 0.58, + "learning_rate": 2.4222409199270788e-05, + "loss": 0.2059, + "step": 20600 + }, + { + "epoch": 0.58, + "learning_rate": 2.4208385920628244e-05, + "loss": 0.1743, + "step": 20650 + }, + { + "epoch": 0.58, + "learning_rate": 2.4194362641985698e-05, + "loss": 0.2011, + "step": 20700 + }, + { + "epoch": 0.58, + "learning_rate": 2.4180339363343148e-05, + "loss": 0.1914, + "step": 20750 + }, + { + "epoch": 0.58, + "learning_rate": 2.4166316084700604e-05, + "loss": 0.1716, + "step": 20800 + }, + { + "epoch": 0.58, + "learning_rate": 2.4152292806058058e-05, + "loss": 0.179, + "step": 20850 + }, + { + "epoch": 0.59, + "learning_rate": 2.413826952741551e-05, + "loss": 0.2066, + "step": 20900 + }, + { + "epoch": 0.59, + "learning_rate": 2.4124246248772964e-05, + "loss": 0.1964, + "step": 20950 + }, + { + "epoch": 0.59, + "learning_rate": 2.4110222970130414e-05, + "loss": 0.1892, + "step": 21000 + }, + { + "epoch": 0.59, + "eval_bleu": 95.7337, + "eval_gen_len": 64.2923, + "eval_loss": 0.2668701708316803, + "eval_rouge1": 93.4236, + "eval_rouge2": 89.643, + "eval_rougeL": 93.2265, + "eval_rougeLsum": 93.2057, + "eval_runtime": 3109.7477, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.241, + "step": 21000 + }, + { + "epoch": 0.59, + "learning_rate": 2.409619969148787e-05, + "loss": 0.1852, + "step": 21050 + }, + { + "epoch": 0.59, + "learning_rate": 2.4082176412845324e-05, + "loss": 0.1833, + "step": 21100 + }, + { + "epoch": 0.59, + "learning_rate": 2.4068153134202774e-05, + "loss": 0.1668, + "step": 21150 + }, + { + "epoch": 0.59, + "learning_rate": 2.405412985556023e-05, + "loss": 0.1971, + "step": 21200 + }, + { + "epoch": 0.6, + "learning_rate": 2.4040106576917685e-05, + "loss": 0.1819, + "step": 21250 + }, + { + "epoch": 0.6, + "learning_rate": 2.4026083298275138e-05, + "loss": 0.1922, + "step": 21300 + }, + { + "epoch": 0.6, + "learning_rate": 2.401206001963259e-05, + "loss": 0.1799, + "step": 21350 + }, + { + "epoch": 0.6, + "learning_rate": 2.399803674099004e-05, + "loss": 0.1892, + "step": 21400 + }, + { + "epoch": 0.6, + "learning_rate": 2.3984013462347498e-05, + "loss": 0.1631, + "step": 21450 + }, + { + "epoch": 0.6, + "learning_rate": 2.396999018370495e-05, + "loss": 0.1728, + "step": 21500 + }, + { + "epoch": 0.6, + "learning_rate": 2.3955966905062405e-05, + "loss": 0.1833, + "step": 21550 + }, + { + "epoch": 0.61, + "learning_rate": 2.3941943626419858e-05, + "loss": 0.1988, + "step": 21600 + }, + { + "epoch": 0.61, + "learning_rate": 2.392792034777731e-05, + "loss": 0.176, + "step": 21650 + }, + { + "epoch": 0.61, + "learning_rate": 2.3913897069134765e-05, + "loss": 0.1664, + "step": 21700 + }, + { + "epoch": 0.61, + "learning_rate": 2.3899873790492218e-05, + "loss": 0.1749, + "step": 21750 + }, + { + "epoch": 0.61, + "learning_rate": 2.3885850511849668e-05, + "loss": 0.1856, + "step": 21800 + }, + { + "epoch": 0.61, + "learning_rate": 2.3871827233207125e-05, + "loss": 0.1845, + "step": 21850 + }, + { + "epoch": 0.61, + "learning_rate": 2.3857803954564578e-05, + "loss": 0.1823, + "step": 21900 + }, + { + "epoch": 0.62, + "learning_rate": 2.384378067592203e-05, + "loss": 0.1882, + "step": 21950 + }, + { + "epoch": 0.62, + "learning_rate": 2.3829757397279485e-05, + "loss": 0.1754, + "step": 22000 + }, + { + "epoch": 0.62, + "eval_bleu": 95.7216, + "eval_gen_len": 64.29, + "eval_loss": 0.26628053188323975, + "eval_rouge1": 93.494, + "eval_rouge2": 89.7041, + "eval_rougeL": 93.2775, + "eval_rougeLsum": 93.255, + "eval_runtime": 3108.636, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.241, + "step": 22000 + }, + { + "epoch": 0.62, + "learning_rate": 2.3815734118636938e-05, + "loss": 0.1727, + "step": 22050 + }, + { + "epoch": 0.62, + "learning_rate": 2.380171083999439e-05, + "loss": 0.2006, + "step": 22100 + }, + { + "epoch": 0.62, + "learning_rate": 2.3787687561351845e-05, + "loss": 0.1731, + "step": 22150 + }, + { + "epoch": 0.62, + "learning_rate": 2.3773664282709295e-05, + "loss": 0.1718, + "step": 22200 + }, + { + "epoch": 0.62, + "learning_rate": 2.3759641004066752e-05, + "loss": 0.1935, + "step": 22250 + }, + { + "epoch": 0.63, + "learning_rate": 2.3745617725424205e-05, + "loss": 0.1855, + "step": 22300 + }, + { + "epoch": 0.63, + "learning_rate": 2.373159444678166e-05, + "loss": 0.1788, + "step": 22350 + }, + { + "epoch": 0.63, + "learning_rate": 2.3717571168139112e-05, + "loss": 0.1722, + "step": 22400 + }, + { + "epoch": 0.63, + "learning_rate": 2.3703547889496565e-05, + "loss": 0.1685, + "step": 22450 + }, + { + "epoch": 0.63, + "learning_rate": 2.368952461085402e-05, + "loss": 0.1998, + "step": 22500 + }, + { + "epoch": 0.63, + "learning_rate": 2.3675501332211472e-05, + "loss": 0.1891, + "step": 22550 + }, + { + "epoch": 0.63, + "learning_rate": 2.3661478053568922e-05, + "loss": 0.1865, + "step": 22600 + }, + { + "epoch": 0.64, + "learning_rate": 2.364745477492638e-05, + "loss": 0.2003, + "step": 22650 + }, + { + "epoch": 0.64, + "learning_rate": 2.3633431496283832e-05, + "loss": 0.1833, + "step": 22700 + }, + { + "epoch": 0.64, + "learning_rate": 2.3619408217641285e-05, + "loss": 0.1709, + "step": 22750 + }, + { + "epoch": 0.64, + "learning_rate": 2.360538493899874e-05, + "loss": 0.1784, + "step": 22800 + }, + { + "epoch": 0.64, + "learning_rate": 2.3591361660356192e-05, + "loss": 0.1976, + "step": 22850 + }, + { + "epoch": 0.64, + "learning_rate": 2.3577338381713645e-05, + "loss": 0.2096, + "step": 22900 + }, + { + "epoch": 0.64, + "learning_rate": 2.35633151030711e-05, + "loss": 0.1795, + "step": 22950 + }, + { + "epoch": 0.65, + "learning_rate": 2.3549291824428552e-05, + "loss": 0.1843, + "step": 23000 + }, + { + "epoch": 0.65, + "eval_bleu": 95.7705, + "eval_gen_len": 64.281, + "eval_loss": 0.263296902179718, + "eval_rouge1": 93.5248, + "eval_rouge2": 89.7445, + "eval_rougeL": 93.3203, + "eval_rougeLsum": 93.3217, + "eval_runtime": 3090.4348, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 23000 + }, + { + "epoch": 0.65, + "learning_rate": 2.3535268545786005e-05, + "loss": 0.171, + "step": 23050 + }, + { + "epoch": 0.65, + "learning_rate": 2.352124526714346e-05, + "loss": 0.1777, + "step": 23100 + }, + { + "epoch": 0.65, + "learning_rate": 2.3507221988500912e-05, + "loss": 0.2083, + "step": 23150 + }, + { + "epoch": 0.65, + "learning_rate": 2.3493198709858365e-05, + "loss": 0.163, + "step": 23200 + }, + { + "epoch": 0.65, + "learning_rate": 2.347917543121582e-05, + "loss": 0.1884, + "step": 23250 + }, + { + "epoch": 0.65, + "learning_rate": 2.3465152152573272e-05, + "loss": 0.1772, + "step": 23300 + }, + { + "epoch": 0.65, + "learning_rate": 2.3451128873930726e-05, + "loss": 0.1746, + "step": 23350 + }, + { + "epoch": 0.66, + "learning_rate": 2.343710559528818e-05, + "loss": 0.1971, + "step": 23400 + }, + { + "epoch": 0.66, + "learning_rate": 2.3423082316645632e-05, + "loss": 0.1697, + "step": 23450 + }, + { + "epoch": 0.66, + "learning_rate": 2.3409059038003086e-05, + "loss": 0.1635, + "step": 23500 + }, + { + "epoch": 0.66, + "learning_rate": 2.339503575936054e-05, + "loss": 0.1725, + "step": 23550 + }, + { + "epoch": 0.66, + "learning_rate": 2.3381012480717992e-05, + "loss": 0.1669, + "step": 23600 + }, + { + "epoch": 0.66, + "learning_rate": 2.3366989202075446e-05, + "loss": 0.1871, + "step": 23650 + }, + { + "epoch": 0.66, + "learning_rate": 2.33529659234329e-05, + "loss": 0.1641, + "step": 23700 + }, + { + "epoch": 0.67, + "learning_rate": 2.3338942644790352e-05, + "loss": 0.1909, + "step": 23750 + }, + { + "epoch": 0.67, + "learning_rate": 2.3324919366147806e-05, + "loss": 0.1782, + "step": 23800 + }, + { + "epoch": 0.67, + "learning_rate": 2.331089608750526e-05, + "loss": 0.1802, + "step": 23850 + }, + { + "epoch": 0.67, + "learning_rate": 2.3296872808862713e-05, + "loss": 0.1669, + "step": 23900 + }, + { + "epoch": 0.67, + "learning_rate": 2.3282849530220166e-05, + "loss": 0.1745, + "step": 23950 + }, + { + "epoch": 0.67, + "learning_rate": 2.326882625157762e-05, + "loss": 0.1607, + "step": 24000 + }, + { + "epoch": 0.67, + "eval_bleu": 95.7802, + "eval_gen_len": 64.2037, + "eval_loss": 0.26373326778411865, + "eval_rouge1": 93.5949, + "eval_rouge2": 89.9557, + "eval_rougeL": 93.4252, + "eval_rougeLsum": 93.3955, + "eval_runtime": 3090.8367, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 24000 + }, + { + "epoch": 0.67, + "learning_rate": 2.3254802972935073e-05, + "loss": 0.1758, + "step": 24050 + }, + { + "epoch": 0.68, + "learning_rate": 2.3240779694292526e-05, + "loss": 0.1948, + "step": 24100 + }, + { + "epoch": 0.68, + "learning_rate": 2.322675641564998e-05, + "loss": 0.1836, + "step": 24150 + }, + { + "epoch": 0.68, + "learning_rate": 2.3212733137007433e-05, + "loss": 0.1999, + "step": 24200 + }, + { + "epoch": 0.68, + "learning_rate": 2.3198709858364886e-05, + "loss": 0.181, + "step": 24250 + }, + { + "epoch": 0.68, + "learning_rate": 2.318468657972234e-05, + "loss": 0.1729, + "step": 24300 + }, + { + "epoch": 0.68, + "learning_rate": 2.3170663301079793e-05, + "loss": 0.1814, + "step": 24350 + }, + { + "epoch": 0.68, + "learning_rate": 2.3156640022437246e-05, + "loss": 0.1751, + "step": 24400 + }, + { + "epoch": 0.69, + "learning_rate": 2.3142616743794703e-05, + "loss": 0.1655, + "step": 24450 + }, + { + "epoch": 0.69, + "learning_rate": 2.3128593465152153e-05, + "loss": 0.1742, + "step": 24500 + }, + { + "epoch": 0.69, + "learning_rate": 2.3114570186509606e-05, + "loss": 0.154, + "step": 24550 + }, + { + "epoch": 0.69, + "learning_rate": 2.310054690786706e-05, + "loss": 0.1758, + "step": 24600 + }, + { + "epoch": 0.69, + "learning_rate": 2.3086523629224513e-05, + "loss": 0.1921, + "step": 24650 + }, + { + "epoch": 0.69, + "learning_rate": 2.3072500350581966e-05, + "loss": 0.1803, + "step": 24700 + }, + { + "epoch": 0.69, + "learning_rate": 2.305847707193942e-05, + "loss": 0.2012, + "step": 24750 + }, + { + "epoch": 0.7, + "learning_rate": 2.3044453793296873e-05, + "loss": 0.163, + "step": 24800 + }, + { + "epoch": 0.7, + "learning_rate": 2.303043051465433e-05, + "loss": 0.1679, + "step": 24850 + }, + { + "epoch": 0.7, + "learning_rate": 2.301640723601178e-05, + "loss": 0.1846, + "step": 24900 + }, + { + "epoch": 0.7, + "learning_rate": 2.3002383957369233e-05, + "loss": 0.1813, + "step": 24950 + }, + { + "epoch": 0.7, + "learning_rate": 2.2988360678726686e-05, + "loss": 0.1583, + "step": 25000 + }, + { + "epoch": 0.7, + "eval_bleu": 95.7853, + "eval_gen_len": 64.1823, + "eval_loss": 0.26211991906166077, + "eval_rouge1": 93.6077, + "eval_rouge2": 89.8774, + "eval_rougeL": 93.4105, + "eval_rougeLsum": 93.3911, + "eval_runtime": 3088.3555, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 25000 + }, + { + "epoch": 0.7, + "learning_rate": 2.297433740008414e-05, + "loss": 0.203, + "step": 25050 + }, + { + "epoch": 0.7, + "learning_rate": 2.2960314121441593e-05, + "loss": 0.1676, + "step": 25100 + }, + { + "epoch": 0.71, + "learning_rate": 2.2946290842799046e-05, + "loss": 0.1866, + "step": 25150 + }, + { + "epoch": 0.71, + "learning_rate": 2.29322675641565e-05, + "loss": 0.1928, + "step": 25200 + }, + { + "epoch": 0.71, + "learning_rate": 2.2918244285513957e-05, + "loss": 0.1836, + "step": 25250 + }, + { + "epoch": 0.71, + "learning_rate": 2.2904221006871407e-05, + "loss": 0.1706, + "step": 25300 + }, + { + "epoch": 0.71, + "learning_rate": 2.289019772822886e-05, + "loss": 0.2018, + "step": 25350 + }, + { + "epoch": 0.71, + "learning_rate": 2.2876174449586313e-05, + "loss": 0.1771, + "step": 25400 + }, + { + "epoch": 0.71, + "learning_rate": 2.2862151170943767e-05, + "loss": 0.1808, + "step": 25450 + }, + { + "epoch": 0.72, + "learning_rate": 2.284812789230122e-05, + "loss": 0.1968, + "step": 25500 + }, + { + "epoch": 0.72, + "learning_rate": 2.2834104613658673e-05, + "loss": 0.1776, + "step": 25550 + }, + { + "epoch": 0.72, + "learning_rate": 2.2820081335016127e-05, + "loss": 0.1657, + "step": 25600 + }, + { + "epoch": 0.72, + "learning_rate": 2.2806058056373583e-05, + "loss": 0.1627, + "step": 25650 + }, + { + "epoch": 0.72, + "learning_rate": 2.2792034777731033e-05, + "loss": 0.1687, + "step": 25700 + }, + { + "epoch": 0.72, + "learning_rate": 2.2778011499088487e-05, + "loss": 0.1691, + "step": 25750 + }, + { + "epoch": 0.72, + "learning_rate": 2.276398822044594e-05, + "loss": 0.1875, + "step": 25800 + }, + { + "epoch": 0.72, + "learning_rate": 2.2749964941803393e-05, + "loss": 0.1757, + "step": 25850 + }, + { + "epoch": 0.73, + "learning_rate": 2.273594166316085e-05, + "loss": 0.2009, + "step": 25900 + }, + { + "epoch": 0.73, + "learning_rate": 2.27219183845183e-05, + "loss": 0.1762, + "step": 25950 + }, + { + "epoch": 0.73, + "learning_rate": 2.2707895105875754e-05, + "loss": 0.1533, + "step": 26000 + }, + { + "epoch": 0.73, + "eval_bleu": 95.8266, + "eval_gen_len": 64.2703, + "eval_loss": 0.26142334938049316, + "eval_rouge1": 93.5866, + "eval_rouge2": 89.888, + "eval_rougeL": 93.3841, + "eval_rougeLsum": 93.3821, + "eval_runtime": 3090.7679, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 26000 + }, + { + "epoch": 0.73, + "learning_rate": 2.269387182723321e-05, + "loss": 0.1756, + "step": 26050 + }, + { + "epoch": 0.73, + "learning_rate": 2.267984854859066e-05, + "loss": 0.1793, + "step": 26100 + }, + { + "epoch": 0.73, + "learning_rate": 2.2665825269948114e-05, + "loss": 0.1646, + "step": 26150 + }, + { + "epoch": 0.73, + "learning_rate": 2.2651801991305567e-05, + "loss": 0.1537, + "step": 26200 + }, + { + "epoch": 0.74, + "learning_rate": 2.263777871266302e-05, + "loss": 0.1593, + "step": 26250 + }, + { + "epoch": 0.74, + "learning_rate": 2.2623755434020477e-05, + "loss": 0.182, + "step": 26300 + }, + { + "epoch": 0.74, + "learning_rate": 2.2609732155377927e-05, + "loss": 0.1721, + "step": 26350 + }, + { + "epoch": 0.74, + "learning_rate": 2.259570887673538e-05, + "loss": 0.1922, + "step": 26400 + }, + { + "epoch": 0.74, + "learning_rate": 2.2581685598092837e-05, + "loss": 0.1703, + "step": 26450 + }, + { + "epoch": 0.74, + "learning_rate": 2.2567662319450287e-05, + "loss": 0.1731, + "step": 26500 + }, + { + "epoch": 0.74, + "learning_rate": 2.255363904080774e-05, + "loss": 0.1689, + "step": 26550 + }, + { + "epoch": 0.75, + "learning_rate": 2.2539615762165194e-05, + "loss": 0.1537, + "step": 26600 + }, + { + "epoch": 0.75, + "learning_rate": 2.2525592483522647e-05, + "loss": 0.161, + "step": 26650 + }, + { + "epoch": 0.75, + "learning_rate": 2.2511569204880104e-05, + "loss": 0.1812, + "step": 26700 + }, + { + "epoch": 0.75, + "learning_rate": 2.2497545926237554e-05, + "loss": 0.188, + "step": 26750 + }, + { + "epoch": 0.75, + "learning_rate": 2.2483522647595007e-05, + "loss": 0.2057, + "step": 26800 + }, + { + "epoch": 0.75, + "learning_rate": 2.2469499368952464e-05, + "loss": 0.1778, + "step": 26850 + }, + { + "epoch": 0.75, + "learning_rate": 2.2455476090309914e-05, + "loss": 0.1757, + "step": 26900 + }, + { + "epoch": 0.76, + "learning_rate": 2.2441452811667367e-05, + "loss": 0.196, + "step": 26950 + }, + { + "epoch": 0.76, + "learning_rate": 2.242742953302482e-05, + "loss": 0.1552, + "step": 27000 + }, + { + "epoch": 0.76, + "eval_bleu": 95.8418, + "eval_gen_len": 64.2587, + "eval_loss": 0.2597999572753906, + "eval_rouge1": 93.5569, + "eval_rouge2": 89.8854, + "eval_rougeL": 93.3668, + "eval_rougeLsum": 93.3569, + "eval_runtime": 3122.2549, + "eval_samples_per_second": 0.961, + "eval_steps_per_second": 0.24, + "step": 27000 + }, + { + "epoch": 0.76, + "learning_rate": 2.2413406254382274e-05, + "loss": 0.1963, + "step": 27050 + }, + { + "epoch": 0.76, + "learning_rate": 2.239938297573973e-05, + "loss": 0.1837, + "step": 27100 + }, + { + "epoch": 0.76, + "learning_rate": 2.238535969709718e-05, + "loss": 0.1722, + "step": 27150 + }, + { + "epoch": 0.76, + "learning_rate": 2.2371336418454634e-05, + "loss": 0.1698, + "step": 27200 + }, + { + "epoch": 0.76, + "learning_rate": 2.235731313981209e-05, + "loss": 0.1932, + "step": 27250 + }, + { + "epoch": 0.77, + "learning_rate": 2.234328986116954e-05, + "loss": 0.1519, + "step": 27300 + }, + { + "epoch": 0.77, + "learning_rate": 2.2329266582526998e-05, + "loss": 0.1744, + "step": 27350 + }, + { + "epoch": 0.77, + "learning_rate": 2.2315243303884448e-05, + "loss": 0.1672, + "step": 27400 + }, + { + "epoch": 0.77, + "learning_rate": 2.23012200252419e-05, + "loss": 0.1641, + "step": 27450 + }, + { + "epoch": 0.77, + "learning_rate": 2.2287196746599358e-05, + "loss": 0.1815, + "step": 27500 + }, + { + "epoch": 0.77, + "learning_rate": 2.2273173467956808e-05, + "loss": 0.1567, + "step": 27550 + }, + { + "epoch": 0.77, + "learning_rate": 2.225915018931426e-05, + "loss": 0.1621, + "step": 27600 + }, + { + "epoch": 0.78, + "learning_rate": 2.2245126910671718e-05, + "loss": 0.1489, + "step": 27650 + }, + { + "epoch": 0.78, + "learning_rate": 2.2231103632029168e-05, + "loss": 0.1692, + "step": 27700 + }, + { + "epoch": 0.78, + "learning_rate": 2.2217080353386624e-05, + "loss": 0.1856, + "step": 27750 + }, + { + "epoch": 0.78, + "learning_rate": 2.2203057074744074e-05, + "loss": 0.169, + "step": 27800 + }, + { + "epoch": 0.78, + "learning_rate": 2.2189033796101528e-05, + "loss": 0.1749, + "step": 27850 + }, + { + "epoch": 0.78, + "learning_rate": 2.2175010517458985e-05, + "loss": 0.1764, + "step": 27900 + }, + { + "epoch": 0.78, + "learning_rate": 2.2160987238816434e-05, + "loss": 0.1578, + "step": 27950 + }, + { + "epoch": 0.79, + "learning_rate": 2.2146963960173888e-05, + "loss": 0.1749, + "step": 28000 + }, + { + "epoch": 0.79, + "eval_bleu": 95.8187, + "eval_gen_len": 64.232, + "eval_loss": 0.25857558846473694, + "eval_rouge1": 93.7097, + "eval_rouge2": 89.9991, + "eval_rougeL": 93.5074, + "eval_rougeLsum": 93.4927, + "eval_runtime": 3115.4186, + "eval_samples_per_second": 0.963, + "eval_steps_per_second": 0.241, + "step": 28000 + }, + { + "epoch": 0.79, + "learning_rate": 2.2132940681531345e-05, + "loss": 0.1808, + "step": 28050 + }, + { + "epoch": 0.79, + "learning_rate": 2.2118917402888795e-05, + "loss": 0.1781, + "step": 28100 + }, + { + "epoch": 0.79, + "learning_rate": 2.210489412424625e-05, + "loss": 0.1684, + "step": 28150 + }, + { + "epoch": 0.79, + "learning_rate": 2.20908708456037e-05, + "loss": 0.1749, + "step": 28200 + }, + { + "epoch": 0.79, + "learning_rate": 2.2076847566961155e-05, + "loss": 0.1758, + "step": 28250 + }, + { + "epoch": 0.79, + "learning_rate": 2.206282428831861e-05, + "loss": 0.1811, + "step": 28300 + }, + { + "epoch": 0.8, + "learning_rate": 2.204880100967606e-05, + "loss": 0.1652, + "step": 28350 + }, + { + "epoch": 0.8, + "learning_rate": 2.2034777731033515e-05, + "loss": 0.1735, + "step": 28400 + }, + { + "epoch": 0.8, + "learning_rate": 2.202075445239097e-05, + "loss": 0.1735, + "step": 28450 + }, + { + "epoch": 0.8, + "learning_rate": 2.200673117374842e-05, + "loss": 0.1881, + "step": 28500 + }, + { + "epoch": 0.8, + "learning_rate": 2.1992707895105878e-05, + "loss": 0.1892, + "step": 28550 + }, + { + "epoch": 0.8, + "learning_rate": 2.1978684616463328e-05, + "loss": 0.1665, + "step": 28600 + }, + { + "epoch": 0.8, + "learning_rate": 2.196466133782078e-05, + "loss": 0.1866, + "step": 28650 + }, + { + "epoch": 0.8, + "learning_rate": 2.1950638059178238e-05, + "loss": 0.1756, + "step": 28700 + }, + { + "epoch": 0.81, + "learning_rate": 2.1936614780535688e-05, + "loss": 0.1945, + "step": 28750 + }, + { + "epoch": 0.81, + "learning_rate": 2.1922591501893145e-05, + "loss": 0.1882, + "step": 28800 + }, + { + "epoch": 0.81, + "learning_rate": 2.1908568223250598e-05, + "loss": 0.1693, + "step": 28850 + }, + { + "epoch": 0.81, + "learning_rate": 2.1894544944608048e-05, + "loss": 0.1653, + "step": 28900 + }, + { + "epoch": 0.81, + "learning_rate": 2.1880521665965505e-05, + "loss": 0.1688, + "step": 28950 + }, + { + "epoch": 0.81, + "learning_rate": 2.1866498387322955e-05, + "loss": 0.1725, + "step": 29000 + }, + { + "epoch": 0.81, + "eval_bleu": 95.8531, + "eval_gen_len": 64.207, + "eval_loss": 0.25807130336761475, + "eval_rouge1": 93.6865, + "eval_rouge2": 90.02, + "eval_rougeL": 93.4949, + "eval_rougeLsum": 93.4886, + "eval_runtime": 3103.2566, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 29000 + }, + { + "epoch": 0.81, + "learning_rate": 2.185247510868041e-05, + "loss": 0.1597, + "step": 29050 + }, + { + "epoch": 0.82, + "learning_rate": 2.1838451830037865e-05, + "loss": 0.1639, + "step": 29100 + }, + { + "epoch": 0.82, + "learning_rate": 2.1824428551395315e-05, + "loss": 0.1748, + "step": 29150 + }, + { + "epoch": 0.82, + "learning_rate": 2.1810405272752772e-05, + "loss": 0.1734, + "step": 29200 + }, + { + "epoch": 0.82, + "learning_rate": 2.1796381994110225e-05, + "loss": 0.1783, + "step": 29250 + }, + { + "epoch": 0.82, + "learning_rate": 2.1782358715467675e-05, + "loss": 0.1872, + "step": 29300 + }, + { + "epoch": 0.82, + "learning_rate": 2.1768335436825132e-05, + "loss": 0.1727, + "step": 29350 + }, + { + "epoch": 0.82, + "learning_rate": 2.1754312158182582e-05, + "loss": 0.1607, + "step": 29400 + }, + { + "epoch": 0.83, + "learning_rate": 2.1740288879540035e-05, + "loss": 0.1702, + "step": 29450 + }, + { + "epoch": 0.83, + "learning_rate": 2.1726265600897492e-05, + "loss": 0.162, + "step": 29500 + }, + { + "epoch": 0.83, + "learning_rate": 2.1712242322254942e-05, + "loss": 0.2007, + "step": 29550 + }, + { + "epoch": 0.83, + "learning_rate": 2.16982190436124e-05, + "loss": 0.2098, + "step": 29600 + }, + { + "epoch": 0.83, + "learning_rate": 2.1684195764969852e-05, + "loss": 0.1707, + "step": 29650 + }, + { + "epoch": 0.83, + "learning_rate": 2.1670172486327302e-05, + "loss": 0.1658, + "step": 29700 + }, + { + "epoch": 0.83, + "learning_rate": 2.165614920768476e-05, + "loss": 0.1736, + "step": 29750 + }, + { + "epoch": 0.84, + "learning_rate": 2.164212592904221e-05, + "loss": 0.1759, + "step": 29800 + }, + { + "epoch": 0.84, + "learning_rate": 2.1628102650399662e-05, + "loss": 0.1915, + "step": 29850 + }, + { + "epoch": 0.84, + "learning_rate": 2.161407937175712e-05, + "loss": 0.1755, + "step": 29900 + }, + { + "epoch": 0.84, + "learning_rate": 2.160005609311457e-05, + "loss": 0.171, + "step": 29950 + }, + { + "epoch": 0.84, + "learning_rate": 2.1586032814472026e-05, + "loss": 0.1917, + "step": 30000 + }, + { + "epoch": 0.84, + "eval_bleu": 95.8729, + "eval_gen_len": 64.2307, + "eval_loss": 0.2573055922985077, + "eval_rouge1": 93.7203, + "eval_rouge2": 90.0218, + "eval_rougeL": 93.5266, + "eval_rougeLsum": 93.517, + "eval_runtime": 3112.1338, + "eval_samples_per_second": 0.964, + "eval_steps_per_second": 0.241, + "step": 30000 + }, + { + "epoch": 0.84, + "learning_rate": 2.157200953582948e-05, + "loss": 0.165, + "step": 30050 + }, + { + "epoch": 0.84, + "learning_rate": 2.155798625718693e-05, + "loss": 0.1712, + "step": 30100 + }, + { + "epoch": 0.85, + "learning_rate": 2.1543962978544386e-05, + "loss": 0.1821, + "step": 30150 + }, + { + "epoch": 0.85, + "learning_rate": 2.1529939699901836e-05, + "loss": 0.1696, + "step": 30200 + }, + { + "epoch": 0.85, + "learning_rate": 2.1515916421259292e-05, + "loss": 0.1666, + "step": 30250 + }, + { + "epoch": 0.85, + "learning_rate": 2.1501893142616746e-05, + "loss": 0.1935, + "step": 30300 + }, + { + "epoch": 0.85, + "learning_rate": 2.1487869863974196e-05, + "loss": 0.1819, + "step": 30350 + }, + { + "epoch": 0.85, + "learning_rate": 2.1473846585331652e-05, + "loss": 0.1606, + "step": 30400 + }, + { + "epoch": 0.85, + "learning_rate": 2.1459823306689106e-05, + "loss": 0.1591, + "step": 30450 + }, + { + "epoch": 0.86, + "learning_rate": 2.1445800028046556e-05, + "loss": 0.1602, + "step": 30500 + }, + { + "epoch": 0.86, + "learning_rate": 2.1431776749404012e-05, + "loss": 0.1569, + "step": 30550 + }, + { + "epoch": 0.86, + "learning_rate": 2.1417753470761462e-05, + "loss": 0.1721, + "step": 30600 + }, + { + "epoch": 0.86, + "learning_rate": 2.140373019211892e-05, + "loss": 0.1757, + "step": 30650 + }, + { + "epoch": 0.86, + "learning_rate": 2.1389706913476373e-05, + "loss": 0.192, + "step": 30700 + }, + { + "epoch": 0.86, + "learning_rate": 2.1375683634833823e-05, + "loss": 0.1957, + "step": 30750 + }, + { + "epoch": 0.86, + "learning_rate": 2.136166035619128e-05, + "loss": 0.1645, + "step": 30800 + }, + { + "epoch": 0.87, + "learning_rate": 2.1347637077548733e-05, + "loss": 0.1751, + "step": 30850 + }, + { + "epoch": 0.87, + "learning_rate": 2.1333613798906183e-05, + "loss": 0.1767, + "step": 30900 + }, + { + "epoch": 0.87, + "learning_rate": 2.131959052026364e-05, + "loss": 0.1697, + "step": 30950 + }, + { + "epoch": 0.87, + "learning_rate": 2.130556724162109e-05, + "loss": 0.1897, + "step": 31000 + }, + { + "epoch": 0.87, + "eval_bleu": 95.8468, + "eval_gen_len": 64.2037, + "eval_loss": 0.25732484459877014, + "eval_rouge1": 93.6811, + "eval_rouge2": 89.9877, + "eval_rougeL": 93.478, + "eval_rougeLsum": 93.4745, + "eval_runtime": 3156.1925, + "eval_samples_per_second": 0.951, + "eval_steps_per_second": 0.238, + "step": 31000 + }, + { + "epoch": 0.87, + "learning_rate": 2.1291543962978546e-05, + "loss": 0.1742, + "step": 31050 + }, + { + "epoch": 0.87, + "learning_rate": 2.1277520684336e-05, + "loss": 0.1663, + "step": 31100 + }, + { + "epoch": 0.87, + "learning_rate": 2.126349740569345e-05, + "loss": 0.1831, + "step": 31150 + }, + { + "epoch": 0.88, + "learning_rate": 2.1249474127050906e-05, + "loss": 0.1653, + "step": 31200 + }, + { + "epoch": 0.88, + "learning_rate": 2.123545084840836e-05, + "loss": 0.1739, + "step": 31250 + }, + { + "epoch": 0.88, + "learning_rate": 2.122142756976581e-05, + "loss": 0.1741, + "step": 31300 + }, + { + "epoch": 0.88, + "learning_rate": 2.1207404291123266e-05, + "loss": 0.1755, + "step": 31350 + }, + { + "epoch": 0.88, + "learning_rate": 2.1193381012480716e-05, + "loss": 0.164, + "step": 31400 + }, + { + "epoch": 0.88, + "learning_rate": 2.1179357733838173e-05, + "loss": 0.1624, + "step": 31450 + }, + { + "epoch": 0.88, + "learning_rate": 2.1165334455195626e-05, + "loss": 0.1716, + "step": 31500 + }, + { + "epoch": 0.88, + "learning_rate": 2.1151311176553076e-05, + "loss": 0.1759, + "step": 31550 + }, + { + "epoch": 0.89, + "learning_rate": 2.1137287897910533e-05, + "loss": 0.1845, + "step": 31600 + }, + { + "epoch": 0.89, + "learning_rate": 2.1123264619267986e-05, + "loss": 0.1637, + "step": 31650 + }, + { + "epoch": 0.89, + "learning_rate": 2.110924134062544e-05, + "loss": 0.1939, + "step": 31700 + }, + { + "epoch": 0.89, + "learning_rate": 2.1095218061982893e-05, + "loss": 0.1747, + "step": 31750 + }, + { + "epoch": 0.89, + "learning_rate": 2.1081194783340343e-05, + "loss": 0.1727, + "step": 31800 + }, + { + "epoch": 0.89, + "learning_rate": 2.10671715046978e-05, + "loss": 0.1626, + "step": 31850 + }, + { + "epoch": 0.89, + "learning_rate": 2.1053148226055253e-05, + "loss": 0.1885, + "step": 31900 + }, + { + "epoch": 0.9, + "learning_rate": 2.1039124947412703e-05, + "loss": 0.1812, + "step": 31950 + }, + { + "epoch": 0.9, + "learning_rate": 2.102510166877016e-05, + "loss": 0.1925, + "step": 32000 + }, + { + "epoch": 0.9, + "eval_bleu": 95.8275, + "eval_gen_len": 64.1743, + "eval_loss": 0.2557416260242462, + "eval_rouge1": 93.6863, + "eval_rouge2": 89.9362, + "eval_rougeL": 93.482, + "eval_rougeLsum": 93.4648, + "eval_runtime": 3095.3083, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 32000 + }, + { + "epoch": 0.9, + "learning_rate": 2.1011078390127613e-05, + "loss": 0.1858, + "step": 32050 + }, + { + "epoch": 0.9, + "learning_rate": 2.0997055111485067e-05, + "loss": 0.1588, + "step": 32100 + }, + { + "epoch": 0.9, + "learning_rate": 2.098303183284252e-05, + "loss": 0.1797, + "step": 32150 + }, + { + "epoch": 0.9, + "learning_rate": 2.096900855419997e-05, + "loss": 0.1846, + "step": 32200 + }, + { + "epoch": 0.9, + "learning_rate": 2.0954985275557427e-05, + "loss": 0.185, + "step": 32250 + }, + { + "epoch": 0.91, + "learning_rate": 2.094096199691488e-05, + "loss": 0.1565, + "step": 32300 + }, + { + "epoch": 0.91, + "learning_rate": 2.092693871827233e-05, + "loss": 0.2177, + "step": 32350 + }, + { + "epoch": 0.91, + "learning_rate": 2.0912915439629787e-05, + "loss": 0.1861, + "step": 32400 + }, + { + "epoch": 0.91, + "learning_rate": 2.089889216098724e-05, + "loss": 0.1667, + "step": 32450 + }, + { + "epoch": 0.91, + "learning_rate": 2.0884868882344693e-05, + "loss": 0.1788, + "step": 32500 + }, + { + "epoch": 0.91, + "learning_rate": 2.0870845603702147e-05, + "loss": 0.2086, + "step": 32550 + }, + { + "epoch": 0.91, + "learning_rate": 2.0856822325059597e-05, + "loss": 0.1607, + "step": 32600 + }, + { + "epoch": 0.92, + "learning_rate": 2.0842799046417053e-05, + "loss": 0.1655, + "step": 32650 + }, + { + "epoch": 0.92, + "learning_rate": 2.0828775767774507e-05, + "loss": 0.1798, + "step": 32700 + }, + { + "epoch": 0.92, + "learning_rate": 2.0814752489131957e-05, + "loss": 0.1953, + "step": 32750 + }, + { + "epoch": 0.92, + "learning_rate": 2.0800729210489414e-05, + "loss": 0.166, + "step": 32800 + }, + { + "epoch": 0.92, + "learning_rate": 2.0786705931846867e-05, + "loss": 0.1718, + "step": 32850 + }, + { + "epoch": 0.92, + "learning_rate": 2.077268265320432e-05, + "loss": 0.1888, + "step": 32900 + }, + { + "epoch": 0.92, + "learning_rate": 2.0758659374561774e-05, + "loss": 0.1788, + "step": 32950 + }, + { + "epoch": 0.93, + "learning_rate": 2.0744636095919224e-05, + "loss": 0.1853, + "step": 33000 + }, + { + "epoch": 0.93, + "eval_bleu": 95.8702, + "eval_gen_len": 64.183, + "eval_loss": 0.2551117241382599, + "eval_rouge1": 93.7191, + "eval_rouge2": 90.0474, + "eval_rougeL": 93.5286, + "eval_rougeLsum": 93.516, + "eval_runtime": 3134.3467, + "eval_samples_per_second": 0.957, + "eval_steps_per_second": 0.239, + "step": 33000 + }, + { + "epoch": 0.93, + "learning_rate": 2.073061281727668e-05, + "loss": 0.1964, + "step": 33050 + }, + { + "epoch": 0.93, + "learning_rate": 2.0716589538634134e-05, + "loss": 0.178, + "step": 33100 + }, + { + "epoch": 0.93, + "learning_rate": 2.0702566259991587e-05, + "loss": 0.1911, + "step": 33150 + }, + { + "epoch": 0.93, + "learning_rate": 2.068854298134904e-05, + "loss": 0.1705, + "step": 33200 + }, + { + "epoch": 0.93, + "learning_rate": 2.0674519702706494e-05, + "loss": 0.1835, + "step": 33250 + }, + { + "epoch": 0.93, + "learning_rate": 2.0660496424063947e-05, + "loss": 0.1617, + "step": 33300 + }, + { + "epoch": 0.94, + "learning_rate": 2.06464731454214e-05, + "loss": 0.169, + "step": 33350 + }, + { + "epoch": 0.94, + "learning_rate": 2.063244986677885e-05, + "loss": 0.1738, + "step": 33400 + }, + { + "epoch": 0.94, + "learning_rate": 2.0618426588136307e-05, + "loss": 0.1648, + "step": 33450 + }, + { + "epoch": 0.94, + "learning_rate": 2.060440330949376e-05, + "loss": 0.1822, + "step": 33500 + }, + { + "epoch": 0.94, + "learning_rate": 2.0590380030851214e-05, + "loss": 0.1719, + "step": 33550 + }, + { + "epoch": 0.94, + "learning_rate": 2.0576356752208667e-05, + "loss": 0.1899, + "step": 33600 + }, + { + "epoch": 0.94, + "learning_rate": 2.056233347356612e-05, + "loss": 0.1754, + "step": 33650 + }, + { + "epoch": 0.95, + "learning_rate": 2.0548310194923574e-05, + "loss": 0.1693, + "step": 33700 + }, + { + "epoch": 0.95, + "learning_rate": 2.0534286916281027e-05, + "loss": 0.1578, + "step": 33750 + }, + { + "epoch": 0.95, + "learning_rate": 2.0520263637638477e-05, + "loss": 0.1757, + "step": 33800 + }, + { + "epoch": 0.95, + "learning_rate": 2.0506240358995934e-05, + "loss": 0.1735, + "step": 33850 + }, + { + "epoch": 0.95, + "learning_rate": 2.0492217080353387e-05, + "loss": 0.1732, + "step": 33900 + }, + { + "epoch": 0.95, + "learning_rate": 2.047819380171084e-05, + "loss": 0.1645, + "step": 33950 + }, + { + "epoch": 0.95, + "learning_rate": 2.0464170523068294e-05, + "loss": 0.166, + "step": 34000 + }, + { + "epoch": 0.95, + "eval_bleu": 95.8966, + "eval_gen_len": 64.227, + "eval_loss": 0.2542245090007782, + "eval_rouge1": 93.6823, + "eval_rouge2": 89.9822, + "eval_rougeL": 93.4675, + "eval_rougeLsum": 93.4534, + "eval_runtime": 3105.0348, + "eval_samples_per_second": 0.966, + "eval_steps_per_second": 0.242, + "step": 34000 + }, + { + "epoch": 0.95, + "learning_rate": 2.0450147244425747e-05, + "loss": 0.1736, + "step": 34050 + }, + { + "epoch": 0.96, + "learning_rate": 2.04361239657832e-05, + "loss": 0.1888, + "step": 34100 + }, + { + "epoch": 0.96, + "learning_rate": 2.0422100687140654e-05, + "loss": 0.1701, + "step": 34150 + }, + { + "epoch": 0.96, + "learning_rate": 2.0408077408498108e-05, + "loss": 0.151, + "step": 34200 + }, + { + "epoch": 0.96, + "learning_rate": 2.039405412985556e-05, + "loss": 0.1776, + "step": 34250 + }, + { + "epoch": 0.96, + "learning_rate": 2.0380030851213014e-05, + "loss": 0.1928, + "step": 34300 + }, + { + "epoch": 0.96, + "learning_rate": 2.0366007572570468e-05, + "loss": 0.1762, + "step": 34350 + }, + { + "epoch": 0.96, + "learning_rate": 2.035198429392792e-05, + "loss": 0.1631, + "step": 34400 + }, + { + "epoch": 0.97, + "learning_rate": 2.0337961015285374e-05, + "loss": 0.1913, + "step": 34450 + }, + { + "epoch": 0.97, + "learning_rate": 2.0323937736642828e-05, + "loss": 0.1662, + "step": 34500 + }, + { + "epoch": 0.97, + "learning_rate": 2.030991445800028e-05, + "loss": 0.1691, + "step": 34550 + }, + { + "epoch": 0.97, + "learning_rate": 2.0295891179357734e-05, + "loss": 0.1664, + "step": 34600 + }, + { + "epoch": 0.97, + "learning_rate": 2.0281867900715188e-05, + "loss": 0.1526, + "step": 34650 + }, + { + "epoch": 0.97, + "learning_rate": 2.026784462207264e-05, + "loss": 0.1816, + "step": 34700 + }, + { + "epoch": 0.97, + "learning_rate": 2.0253821343430095e-05, + "loss": 0.1892, + "step": 34750 + }, + { + "epoch": 0.98, + "learning_rate": 2.0239798064787548e-05, + "loss": 0.1632, + "step": 34800 + }, + { + "epoch": 0.98, + "learning_rate": 2.0225774786145e-05, + "loss": 0.1542, + "step": 34850 + }, + { + "epoch": 0.98, + "learning_rate": 2.0211751507502455e-05, + "loss": 0.1529, + "step": 34900 + }, + { + "epoch": 0.98, + "learning_rate": 2.0197728228859908e-05, + "loss": 0.1597, + "step": 34950 + }, + { + "epoch": 0.98, + "learning_rate": 2.018370495021736e-05, + "loss": 0.1795, + "step": 35000 + }, + { + "epoch": 0.98, + "eval_bleu": 95.9192, + "eval_gen_len": 64.2247, + "eval_loss": 0.2535455822944641, + "eval_rouge1": 93.7504, + "eval_rouge2": 90.1079, + "eval_rougeL": 93.5482, + "eval_rougeLsum": 93.5289, + "eval_runtime": 3098.1952, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.242, + "step": 35000 + }, + { + "epoch": 0.98, + "learning_rate": 2.0169681671574815e-05, + "loss": 0.1634, + "step": 35050 + }, + { + "epoch": 0.98, + "learning_rate": 2.0155658392932268e-05, + "loss": 0.1723, + "step": 35100 + }, + { + "epoch": 0.99, + "learning_rate": 2.014163511428972e-05, + "loss": 0.1624, + "step": 35150 + }, + { + "epoch": 0.99, + "learning_rate": 2.0127611835647175e-05, + "loss": 0.1437, + "step": 35200 + }, + { + "epoch": 0.99, + "learning_rate": 2.0113588557004628e-05, + "loss": 0.1638, + "step": 35250 + }, + { + "epoch": 0.99, + "learning_rate": 2.009956527836208e-05, + "loss": 0.153, + "step": 35300 + }, + { + "epoch": 0.99, + "learning_rate": 2.0085541999719535e-05, + "loss": 0.1749, + "step": 35350 + }, + { + "epoch": 0.99, + "learning_rate": 2.0071518721076988e-05, + "loss": 0.1802, + "step": 35400 + }, + { + "epoch": 0.99, + "learning_rate": 2.005749544243444e-05, + "loss": 0.1794, + "step": 35450 + }, + { + "epoch": 1.0, + "learning_rate": 2.0043472163791895e-05, + "loss": 0.1778, + "step": 35500 + }, + { + "epoch": 1.0, + "learning_rate": 2.0029448885149348e-05, + "loss": 0.1608, + "step": 35550 + }, + { + "epoch": 1.0, + "learning_rate": 2.00154256065068e-05, + "loss": 0.1722, + "step": 35600 + }, + { + "epoch": 1.0, + "learning_rate": 2.0001402327864255e-05, + "loss": 0.1674, + "step": 35650 + }, + { + "epoch": 1.0, + "learning_rate": 1.9987379049221708e-05, + "loss": 0.1786, + "step": 35700 + }, + { + "epoch": 1.0, + "learning_rate": 1.997335577057916e-05, + "loss": 0.1597, + "step": 35750 + }, + { + "epoch": 1.0, + "learning_rate": 1.9959332491936615e-05, + "loss": 0.1684, + "step": 35800 + }, + { + "epoch": 1.01, + "learning_rate": 1.994530921329407e-05, + "loss": 0.1526, + "step": 35850 + }, + { + "epoch": 1.01, + "learning_rate": 1.9931285934651522e-05, + "loss": 0.1523, + "step": 35900 + }, + { + "epoch": 1.01, + "learning_rate": 1.9917262656008975e-05, + "loss": 0.1743, + "step": 35950 + }, + { + "epoch": 1.01, + "learning_rate": 1.990323937736643e-05, + "loss": 0.152, + "step": 36000 + }, + { + "epoch": 1.01, + "eval_bleu": 95.9457, + "eval_gen_len": 64.2747, + "eval_loss": 0.2530899941921234, + "eval_rouge1": 93.7477, + "eval_rouge2": 90.0914, + "eval_rougeL": 93.5409, + "eval_rougeLsum": 93.5214, + "eval_runtime": 3128.4817, + "eval_samples_per_second": 0.959, + "eval_steps_per_second": 0.24, + "step": 36000 + }, + { + "epoch": 1.01, + "learning_rate": 1.9889216098723885e-05, + "loss": 0.1547, + "step": 36050 + }, + { + "epoch": 1.01, + "learning_rate": 1.9875192820081335e-05, + "loss": 0.1586, + "step": 36100 + }, + { + "epoch": 1.01, + "learning_rate": 1.986116954143879e-05, + "loss": 0.1778, + "step": 36150 + }, + { + "epoch": 1.02, + "learning_rate": 1.9847146262796242e-05, + "loss": 0.1466, + "step": 36200 + }, + { + "epoch": 1.02, + "learning_rate": 1.9833122984153695e-05, + "loss": 0.1561, + "step": 36250 + }, + { + "epoch": 1.02, + "learning_rate": 1.981909970551115e-05, + "loss": 0.1611, + "step": 36300 + }, + { + "epoch": 1.02, + "learning_rate": 1.9805076426868602e-05, + "loss": 0.1547, + "step": 36350 + }, + { + "epoch": 1.02, + "learning_rate": 1.9791053148226055e-05, + "loss": 0.1413, + "step": 36400 + }, + { + "epoch": 1.02, + "learning_rate": 1.9777029869583512e-05, + "loss": 0.1641, + "step": 36450 + }, + { + "epoch": 1.02, + "learning_rate": 1.9763006590940962e-05, + "loss": 0.1591, + "step": 36500 + }, + { + "epoch": 1.03, + "learning_rate": 1.9748983312298415e-05, + "loss": 0.1604, + "step": 36550 + }, + { + "epoch": 1.03, + "learning_rate": 1.973496003365587e-05, + "loss": 0.163, + "step": 36600 + }, + { + "epoch": 1.03, + "learning_rate": 1.9720936755013322e-05, + "loss": 0.1517, + "step": 36650 + }, + { + "epoch": 1.03, + "learning_rate": 1.9706913476370775e-05, + "loss": 0.1675, + "step": 36700 + }, + { + "epoch": 1.03, + "learning_rate": 1.969289019772823e-05, + "loss": 0.1729, + "step": 36750 + }, + { + "epoch": 1.03, + "learning_rate": 1.9678866919085682e-05, + "loss": 0.1564, + "step": 36800 + }, + { + "epoch": 1.03, + "learning_rate": 1.966484364044314e-05, + "loss": 0.1807, + "step": 36850 + }, + { + "epoch": 1.03, + "learning_rate": 1.965082036180059e-05, + "loss": 0.161, + "step": 36900 + }, + { + "epoch": 1.04, + "learning_rate": 1.9636797083158042e-05, + "loss": 0.1514, + "step": 36950 + }, + { + "epoch": 1.04, + "learning_rate": 1.9622773804515496e-05, + "loss": 0.1548, + "step": 37000 + }, + { + "epoch": 1.04, + "eval_bleu": 95.9217, + "eval_gen_len": 64.223, + "eval_loss": 0.25345203280448914, + "eval_rouge1": 93.7839, + "eval_rouge2": 90.1221, + "eval_rougeL": 93.5635, + "eval_rougeLsum": 93.554, + "eval_runtime": 3095.5847, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 37000 + }, + { + "epoch": 1.04, + "learning_rate": 1.960875052587295e-05, + "loss": 0.1647, + "step": 37050 + }, + { + "epoch": 1.04, + "learning_rate": 1.9594727247230402e-05, + "loss": 0.1629, + "step": 37100 + }, + { + "epoch": 1.04, + "learning_rate": 1.9580703968587856e-05, + "loss": 0.1805, + "step": 37150 + }, + { + "epoch": 1.04, + "learning_rate": 1.956668068994531e-05, + "loss": 0.1727, + "step": 37200 + }, + { + "epoch": 1.04, + "learning_rate": 1.9552657411302766e-05, + "loss": 0.1967, + "step": 37250 + }, + { + "epoch": 1.05, + "learning_rate": 1.9538634132660216e-05, + "loss": 0.1551, + "step": 37300 + }, + { + "epoch": 1.05, + "learning_rate": 1.952461085401767e-05, + "loss": 0.1554, + "step": 37350 + }, + { + "epoch": 1.05, + "learning_rate": 1.9510587575375122e-05, + "loss": 0.1491, + "step": 37400 + }, + { + "epoch": 1.05, + "learning_rate": 1.9496564296732576e-05, + "loss": 0.1572, + "step": 37450 + }, + { + "epoch": 1.05, + "learning_rate": 1.9482541018090033e-05, + "loss": 0.1706, + "step": 37500 + }, + { + "epoch": 1.05, + "learning_rate": 1.9468517739447483e-05, + "loss": 0.1632, + "step": 37550 + }, + { + "epoch": 1.05, + "learning_rate": 1.9454494460804936e-05, + "loss": 0.1609, + "step": 37600 + }, + { + "epoch": 1.06, + "learning_rate": 1.9440471182162393e-05, + "loss": 0.1672, + "step": 37650 + }, + { + "epoch": 1.06, + "learning_rate": 1.9426447903519843e-05, + "loss": 0.1535, + "step": 37700 + }, + { + "epoch": 1.06, + "learning_rate": 1.9412424624877296e-05, + "loss": 0.1545, + "step": 37750 + }, + { + "epoch": 1.06, + "learning_rate": 1.939840134623475e-05, + "loss": 0.1606, + "step": 37800 + }, + { + "epoch": 1.06, + "learning_rate": 1.9384378067592203e-05, + "loss": 0.1475, + "step": 37850 + }, + { + "epoch": 1.06, + "learning_rate": 1.937035478894966e-05, + "loss": 0.1663, + "step": 37900 + }, + { + "epoch": 1.06, + "learning_rate": 1.935633151030711e-05, + "loss": 0.1698, + "step": 37950 + }, + { + "epoch": 1.07, + "learning_rate": 1.9342308231664563e-05, + "loss": 0.1565, + "step": 38000 + }, + { + "epoch": 1.07, + "eval_bleu": 95.9359, + "eval_gen_len": 64.2767, + "eval_loss": 0.25277698040008545, + "eval_rouge1": 93.7483, + "eval_rouge2": 90.0899, + "eval_rougeL": 93.5433, + "eval_rougeLsum": 93.5171, + "eval_runtime": 3125.498, + "eval_samples_per_second": 0.96, + "eval_steps_per_second": 0.24, + "step": 38000 + }, + { + "epoch": 1.07, + "learning_rate": 1.932828495302202e-05, + "loss": 0.1686, + "step": 38050 + }, + { + "epoch": 1.07, + "learning_rate": 1.931426167437947e-05, + "loss": 0.1466, + "step": 38100 + }, + { + "epoch": 1.07, + "learning_rate": 1.9300238395736923e-05, + "loss": 0.1817, + "step": 38150 + }, + { + "epoch": 1.07, + "learning_rate": 1.9286215117094376e-05, + "loss": 0.1765, + "step": 38200 + }, + { + "epoch": 1.07, + "learning_rate": 1.927219183845183e-05, + "loss": 0.1642, + "step": 38250 + }, + { + "epoch": 1.07, + "learning_rate": 1.9258168559809286e-05, + "loss": 0.1732, + "step": 38300 + }, + { + "epoch": 1.08, + "learning_rate": 1.9244145281166736e-05, + "loss": 0.1462, + "step": 38350 + }, + { + "epoch": 1.08, + "learning_rate": 1.923012200252419e-05, + "loss": 0.167, + "step": 38400 + }, + { + "epoch": 1.08, + "learning_rate": 1.9216098723881646e-05, + "loss": 0.1484, + "step": 38450 + }, + { + "epoch": 1.08, + "learning_rate": 1.9202075445239096e-05, + "loss": 0.1661, + "step": 38500 + }, + { + "epoch": 1.08, + "learning_rate": 1.918805216659655e-05, + "loss": 0.1457, + "step": 38550 + }, + { + "epoch": 1.08, + "learning_rate": 1.9174028887954003e-05, + "loss": 0.1445, + "step": 38600 + }, + { + "epoch": 1.08, + "learning_rate": 1.9160005609311456e-05, + "loss": 0.1499, + "step": 38650 + }, + { + "epoch": 1.09, + "learning_rate": 1.9145982330668913e-05, + "loss": 0.1495, + "step": 38700 + }, + { + "epoch": 1.09, + "learning_rate": 1.9131959052026363e-05, + "loss": 0.1752, + "step": 38750 + }, + { + "epoch": 1.09, + "learning_rate": 1.9117935773383816e-05, + "loss": 0.174, + "step": 38800 + }, + { + "epoch": 1.09, + "learning_rate": 1.9103912494741273e-05, + "loss": 0.1513, + "step": 38850 + }, + { + "epoch": 1.09, + "learning_rate": 1.9089889216098723e-05, + "loss": 0.1727, + "step": 38900 + }, + { + "epoch": 1.09, + "learning_rate": 1.907586593745618e-05, + "loss": 0.1738, + "step": 38950 + }, + { + "epoch": 1.09, + "learning_rate": 1.906184265881363e-05, + "loss": 0.1695, + "step": 39000 + }, + { + "epoch": 1.09, + "eval_bleu": 95.9169, + "eval_gen_len": 64.224, + "eval_loss": 0.2530539035797119, + "eval_rouge1": 93.7543, + "eval_rouge2": 90.0728, + "eval_rougeL": 93.5572, + "eval_rougeLsum": 93.5471, + "eval_runtime": 3118.1985, + "eval_samples_per_second": 0.962, + "eval_steps_per_second": 0.241, + "step": 39000 + }, + { + "epoch": 1.1, + "learning_rate": 1.9047819380171083e-05, + "loss": 0.1575, + "step": 39050 + }, + { + "epoch": 1.1, + "learning_rate": 1.903379610152854e-05, + "loss": 0.1706, + "step": 39100 + }, + { + "epoch": 1.1, + "learning_rate": 1.901977282288599e-05, + "loss": 0.1483, + "step": 39150 + }, + { + "epoch": 1.1, + "learning_rate": 1.9005749544243443e-05, + "loss": 0.1535, + "step": 39200 + }, + { + "epoch": 1.1, + "learning_rate": 1.89917262656009e-05, + "loss": 0.1454, + "step": 39250 + }, + { + "epoch": 1.1, + "learning_rate": 1.897770298695835e-05, + "loss": 0.1614, + "step": 39300 + }, + { + "epoch": 1.1, + "learning_rate": 1.8963679708315807e-05, + "loss": 0.1584, + "step": 39350 + }, + { + "epoch": 1.11, + "learning_rate": 1.8949656429673257e-05, + "loss": 0.1733, + "step": 39400 + }, + { + "epoch": 1.11, + "learning_rate": 1.893563315103071e-05, + "loss": 0.1391, + "step": 39450 + }, + { + "epoch": 1.11, + "learning_rate": 1.8921609872388167e-05, + "loss": 0.1517, + "step": 39500 + }, + { + "epoch": 1.11, + "learning_rate": 1.8907586593745617e-05, + "loss": 0.152, + "step": 39550 + }, + { + "epoch": 1.11, + "learning_rate": 1.889356331510307e-05, + "loss": 0.1458, + "step": 39600 + }, + { + "epoch": 1.11, + "learning_rate": 1.8879540036460527e-05, + "loss": 0.1675, + "step": 39650 + }, + { + "epoch": 1.11, + "learning_rate": 1.8865516757817977e-05, + "loss": 0.1554, + "step": 39700 + }, + { + "epoch": 1.11, + "learning_rate": 1.8851493479175434e-05, + "loss": 0.169, + "step": 39750 + }, + { + "epoch": 1.12, + "learning_rate": 1.8837470200532884e-05, + "loss": 0.1841, + "step": 39800 + }, + { + "epoch": 1.12, + "learning_rate": 1.8823446921890337e-05, + "loss": 0.1717, + "step": 39850 + }, + { + "epoch": 1.12, + "learning_rate": 1.8809423643247794e-05, + "loss": 0.1386, + "step": 39900 + }, + { + "epoch": 1.12, + "learning_rate": 1.8795400364605244e-05, + "loss": 0.1642, + "step": 39950 + }, + { + "epoch": 1.12, + "learning_rate": 1.87813770859627e-05, + "loss": 0.1804, + "step": 40000 + }, + { + "epoch": 1.12, + "eval_bleu": 95.9129, + "eval_gen_len": 64.2003, + "eval_loss": 0.2509741187095642, + "eval_rouge1": 93.8069, + "eval_rouge2": 90.1198, + "eval_rougeL": 93.5952, + "eval_rougeLsum": 93.5824, + "eval_runtime": 3104.9942, + "eval_samples_per_second": 0.966, + "eval_steps_per_second": 0.242, + "step": 40000 + }, + { + "epoch": 1.12, + "learning_rate": 1.8767353807320154e-05, + "loss": 0.1632, + "step": 40050 + }, + { + "epoch": 1.12, + "learning_rate": 1.8753330528677604e-05, + "loss": 0.1432, + "step": 40100 + }, + { + "epoch": 1.13, + "learning_rate": 1.873930725003506e-05, + "loss": 0.1485, + "step": 40150 + }, + { + "epoch": 1.13, + "learning_rate": 1.872528397139251e-05, + "loss": 0.1397, + "step": 40200 + }, + { + "epoch": 1.13, + "learning_rate": 1.8711260692749964e-05, + "loss": 0.1595, + "step": 40250 + }, + { + "epoch": 1.13, + "learning_rate": 1.869723741410742e-05, + "loss": 0.1539, + "step": 40300 + }, + { + "epoch": 1.13, + "learning_rate": 1.868321413546487e-05, + "loss": 0.1635, + "step": 40350 + }, + { + "epoch": 1.13, + "learning_rate": 1.8669190856822327e-05, + "loss": 0.1709, + "step": 40400 + }, + { + "epoch": 1.13, + "learning_rate": 1.865516757817978e-05, + "loss": 0.1554, + "step": 40450 + }, + { + "epoch": 1.14, + "learning_rate": 1.864114429953723e-05, + "loss": 0.1597, + "step": 40500 + }, + { + "epoch": 1.14, + "learning_rate": 1.8627121020894687e-05, + "loss": 0.1652, + "step": 40550 + }, + { + "epoch": 1.14, + "learning_rate": 1.8613097742252137e-05, + "loss": 0.1794, + "step": 40600 + }, + { + "epoch": 1.14, + "learning_rate": 1.859907446360959e-05, + "loss": 0.159, + "step": 40650 + }, + { + "epoch": 1.14, + "learning_rate": 1.8585051184967047e-05, + "loss": 0.1611, + "step": 40700 + }, + { + "epoch": 1.14, + "learning_rate": 1.8571027906324497e-05, + "loss": 0.1545, + "step": 40750 + }, + { + "epoch": 1.14, + "learning_rate": 1.8557004627681954e-05, + "loss": 0.1458, + "step": 40800 + }, + { + "epoch": 1.15, + "learning_rate": 1.8542981349039408e-05, + "loss": 0.1745, + "step": 40850 + }, + { + "epoch": 1.15, + "learning_rate": 1.8528958070396858e-05, + "loss": 0.1612, + "step": 40900 + }, + { + "epoch": 1.15, + "learning_rate": 1.8514934791754314e-05, + "loss": 0.1589, + "step": 40950 + }, + { + "epoch": 1.15, + "learning_rate": 1.8500911513111764e-05, + "loss": 0.1421, + "step": 41000 + }, + { + "epoch": 1.15, + "eval_bleu": 95.9507, + "eval_gen_len": 64.2133, + "eval_loss": 0.2519792318344116, + "eval_rouge1": 93.805, + "eval_rouge2": 90.1779, + "eval_rougeL": 93.6222, + "eval_rougeLsum": 93.6155, + "eval_runtime": 3133.4577, + "eval_samples_per_second": 0.957, + "eval_steps_per_second": 0.239, + "step": 41000 + }, + { + "epoch": 1.15, + "learning_rate": 1.8486888234469218e-05, + "loss": 0.1424, + "step": 41050 + }, + { + "epoch": 1.15, + "learning_rate": 1.8472864955826674e-05, + "loss": 0.1578, + "step": 41100 + }, + { + "epoch": 1.15, + "learning_rate": 1.8458841677184124e-05, + "loss": 0.1553, + "step": 41150 + }, + { + "epoch": 1.16, + "learning_rate": 1.844481839854158e-05, + "loss": 0.1969, + "step": 41200 + }, + { + "epoch": 1.16, + "learning_rate": 1.8430795119899034e-05, + "loss": 0.1674, + "step": 41250 + }, + { + "epoch": 1.16, + "learning_rate": 1.8416771841256484e-05, + "loss": 0.1763, + "step": 41300 + }, + { + "epoch": 1.16, + "learning_rate": 1.840274856261394e-05, + "loss": 0.1751, + "step": 41350 + }, + { + "epoch": 1.16, + "learning_rate": 1.838872528397139e-05, + "loss": 0.1588, + "step": 41400 + }, + { + "epoch": 1.16, + "learning_rate": 1.8374702005328848e-05, + "loss": 0.173, + "step": 41450 + }, + { + "epoch": 1.16, + "learning_rate": 1.83606787266863e-05, + "loss": 0.1759, + "step": 41500 + }, + { + "epoch": 1.17, + "learning_rate": 1.834665544804375e-05, + "loss": 0.1614, + "step": 41550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332632169401208e-05, + "loss": 0.1463, + "step": 41600 + }, + { + "epoch": 1.17, + "learning_rate": 1.831860889075866e-05, + "loss": 0.1825, + "step": 41650 + }, + { + "epoch": 1.17, + "learning_rate": 1.830458561211611e-05, + "loss": 0.1483, + "step": 41700 + }, + { + "epoch": 1.17, + "learning_rate": 1.8290562333473568e-05, + "loss": 0.1651, + "step": 41750 + }, + { + "epoch": 1.17, + "learning_rate": 1.8276539054831018e-05, + "loss": 0.1602, + "step": 41800 + }, + { + "epoch": 1.17, + "learning_rate": 1.8262515776188475e-05, + "loss": 0.1462, + "step": 41850 + }, + { + "epoch": 1.18, + "learning_rate": 1.8248492497545928e-05, + "loss": 0.1686, + "step": 41900 + }, + { + "epoch": 1.18, + "learning_rate": 1.8234469218903378e-05, + "loss": 0.1474, + "step": 41950 + }, + { + "epoch": 1.18, + "learning_rate": 1.8220445940260835e-05, + "loss": 0.1594, + "step": 42000 + }, + { + "epoch": 1.18, + "eval_bleu": 95.9473, + "eval_gen_len": 64.235, + "eval_loss": 0.2515351176261902, + "eval_rouge1": 93.7671, + "eval_rouge2": 90.1013, + "eval_rougeL": 93.575, + "eval_rougeLsum": 93.5369, + "eval_runtime": 3108.2504, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.241, + "step": 42000 + }, + { + "epoch": 1.18, + "learning_rate": 1.8206422661618288e-05, + "loss": 0.1723, + "step": 42050 + }, + { + "epoch": 1.18, + "learning_rate": 1.8192399382975738e-05, + "loss": 0.1553, + "step": 42100 + }, + { + "epoch": 1.18, + "learning_rate": 1.8178376104333195e-05, + "loss": 0.1709, + "step": 42150 + }, + { + "epoch": 1.18, + "learning_rate": 1.8164352825690645e-05, + "loss": 0.1861, + "step": 42200 + }, + { + "epoch": 1.18, + "learning_rate": 1.81503295470481e-05, + "loss": 0.1631, + "step": 42250 + }, + { + "epoch": 1.19, + "learning_rate": 1.8136306268405555e-05, + "loss": 0.1657, + "step": 42300 + }, + { + "epoch": 1.19, + "learning_rate": 1.8122282989763005e-05, + "loss": 0.1516, + "step": 42350 + }, + { + "epoch": 1.19, + "learning_rate": 1.810825971112046e-05, + "loss": 0.1532, + "step": 42400 + }, + { + "epoch": 1.19, + "learning_rate": 1.8094236432477915e-05, + "loss": 0.1428, + "step": 42450 + }, + { + "epoch": 1.19, + "learning_rate": 1.8080213153835365e-05, + "loss": 0.1493, + "step": 42500 + }, + { + "epoch": 1.19, + "learning_rate": 1.806618987519282e-05, + "loss": 0.1378, + "step": 42550 + }, + { + "epoch": 1.19, + "learning_rate": 1.805216659655027e-05, + "loss": 0.144, + "step": 42600 + }, + { + "epoch": 1.2, + "learning_rate": 1.803814331790773e-05, + "loss": 0.1685, + "step": 42650 + }, + { + "epoch": 1.2, + "learning_rate": 1.8024120039265182e-05, + "loss": 0.1612, + "step": 42700 + }, + { + "epoch": 1.2, + "learning_rate": 1.8010096760622632e-05, + "loss": 0.1678, + "step": 42750 + }, + { + "epoch": 1.2, + "learning_rate": 1.799607348198009e-05, + "loss": 0.1843, + "step": 42800 + }, + { + "epoch": 1.2, + "learning_rate": 1.7982050203337542e-05, + "loss": 0.1592, + "step": 42850 + }, + { + "epoch": 1.2, + "learning_rate": 1.7968026924694995e-05, + "loss": 0.1376, + "step": 42900 + }, + { + "epoch": 1.2, + "learning_rate": 1.795400364605245e-05, + "loss": 0.1777, + "step": 42950 + }, + { + "epoch": 1.21, + "learning_rate": 1.79399803674099e-05, + "loss": 0.1343, + "step": 43000 + }, + { + "epoch": 1.21, + "eval_bleu": 95.9595, + "eval_gen_len": 64.2177, + "eval_loss": 0.25027555227279663, + "eval_rouge1": 93.7701, + "eval_rouge2": 90.1732, + "eval_rougeL": 93.607, + "eval_rougeLsum": 93.6046, + "eval_runtime": 3107.9422, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.241, + "step": 43000 + }, + { + "epoch": 1.21, + "learning_rate": 1.7925957088767355e-05, + "loss": 0.1539, + "step": 43050 + }, + { + "epoch": 1.21, + "learning_rate": 1.791193381012481e-05, + "loss": 0.1527, + "step": 43100 + }, + { + "epoch": 1.21, + "learning_rate": 1.789791053148226e-05, + "loss": 0.1821, + "step": 43150 + }, + { + "epoch": 1.21, + "learning_rate": 1.7883887252839715e-05, + "loss": 0.1645, + "step": 43200 + }, + { + "epoch": 1.21, + "learning_rate": 1.786986397419717e-05, + "loss": 0.1596, + "step": 43250 + }, + { + "epoch": 1.21, + "learning_rate": 1.7855840695554622e-05, + "loss": 0.1394, + "step": 43300 + }, + { + "epoch": 1.22, + "learning_rate": 1.7841817416912075e-05, + "loss": 0.1526, + "step": 43350 + }, + { + "epoch": 1.22, + "learning_rate": 1.7827794138269525e-05, + "loss": 0.1529, + "step": 43400 + }, + { + "epoch": 1.22, + "learning_rate": 1.7813770859626982e-05, + "loss": 0.1662, + "step": 43450 + }, + { + "epoch": 1.22, + "learning_rate": 1.7799747580984435e-05, + "loss": 0.1572, + "step": 43500 + }, + { + "epoch": 1.22, + "learning_rate": 1.7785724302341885e-05, + "loss": 0.168, + "step": 43550 + }, + { + "epoch": 1.22, + "learning_rate": 1.7771701023699342e-05, + "loss": 0.1442, + "step": 43600 + }, + { + "epoch": 1.22, + "learning_rate": 1.7757677745056796e-05, + "loss": 0.15, + "step": 43650 + }, + { + "epoch": 1.23, + "learning_rate": 1.774365446641425e-05, + "loss": 0.1666, + "step": 43700 + }, + { + "epoch": 1.23, + "learning_rate": 1.7729631187771702e-05, + "loss": 0.1609, + "step": 43750 + }, + { + "epoch": 1.23, + "learning_rate": 1.7715607909129152e-05, + "loss": 0.1732, + "step": 43800 + }, + { + "epoch": 1.23, + "learning_rate": 1.770158463048661e-05, + "loss": 0.163, + "step": 43850 + }, + { + "epoch": 1.23, + "learning_rate": 1.7687561351844062e-05, + "loss": 0.1482, + "step": 43900 + }, + { + "epoch": 1.23, + "learning_rate": 1.7673538073201512e-05, + "loss": 0.1489, + "step": 43950 + }, + { + "epoch": 1.23, + "learning_rate": 1.765951479455897e-05, + "loss": 0.1435, + "step": 44000 + }, + { + "epoch": 1.23, + "eval_bleu": 95.9319, + "eval_gen_len": 64.194, + "eval_loss": 0.2497410923242569, + "eval_rouge1": 93.7912, + "eval_rouge2": 90.1359, + "eval_rougeL": 93.5982, + "eval_rougeLsum": 93.5886, + "eval_runtime": 3092.2489, + "eval_samples_per_second": 0.97, + "eval_steps_per_second": 0.243, + "step": 44000 + }, + { + "epoch": 1.24, + "learning_rate": 1.7645491515916422e-05, + "loss": 0.1686, + "step": 44050 + }, + { + "epoch": 1.24, + "learning_rate": 1.7631468237273876e-05, + "loss": 0.1699, + "step": 44100 + }, + { + "epoch": 1.24, + "learning_rate": 1.761744495863133e-05, + "loss": 0.1564, + "step": 44150 + }, + { + "epoch": 1.24, + "learning_rate": 1.760342167998878e-05, + "loss": 0.1613, + "step": 44200 + }, + { + "epoch": 1.24, + "learning_rate": 1.7589398401346236e-05, + "loss": 0.1526, + "step": 44250 + }, + { + "epoch": 1.24, + "learning_rate": 1.757537512270369e-05, + "loss": 0.1549, + "step": 44300 + }, + { + "epoch": 1.24, + "learning_rate": 1.7561351844061143e-05, + "loss": 0.181, + "step": 44350 + }, + { + "epoch": 1.25, + "learning_rate": 1.7547328565418596e-05, + "loss": 0.1665, + "step": 44400 + }, + { + "epoch": 1.25, + "learning_rate": 1.753330528677605e-05, + "loss": 0.1805, + "step": 44450 + }, + { + "epoch": 1.25, + "learning_rate": 1.7519282008133503e-05, + "loss": 0.1414, + "step": 44500 + }, + { + "epoch": 1.25, + "learning_rate": 1.7505258729490956e-05, + "loss": 0.1581, + "step": 44550 + }, + { + "epoch": 1.25, + "learning_rate": 1.7491235450848406e-05, + "loss": 0.15, + "step": 44600 + }, + { + "epoch": 1.25, + "learning_rate": 1.7477212172205863e-05, + "loss": 0.1401, + "step": 44650 + }, + { + "epoch": 1.25, + "learning_rate": 1.7463188893563316e-05, + "loss": 0.144, + "step": 44700 + }, + { + "epoch": 1.26, + "learning_rate": 1.744916561492077e-05, + "loss": 0.1459, + "step": 44750 + }, + { + "epoch": 1.26, + "learning_rate": 1.7435142336278223e-05, + "loss": 0.164, + "step": 44800 + }, + { + "epoch": 1.26, + "learning_rate": 1.7421119057635676e-05, + "loss": 0.1428, + "step": 44850 + }, + { + "epoch": 1.26, + "learning_rate": 1.740709577899313e-05, + "loss": 0.1529, + "step": 44900 + }, + { + "epoch": 1.26, + "learning_rate": 1.7393072500350583e-05, + "loss": 0.1631, + "step": 44950 + }, + { + "epoch": 1.26, + "learning_rate": 1.7379049221708033e-05, + "loss": 0.1613, + "step": 45000 + }, + { + "epoch": 1.26, + "eval_bleu": 95.9702, + "eval_gen_len": 64.2347, + "eval_loss": 0.24982018768787384, + "eval_rouge1": 93.7924, + "eval_rouge2": 90.1761, + "eval_rougeL": 93.6063, + "eval_rougeLsum": 93.5896, + "eval_runtime": 3101.0672, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 45000 + }, + { + "epoch": 1.26, + "learning_rate": 1.736502594306549e-05, + "loss": 0.165, + "step": 45050 + }, + { + "epoch": 1.26, + "learning_rate": 1.7351002664422943e-05, + "loss": 0.1467, + "step": 45100 + }, + { + "epoch": 1.27, + "learning_rate": 1.7336979385780396e-05, + "loss": 0.1935, + "step": 45150 + }, + { + "epoch": 1.27, + "learning_rate": 1.732295610713785e-05, + "loss": 0.1451, + "step": 45200 + }, + { + "epoch": 1.27, + "learning_rate": 1.7308932828495303e-05, + "loss": 0.1428, + "step": 45250 + }, + { + "epoch": 1.27, + "learning_rate": 1.7294909549852756e-05, + "loss": 0.1494, + "step": 45300 + }, + { + "epoch": 1.27, + "learning_rate": 1.728088627121021e-05, + "loss": 0.1626, + "step": 45350 + }, + { + "epoch": 1.27, + "learning_rate": 1.726686299256766e-05, + "loss": 0.1591, + "step": 45400 + }, + { + "epoch": 1.27, + "learning_rate": 1.7252839713925116e-05, + "loss": 0.1611, + "step": 45450 + }, + { + "epoch": 1.28, + "learning_rate": 1.723881643528257e-05, + "loss": 0.1549, + "step": 45500 + }, + { + "epoch": 1.28, + "learning_rate": 1.7224793156640023e-05, + "loss": 0.1818, + "step": 45550 + }, + { + "epoch": 1.28, + "learning_rate": 1.7210769877997477e-05, + "loss": 0.1738, + "step": 45600 + }, + { + "epoch": 1.28, + "learning_rate": 1.719674659935493e-05, + "loss": 0.1516, + "step": 45650 + }, + { + "epoch": 1.28, + "learning_rate": 1.7182723320712383e-05, + "loss": 0.1508, + "step": 45700 + }, + { + "epoch": 1.28, + "learning_rate": 1.7168700042069837e-05, + "loss": 0.1578, + "step": 45750 + }, + { + "epoch": 1.28, + "learning_rate": 1.715467676342729e-05, + "loss": 0.1601, + "step": 45800 + }, + { + "epoch": 1.29, + "learning_rate": 1.7140653484784743e-05, + "loss": 0.1603, + "step": 45850 + }, + { + "epoch": 1.29, + "learning_rate": 1.7126630206142197e-05, + "loss": 0.1698, + "step": 45900 + }, + { + "epoch": 1.29, + "learning_rate": 1.711260692749965e-05, + "loss": 0.1406, + "step": 45950 + }, + { + "epoch": 1.29, + "learning_rate": 1.7098583648857103e-05, + "loss": 0.1636, + "step": 46000 + }, + { + "epoch": 1.29, + "eval_bleu": 95.9319, + "eval_gen_len": 64.244, + "eval_loss": 0.25013232231140137, + "eval_rouge1": 93.7318, + "eval_rouge2": 90.1123, + "eval_rougeL": 93.5419, + "eval_rougeLsum": 93.5341, + "eval_runtime": 3080.401, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.243, + "step": 46000 + }, + { + "epoch": 1.29, + "learning_rate": 1.7084560370214557e-05, + "loss": 0.1589, + "step": 46050 + }, + { + "epoch": 1.29, + "learning_rate": 1.707053709157201e-05, + "loss": 0.1574, + "step": 46100 + }, + { + "epoch": 1.29, + "learning_rate": 1.7056513812929463e-05, + "loss": 0.1478, + "step": 46150 + }, + { + "epoch": 1.3, + "learning_rate": 1.7042490534286917e-05, + "loss": 0.1528, + "step": 46200 + }, + { + "epoch": 1.3, + "learning_rate": 1.702846725564437e-05, + "loss": 0.1469, + "step": 46250 + }, + { + "epoch": 1.3, + "learning_rate": 1.7014443977001824e-05, + "loss": 0.1489, + "step": 46300 + }, + { + "epoch": 1.3, + "learning_rate": 1.7000420698359277e-05, + "loss": 0.1464, + "step": 46350 + }, + { + "epoch": 1.3, + "learning_rate": 1.698639741971673e-05, + "loss": 0.144, + "step": 46400 + }, + { + "epoch": 1.3, + "learning_rate": 1.6972374141074184e-05, + "loss": 0.1694, + "step": 46450 + }, + { + "epoch": 1.3, + "learning_rate": 1.6958350862431637e-05, + "loss": 0.1569, + "step": 46500 + }, + { + "epoch": 1.31, + "learning_rate": 1.694432758378909e-05, + "loss": 0.1658, + "step": 46550 + }, + { + "epoch": 1.31, + "learning_rate": 1.6930304305146544e-05, + "loss": 0.166, + "step": 46600 + }, + { + "epoch": 1.31, + "learning_rate": 1.6916281026503997e-05, + "loss": 0.1837, + "step": 46650 + }, + { + "epoch": 1.31, + "learning_rate": 1.690225774786145e-05, + "loss": 0.1858, + "step": 46700 + }, + { + "epoch": 1.31, + "learning_rate": 1.6888234469218904e-05, + "loss": 0.149, + "step": 46750 + }, + { + "epoch": 1.31, + "learning_rate": 1.6874211190576357e-05, + "loss": 0.1679, + "step": 46800 + }, + { + "epoch": 1.31, + "learning_rate": 1.686018791193381e-05, + "loss": 0.1571, + "step": 46850 + }, + { + "epoch": 1.32, + "learning_rate": 1.6846164633291264e-05, + "loss": 0.1439, + "step": 46900 + }, + { + "epoch": 1.32, + "learning_rate": 1.6832141354648717e-05, + "loss": 0.1661, + "step": 46950 + }, + { + "epoch": 1.32, + "learning_rate": 1.681811807600617e-05, + "loss": 0.1661, + "step": 47000 + }, + { + "epoch": 1.32, + "eval_bleu": 95.9777, + "eval_gen_len": 64.2717, + "eval_loss": 0.24935156106948853, + "eval_rouge1": 93.831, + "eval_rouge2": 90.2688, + "eval_rougeL": 93.6413, + "eval_rougeLsum": 93.6338, + "eval_runtime": 3079.0307, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.244, + "step": 47000 + }, + { + "epoch": 1.32, + "learning_rate": 1.6804094797363624e-05, + "loss": 0.1605, + "step": 47050 + }, + { + "epoch": 1.32, + "learning_rate": 1.6790071518721077e-05, + "loss": 0.1488, + "step": 47100 + }, + { + "epoch": 1.32, + "learning_rate": 1.677604824007853e-05, + "loss": 0.1515, + "step": 47150 + }, + { + "epoch": 1.32, + "learning_rate": 1.6762024961435984e-05, + "loss": 0.1651, + "step": 47200 + }, + { + "epoch": 1.33, + "learning_rate": 1.674800168279344e-05, + "loss": 0.166, + "step": 47250 + }, + { + "epoch": 1.33, + "learning_rate": 1.673397840415089e-05, + "loss": 0.1507, + "step": 47300 + }, + { + "epoch": 1.33, + "learning_rate": 1.6719955125508344e-05, + "loss": 0.1538, + "step": 47350 + }, + { + "epoch": 1.33, + "learning_rate": 1.6705931846865797e-05, + "loss": 0.1574, + "step": 47400 + }, + { + "epoch": 1.33, + "learning_rate": 1.669190856822325e-05, + "loss": 0.1534, + "step": 47450 + }, + { + "epoch": 1.33, + "learning_rate": 1.6677885289580704e-05, + "loss": 0.1617, + "step": 47500 + }, + { + "epoch": 1.33, + "learning_rate": 1.6663862010938157e-05, + "loss": 0.1506, + "step": 47550 + }, + { + "epoch": 1.34, + "learning_rate": 1.664983873229561e-05, + "loss": 0.1554, + "step": 47600 + }, + { + "epoch": 1.34, + "learning_rate": 1.6635815453653068e-05, + "loss": 0.1405, + "step": 47650 + }, + { + "epoch": 1.34, + "learning_rate": 1.6621792175010518e-05, + "loss": 0.162, + "step": 47700 + }, + { + "epoch": 1.34, + "learning_rate": 1.660776889636797e-05, + "loss": 0.1456, + "step": 47750 + }, + { + "epoch": 1.34, + "learning_rate": 1.6593745617725424e-05, + "loss": 0.1664, + "step": 47800 + }, + { + "epoch": 1.34, + "learning_rate": 1.6579722339082878e-05, + "loss": 0.1626, + "step": 47850 + }, + { + "epoch": 1.34, + "learning_rate": 1.656569906044033e-05, + "loss": 0.1568, + "step": 47900 + }, + { + "epoch": 1.34, + "learning_rate": 1.6551675781797784e-05, + "loss": 0.154, + "step": 47950 + }, + { + "epoch": 1.35, + "learning_rate": 1.6537652503155238e-05, + "loss": 0.1484, + "step": 48000 + }, + { + "epoch": 1.35, + "eval_bleu": 95.9573, + "eval_gen_len": 64.25, + "eval_loss": 0.24856652319431305, + "eval_rouge1": 93.8022, + "eval_rouge2": 90.2009, + "eval_rougeL": 93.6171, + "eval_rougeLsum": 93.5939, + "eval_runtime": 3131.2312, + "eval_samples_per_second": 0.958, + "eval_steps_per_second": 0.24, + "step": 48000 + }, + { + "epoch": 1.35, + "learning_rate": 1.6523629224512694e-05, + "loss": 0.1558, + "step": 48050 + }, + { + "epoch": 1.35, + "learning_rate": 1.6509605945870144e-05, + "loss": 0.144, + "step": 48100 + }, + { + "epoch": 1.35, + "learning_rate": 1.6495582667227598e-05, + "loss": 0.16, + "step": 48150 + }, + { + "epoch": 1.35, + "learning_rate": 1.648155938858505e-05, + "loss": 0.1812, + "step": 48200 + }, + { + "epoch": 1.35, + "learning_rate": 1.6467536109942504e-05, + "loss": 0.1516, + "step": 48250 + }, + { + "epoch": 1.35, + "learning_rate": 1.6453512831299958e-05, + "loss": 0.1506, + "step": 48300 + }, + { + "epoch": 1.36, + "learning_rate": 1.643948955265741e-05, + "loss": 0.1515, + "step": 48350 + }, + { + "epoch": 1.36, + "learning_rate": 1.6425466274014865e-05, + "loss": 0.1418, + "step": 48400 + }, + { + "epoch": 1.36, + "learning_rate": 1.641144299537232e-05, + "loss": 0.162, + "step": 48450 + }, + { + "epoch": 1.36, + "learning_rate": 1.639741971672977e-05, + "loss": 0.1552, + "step": 48500 + }, + { + "epoch": 1.36, + "learning_rate": 1.6383396438087225e-05, + "loss": 0.1613, + "step": 48550 + }, + { + "epoch": 1.36, + "learning_rate": 1.6369373159444678e-05, + "loss": 0.1621, + "step": 48600 + }, + { + "epoch": 1.36, + "learning_rate": 1.635534988080213e-05, + "loss": 0.1594, + "step": 48650 + }, + { + "epoch": 1.37, + "learning_rate": 1.6341326602159588e-05, + "loss": 0.1447, + "step": 48700 + }, + { + "epoch": 1.37, + "learning_rate": 1.6327303323517038e-05, + "loss": 0.1508, + "step": 48750 + }, + { + "epoch": 1.37, + "learning_rate": 1.631328004487449e-05, + "loss": 0.1663, + "step": 48800 + }, + { + "epoch": 1.37, + "learning_rate": 1.6299256766231948e-05, + "loss": 0.1546, + "step": 48850 + }, + { + "epoch": 1.37, + "learning_rate": 1.6285233487589398e-05, + "loss": 0.1435, + "step": 48900 + }, + { + "epoch": 1.37, + "learning_rate": 1.627121020894685e-05, + "loss": 0.1645, + "step": 48950 + }, + { + "epoch": 1.37, + "learning_rate": 1.6257186930304305e-05, + "loss": 0.1532, + "step": 49000 + }, + { + "epoch": 1.37, + "eval_bleu": 95.9496, + "eval_gen_len": 64.2453, + "eval_loss": 0.24891996383666992, + "eval_rouge1": 93.7774, + "eval_rouge2": 90.1343, + "eval_rougeL": 93.5686, + "eval_rougeLsum": 93.5586, + "eval_runtime": 3102.2649, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 49000 + }, + { + "epoch": 1.38, + "learning_rate": 1.6243163651661758e-05, + "loss": 0.1465, + "step": 49050 + }, + { + "epoch": 1.38, + "learning_rate": 1.6229140373019215e-05, + "loss": 0.1652, + "step": 49100 + }, + { + "epoch": 1.38, + "learning_rate": 1.6215117094376665e-05, + "loss": 0.1538, + "step": 49150 + }, + { + "epoch": 1.38, + "learning_rate": 1.6201093815734118e-05, + "loss": 0.1608, + "step": 49200 + }, + { + "epoch": 1.38, + "learning_rate": 1.6187070537091575e-05, + "loss": 0.1735, + "step": 49250 + }, + { + "epoch": 1.38, + "learning_rate": 1.6173047258449025e-05, + "loss": 0.1752, + "step": 49300 + }, + { + "epoch": 1.38, + "learning_rate": 1.615902397980648e-05, + "loss": 0.1812, + "step": 49350 + }, + { + "epoch": 1.39, + "learning_rate": 1.614500070116393e-05, + "loss": 0.1568, + "step": 49400 + }, + { + "epoch": 1.39, + "learning_rate": 1.6130977422521385e-05, + "loss": 0.1543, + "step": 49450 + }, + { + "epoch": 1.39, + "learning_rate": 1.6116954143878842e-05, + "loss": 0.1625, + "step": 49500 + }, + { + "epoch": 1.39, + "learning_rate": 1.6102930865236292e-05, + "loss": 0.1547, + "step": 49550 + }, + { + "epoch": 1.39, + "learning_rate": 1.6088907586593745e-05, + "loss": 0.1455, + "step": 49600 + }, + { + "epoch": 1.39, + "learning_rate": 1.6074884307951202e-05, + "loss": 0.1623, + "step": 49650 + }, + { + "epoch": 1.39, + "learning_rate": 1.6060861029308652e-05, + "loss": 0.1532, + "step": 49700 + }, + { + "epoch": 1.4, + "learning_rate": 1.6046837750666105e-05, + "loss": 0.1539, + "step": 49750 + }, + { + "epoch": 1.4, + "learning_rate": 1.603281447202356e-05, + "loss": 0.1541, + "step": 49800 + }, + { + "epoch": 1.4, + "learning_rate": 1.6018791193381012e-05, + "loss": 0.1538, + "step": 49850 + }, + { + "epoch": 1.4, + "learning_rate": 1.600476791473847e-05, + "loss": 0.1656, + "step": 49900 + }, + { + "epoch": 1.4, + "learning_rate": 1.599074463609592e-05, + "loss": 0.1598, + "step": 49950 + }, + { + "epoch": 1.4, + "learning_rate": 1.5976721357453372e-05, + "loss": 0.1523, + "step": 50000 + }, + { + "epoch": 1.4, + "eval_bleu": 95.9721, + "eval_gen_len": 64.2117, + "eval_loss": 0.2481638789176941, + "eval_rouge1": 93.8515, + "eval_rouge2": 90.2059, + "eval_rougeL": 93.6666, + "eval_rougeLsum": 93.6523, + "eval_runtime": 3096.96, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 50000 + }, + { + "epoch": 1.4, + "learning_rate": 1.596269807881083e-05, + "loss": 0.1373, + "step": 50050 + }, + { + "epoch": 1.41, + "learning_rate": 1.594867480016828e-05, + "loss": 0.1535, + "step": 50100 + }, + { + "epoch": 1.41, + "learning_rate": 1.5934651521525735e-05, + "loss": 0.1483, + "step": 50150 + }, + { + "epoch": 1.41, + "learning_rate": 1.5920628242883185e-05, + "loss": 0.1558, + "step": 50200 + }, + { + "epoch": 1.41, + "learning_rate": 1.590660496424064e-05, + "loss": 0.1669, + "step": 50250 + }, + { + "epoch": 1.41, + "learning_rate": 1.5892581685598096e-05, + "loss": 0.1654, + "step": 50300 + }, + { + "epoch": 1.41, + "learning_rate": 1.5878558406955545e-05, + "loss": 0.178, + "step": 50350 + }, + { + "epoch": 1.41, + "learning_rate": 1.5864535128313e-05, + "loss": 0.1556, + "step": 50400 + }, + { + "epoch": 1.41, + "learning_rate": 1.5850511849670456e-05, + "loss": 0.1705, + "step": 50450 + }, + { + "epoch": 1.42, + "learning_rate": 1.5836488571027906e-05, + "loss": 0.1585, + "step": 50500 + }, + { + "epoch": 1.42, + "learning_rate": 1.5822465292385362e-05, + "loss": 0.1307, + "step": 50550 + }, + { + "epoch": 1.42, + "learning_rate": 1.5808442013742812e-05, + "loss": 0.1471, + "step": 50600 + }, + { + "epoch": 1.42, + "learning_rate": 1.5794418735100266e-05, + "loss": 0.1732, + "step": 50650 + }, + { + "epoch": 1.42, + "learning_rate": 1.5780395456457722e-05, + "loss": 0.1465, + "step": 50700 + }, + { + "epoch": 1.42, + "learning_rate": 1.5766372177815172e-05, + "loss": 0.1387, + "step": 50750 + }, + { + "epoch": 1.42, + "learning_rate": 1.5752348899172626e-05, + "loss": 0.1711, + "step": 50800 + }, + { + "epoch": 1.43, + "learning_rate": 1.5738325620530082e-05, + "loss": 0.1361, + "step": 50850 + }, + { + "epoch": 1.43, + "learning_rate": 1.5724302341887532e-05, + "loss": 0.1503, + "step": 50900 + }, + { + "epoch": 1.43, + "learning_rate": 1.571027906324499e-05, + "loss": 0.1615, + "step": 50950 + }, + { + "epoch": 1.43, + "learning_rate": 1.569625578460244e-05, + "loss": 0.1597, + "step": 51000 + }, + { + "epoch": 1.43, + "eval_bleu": 96.0124, + "eval_gen_len": 64.2523, + "eval_loss": 0.24883191287517548, + "eval_rouge1": 93.8575, + "eval_rouge2": 90.2696, + "eval_rougeL": 93.6678, + "eval_rougeLsum": 93.6672, + "eval_runtime": 3113.1853, + "eval_samples_per_second": 0.964, + "eval_steps_per_second": 0.241, + "step": 51000 + }, + { + "epoch": 1.43, + "learning_rate": 1.5682232505959892e-05, + "loss": 0.1595, + "step": 51050 + }, + { + "epoch": 1.43, + "learning_rate": 1.566820922731735e-05, + "loss": 0.1589, + "step": 51100 + }, + { + "epoch": 1.43, + "learning_rate": 1.56541859486748e-05, + "loss": 0.1436, + "step": 51150 + }, + { + "epoch": 1.44, + "learning_rate": 1.5640162670032253e-05, + "loss": 0.1417, + "step": 51200 + }, + { + "epoch": 1.44, + "learning_rate": 1.562613939138971e-05, + "loss": 0.1727, + "step": 51250 + }, + { + "epoch": 1.44, + "learning_rate": 1.561211611274716e-05, + "loss": 0.1563, + "step": 51300 + }, + { + "epoch": 1.44, + "learning_rate": 1.5598092834104616e-05, + "loss": 0.1521, + "step": 51350 + }, + { + "epoch": 1.44, + "learning_rate": 1.5584069555462066e-05, + "loss": 0.1583, + "step": 51400 + }, + { + "epoch": 1.44, + "learning_rate": 1.557004627681952e-05, + "loss": 0.1627, + "step": 51450 + }, + { + "epoch": 1.44, + "learning_rate": 1.5556022998176976e-05, + "loss": 0.142, + "step": 51500 + }, + { + "epoch": 1.45, + "learning_rate": 1.5541999719534426e-05, + "loss": 0.165, + "step": 51550 + }, + { + "epoch": 1.45, + "learning_rate": 1.5527976440891883e-05, + "loss": 0.1666, + "step": 51600 + }, + { + "epoch": 1.45, + "learning_rate": 1.5513953162249336e-05, + "loss": 0.1522, + "step": 51650 + }, + { + "epoch": 1.45, + "learning_rate": 1.5499929883606786e-05, + "loss": 0.1576, + "step": 51700 + }, + { + "epoch": 1.45, + "learning_rate": 1.5485906604964243e-05, + "loss": 0.1435, + "step": 51750 + }, + { + "epoch": 1.45, + "learning_rate": 1.5471883326321693e-05, + "loss": 0.1809, + "step": 51800 + }, + { + "epoch": 1.45, + "learning_rate": 1.5457860047679146e-05, + "loss": 0.1621, + "step": 51850 + }, + { + "epoch": 1.46, + "learning_rate": 1.5443836769036603e-05, + "loss": 0.1644, + "step": 51900 + }, + { + "epoch": 1.46, + "learning_rate": 1.5429813490394053e-05, + "loss": 0.1629, + "step": 51950 + }, + { + "epoch": 1.46, + "learning_rate": 1.541579021175151e-05, + "loss": 0.1564, + "step": 52000 + }, + { + "epoch": 1.46, + "eval_bleu": 96.0177, + "eval_gen_len": 64.2583, + "eval_loss": 0.24838809669017792, + "eval_rouge1": 93.8642, + "eval_rouge2": 90.2554, + "eval_rougeL": 93.6654, + "eval_rougeLsum": 93.6673, + "eval_runtime": 3134.0792, + "eval_samples_per_second": 0.957, + "eval_steps_per_second": 0.239, + "step": 52000 + }, + { + "epoch": 1.46, + "learning_rate": 1.5401766933108963e-05, + "loss": 0.1697, + "step": 52050 + }, + { + "epoch": 1.46, + "learning_rate": 1.5387743654466413e-05, + "loss": 0.1629, + "step": 52100 + }, + { + "epoch": 1.46, + "learning_rate": 1.537372037582387e-05, + "loss": 0.1769, + "step": 52150 + }, + { + "epoch": 1.46, + "learning_rate": 1.535969709718132e-05, + "loss": 0.1569, + "step": 52200 + }, + { + "epoch": 1.47, + "learning_rate": 1.5345673818538773e-05, + "loss": 0.158, + "step": 52250 + }, + { + "epoch": 1.47, + "learning_rate": 1.533165053989623e-05, + "loss": 0.1486, + "step": 52300 + }, + { + "epoch": 1.47, + "learning_rate": 1.531762726125368e-05, + "loss": 0.1517, + "step": 52350 + }, + { + "epoch": 1.47, + "learning_rate": 1.5303603982611137e-05, + "loss": 0.1496, + "step": 52400 + }, + { + "epoch": 1.47, + "learning_rate": 1.528958070396859e-05, + "loss": 0.1511, + "step": 52450 + }, + { + "epoch": 1.47, + "learning_rate": 1.527555742532604e-05, + "loss": 0.1435, + "step": 52500 + }, + { + "epoch": 1.47, + "learning_rate": 1.5261534146683497e-05, + "loss": 0.1342, + "step": 52550 + }, + { + "epoch": 1.48, + "learning_rate": 1.5247510868040947e-05, + "loss": 0.1369, + "step": 52600 + }, + { + "epoch": 1.48, + "learning_rate": 1.5233487589398402e-05, + "loss": 0.1393, + "step": 52650 + }, + { + "epoch": 1.48, + "learning_rate": 1.5219464310755857e-05, + "loss": 0.1537, + "step": 52700 + }, + { + "epoch": 1.48, + "learning_rate": 1.5205441032113307e-05, + "loss": 0.1514, + "step": 52750 + }, + { + "epoch": 1.48, + "learning_rate": 1.5191417753470762e-05, + "loss": 0.1517, + "step": 52800 + }, + { + "epoch": 1.48, + "learning_rate": 1.5177394474828217e-05, + "loss": 0.1507, + "step": 52850 + }, + { + "epoch": 1.48, + "learning_rate": 1.5163371196185668e-05, + "loss": 0.1845, + "step": 52900 + }, + { + "epoch": 1.49, + "learning_rate": 1.5149347917543123e-05, + "loss": 0.1449, + "step": 52950 + }, + { + "epoch": 1.49, + "learning_rate": 1.5135324638900573e-05, + "loss": 0.1488, + "step": 53000 + }, + { + "epoch": 1.49, + "eval_bleu": 96.0359, + "eval_gen_len": 64.297, + "eval_loss": 0.2471029907464981, + "eval_rouge1": 93.848, + "eval_rouge2": 90.2745, + "eval_rougeL": 93.6537, + "eval_rougeLsum": 93.6498, + "eval_runtime": 3110.5388, + "eval_samples_per_second": 0.964, + "eval_steps_per_second": 0.241, + "step": 53000 + }, + { + "epoch": 1.49, + "learning_rate": 1.5121301360258028e-05, + "loss": 0.1479, + "step": 53050 + }, + { + "epoch": 1.49, + "learning_rate": 1.5107278081615484e-05, + "loss": 0.1713, + "step": 53100 + }, + { + "epoch": 1.49, + "learning_rate": 1.5093254802972935e-05, + "loss": 0.1583, + "step": 53150 + }, + { + "epoch": 1.49, + "learning_rate": 1.5079231524330389e-05, + "loss": 0.159, + "step": 53200 + }, + { + "epoch": 1.49, + "learning_rate": 1.5065208245687844e-05, + "loss": 0.1429, + "step": 53250 + }, + { + "epoch": 1.49, + "learning_rate": 1.5051184967045295e-05, + "loss": 0.1513, + "step": 53300 + }, + { + "epoch": 1.5, + "learning_rate": 1.503716168840275e-05, + "loss": 0.1605, + "step": 53350 + }, + { + "epoch": 1.5, + "learning_rate": 1.50231384097602e-05, + "loss": 0.1537, + "step": 53400 + }, + { + "epoch": 1.5, + "learning_rate": 1.5009115131117655e-05, + "loss": 0.1573, + "step": 53450 + }, + { + "epoch": 1.5, + "learning_rate": 1.4995091852475109e-05, + "loss": 0.1678, + "step": 53500 + }, + { + "epoch": 1.5, + "learning_rate": 1.4981068573832564e-05, + "loss": 0.1602, + "step": 53550 + }, + { + "epoch": 1.5, + "learning_rate": 1.4967045295190015e-05, + "loss": 0.1313, + "step": 53600 + }, + { + "epoch": 1.5, + "learning_rate": 1.4953022016547469e-05, + "loss": 0.1377, + "step": 53650 + }, + { + "epoch": 1.51, + "learning_rate": 1.4938998737904922e-05, + "loss": 0.1509, + "step": 53700 + }, + { + "epoch": 1.51, + "learning_rate": 1.4924975459262377e-05, + "loss": 0.1717, + "step": 53750 + }, + { + "epoch": 1.51, + "learning_rate": 1.4910952180619829e-05, + "loss": 0.1518, + "step": 53800 + }, + { + "epoch": 1.51, + "learning_rate": 1.4896928901977282e-05, + "loss": 0.1576, + "step": 53850 + }, + { + "epoch": 1.51, + "learning_rate": 1.4882905623334736e-05, + "loss": 0.1741, + "step": 53900 + }, + { + "epoch": 1.51, + "learning_rate": 1.486888234469219e-05, + "loss": 0.1457, + "step": 53950 + }, + { + "epoch": 1.51, + "learning_rate": 1.4854859066049642e-05, + "loss": 0.1285, + "step": 54000 + }, + { + "epoch": 1.51, + "eval_bleu": 96.0361, + "eval_gen_len": 64.2367, + "eval_loss": 0.2483721524477005, + "eval_rouge1": 93.8768, + "eval_rouge2": 90.3035, + "eval_rougeL": 93.698, + "eval_rougeLsum": 93.6976, + "eval_runtime": 3128.3254, + "eval_samples_per_second": 0.959, + "eval_steps_per_second": 0.24, + "step": 54000 + }, + { + "epoch": 1.52, + "learning_rate": 1.4840835787407096e-05, + "loss": 0.1327, + "step": 54050 + }, + { + "epoch": 1.52, + "learning_rate": 1.4826812508764549e-05, + "loss": 0.1455, + "step": 54100 + }, + { + "epoch": 1.52, + "learning_rate": 1.4812789230122004e-05, + "loss": 0.1336, + "step": 54150 + }, + { + "epoch": 1.52, + "learning_rate": 1.4798765951479456e-05, + "loss": 0.1497, + "step": 54200 + }, + { + "epoch": 1.52, + "learning_rate": 1.4784742672836909e-05, + "loss": 0.1545, + "step": 54250 + }, + { + "epoch": 1.52, + "learning_rate": 1.4770719394194362e-05, + "loss": 0.1488, + "step": 54300 + }, + { + "epoch": 1.52, + "learning_rate": 1.4756696115551817e-05, + "loss": 0.1591, + "step": 54350 + }, + { + "epoch": 1.53, + "learning_rate": 1.4742672836909271e-05, + "loss": 0.1515, + "step": 54400 + }, + { + "epoch": 1.53, + "learning_rate": 1.4728649558266723e-05, + "loss": 0.1538, + "step": 54450 + }, + { + "epoch": 1.53, + "learning_rate": 1.4714626279624176e-05, + "loss": 0.1652, + "step": 54500 + }, + { + "epoch": 1.53, + "learning_rate": 1.4700603000981631e-05, + "loss": 0.1561, + "step": 54550 + }, + { + "epoch": 1.53, + "learning_rate": 1.4686579722339084e-05, + "loss": 0.1644, + "step": 54600 + }, + { + "epoch": 1.53, + "learning_rate": 1.4672556443696536e-05, + "loss": 0.1596, + "step": 54650 + }, + { + "epoch": 1.53, + "learning_rate": 1.465853316505399e-05, + "loss": 0.1585, + "step": 54700 + }, + { + "epoch": 1.54, + "learning_rate": 1.4644509886411444e-05, + "loss": 0.1557, + "step": 54750 + }, + { + "epoch": 1.54, + "learning_rate": 1.4630486607768898e-05, + "loss": 0.137, + "step": 54800 + }, + { + "epoch": 1.54, + "learning_rate": 1.461646332912635e-05, + "loss": 0.1783, + "step": 54850 + }, + { + "epoch": 1.54, + "learning_rate": 1.4602440050483803e-05, + "loss": 0.1731, + "step": 54900 + }, + { + "epoch": 1.54, + "learning_rate": 1.4588416771841258e-05, + "loss": 0.1552, + "step": 54950 + }, + { + "epoch": 1.54, + "learning_rate": 1.4574393493198711e-05, + "loss": 0.1628, + "step": 55000 + }, + { + "epoch": 1.54, + "eval_bleu": 96.033, + "eval_gen_len": 64.2217, + "eval_loss": 0.2469196319580078, + "eval_rouge1": 93.9233, + "eval_rouge2": 90.3508, + "eval_rougeL": 93.7332, + "eval_rougeLsum": 93.7184, + "eval_runtime": 3083.8837, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 55000 + }, + { + "epoch": 1.54, + "learning_rate": 1.4560370214556163e-05, + "loss": 0.1606, + "step": 55050 + }, + { + "epoch": 1.55, + "learning_rate": 1.4546346935913616e-05, + "loss": 0.1534, + "step": 55100 + }, + { + "epoch": 1.55, + "learning_rate": 1.4532323657271071e-05, + "loss": 0.1516, + "step": 55150 + }, + { + "epoch": 1.55, + "learning_rate": 1.4518300378628525e-05, + "loss": 0.1674, + "step": 55200 + }, + { + "epoch": 1.55, + "learning_rate": 1.4504277099985976e-05, + "loss": 0.1562, + "step": 55250 + }, + { + "epoch": 1.55, + "learning_rate": 1.449025382134343e-05, + "loss": 0.1509, + "step": 55300 + }, + { + "epoch": 1.55, + "learning_rate": 1.4476230542700885e-05, + "loss": 0.1476, + "step": 55350 + }, + { + "epoch": 1.55, + "learning_rate": 1.4462207264058338e-05, + "loss": 0.1514, + "step": 55400 + }, + { + "epoch": 1.56, + "learning_rate": 1.444818398541579e-05, + "loss": 0.1497, + "step": 55450 + }, + { + "epoch": 1.56, + "learning_rate": 1.4434160706773243e-05, + "loss": 0.1728, + "step": 55500 + }, + { + "epoch": 1.56, + "learning_rate": 1.4420137428130698e-05, + "loss": 0.1499, + "step": 55550 + }, + { + "epoch": 1.56, + "learning_rate": 1.4406114149488151e-05, + "loss": 0.1588, + "step": 55600 + }, + { + "epoch": 1.56, + "learning_rate": 1.4392090870845603e-05, + "loss": 0.1589, + "step": 55650 + }, + { + "epoch": 1.56, + "learning_rate": 1.4378067592203056e-05, + "loss": 0.1332, + "step": 55700 + }, + { + "epoch": 1.56, + "learning_rate": 1.4364044313560512e-05, + "loss": 0.15, + "step": 55750 + }, + { + "epoch": 1.56, + "learning_rate": 1.4350021034917965e-05, + "loss": 0.1731, + "step": 55800 + }, + { + "epoch": 1.57, + "learning_rate": 1.4335997756275418e-05, + "loss": 0.1468, + "step": 55850 + }, + { + "epoch": 1.57, + "learning_rate": 1.432197447763287e-05, + "loss": 0.1464, + "step": 55900 + }, + { + "epoch": 1.57, + "learning_rate": 1.4307951198990325e-05, + "loss": 0.1597, + "step": 55950 + }, + { + "epoch": 1.57, + "learning_rate": 1.4293927920347778e-05, + "loss": 0.155, + "step": 56000 + }, + { + "epoch": 1.57, + "eval_bleu": 96.0287, + "eval_gen_len": 64.226, + "eval_loss": 0.24680346250534058, + "eval_rouge1": 93.8476, + "eval_rouge2": 90.2618, + "eval_rougeL": 93.6663, + "eval_rougeLsum": 93.6463, + "eval_runtime": 3086.3631, + "eval_samples_per_second": 0.972, + "eval_steps_per_second": 0.243, + "step": 56000 + }, + { + "epoch": 1.57, + "learning_rate": 1.4279904641705232e-05, + "loss": 0.1392, + "step": 56050 + }, + { + "epoch": 1.57, + "learning_rate": 1.4265881363062683e-05, + "loss": 0.1546, + "step": 56100 + }, + { + "epoch": 1.57, + "learning_rate": 1.4251858084420138e-05, + "loss": 0.1596, + "step": 56150 + }, + { + "epoch": 1.58, + "learning_rate": 1.4237834805777592e-05, + "loss": 0.1574, + "step": 56200 + }, + { + "epoch": 1.58, + "learning_rate": 1.4223811527135045e-05, + "loss": 0.1593, + "step": 56250 + }, + { + "epoch": 1.58, + "learning_rate": 1.4209788248492497e-05, + "loss": 0.1497, + "step": 56300 + }, + { + "epoch": 1.58, + "learning_rate": 1.4195764969849952e-05, + "loss": 0.1447, + "step": 56350 + }, + { + "epoch": 1.58, + "learning_rate": 1.4181741691207405e-05, + "loss": 0.1399, + "step": 56400 + }, + { + "epoch": 1.58, + "learning_rate": 1.4167718412564859e-05, + "loss": 0.1392, + "step": 56450 + }, + { + "epoch": 1.58, + "learning_rate": 1.415369513392231e-05, + "loss": 0.1587, + "step": 56500 + }, + { + "epoch": 1.59, + "learning_rate": 1.4139671855279765e-05, + "loss": 0.1763, + "step": 56550 + }, + { + "epoch": 1.59, + "learning_rate": 1.4125648576637219e-05, + "loss": 0.1542, + "step": 56600 + }, + { + "epoch": 1.59, + "learning_rate": 1.4111625297994672e-05, + "loss": 0.1422, + "step": 56650 + }, + { + "epoch": 1.59, + "learning_rate": 1.4097602019352124e-05, + "loss": 0.158, + "step": 56700 + }, + { + "epoch": 1.59, + "learning_rate": 1.4083578740709579e-05, + "loss": 0.1511, + "step": 56750 + }, + { + "epoch": 1.59, + "learning_rate": 1.4069555462067032e-05, + "loss": 0.1572, + "step": 56800 + }, + { + "epoch": 1.59, + "learning_rate": 1.4055532183424485e-05, + "loss": 0.1677, + "step": 56850 + }, + { + "epoch": 1.6, + "learning_rate": 1.4041508904781937e-05, + "loss": 0.1425, + "step": 56900 + }, + { + "epoch": 1.6, + "learning_rate": 1.4027485626139392e-05, + "loss": 0.1744, + "step": 56950 + }, + { + "epoch": 1.6, + "learning_rate": 1.4013462347496845e-05, + "loss": 0.1514, + "step": 57000 + }, + { + "epoch": 1.6, + "eval_bleu": 96.0359, + "eval_gen_len": 64.2267, + "eval_loss": 0.24612654745578766, + "eval_rouge1": 93.9162, + "eval_rouge2": 90.3455, + "eval_rougeL": 93.7454, + "eval_rougeLsum": 93.7264, + "eval_runtime": 3113.4814, + "eval_samples_per_second": 0.964, + "eval_steps_per_second": 0.241, + "step": 57000 + }, + { + "epoch": 1.6, + "learning_rate": 1.3999439068854299e-05, + "loss": 0.149, + "step": 57050 + }, + { + "epoch": 1.6, + "learning_rate": 1.3985415790211752e-05, + "loss": 0.1457, + "step": 57100 + }, + { + "epoch": 1.6, + "learning_rate": 1.3971392511569206e-05, + "loss": 0.1426, + "step": 57150 + }, + { + "epoch": 1.6, + "learning_rate": 1.3957369232926659e-05, + "loss": 0.1574, + "step": 57200 + }, + { + "epoch": 1.61, + "learning_rate": 1.3943345954284112e-05, + "loss": 0.1566, + "step": 57250 + }, + { + "epoch": 1.61, + "learning_rate": 1.3929322675641566e-05, + "loss": 0.1459, + "step": 57300 + }, + { + "epoch": 1.61, + "learning_rate": 1.3915299396999019e-05, + "loss": 0.1431, + "step": 57350 + }, + { + "epoch": 1.61, + "learning_rate": 1.3901276118356472e-05, + "loss": 0.154, + "step": 57400 + }, + { + "epoch": 1.61, + "learning_rate": 1.3887252839713926e-05, + "loss": 0.1513, + "step": 57450 + }, + { + "epoch": 1.61, + "learning_rate": 1.3873229561071379e-05, + "loss": 0.1571, + "step": 57500 + }, + { + "epoch": 1.61, + "learning_rate": 1.3859206282428832e-05, + "loss": 0.1499, + "step": 57550 + }, + { + "epoch": 1.62, + "learning_rate": 1.3845183003786286e-05, + "loss": 0.1563, + "step": 57600 + }, + { + "epoch": 1.62, + "learning_rate": 1.3831159725143739e-05, + "loss": 0.1719, + "step": 57650 + }, + { + "epoch": 1.62, + "learning_rate": 1.3817136446501192e-05, + "loss": 0.1507, + "step": 57700 + }, + { + "epoch": 1.62, + "learning_rate": 1.3803113167858646e-05, + "loss": 0.1387, + "step": 57750 + }, + { + "epoch": 1.62, + "learning_rate": 1.37890898892161e-05, + "loss": 0.1439, + "step": 57800 + }, + { + "epoch": 1.62, + "learning_rate": 1.3775066610573553e-05, + "loss": 0.1434, + "step": 57850 + }, + { + "epoch": 1.62, + "learning_rate": 1.3761043331931006e-05, + "loss": 0.1719, + "step": 57900 + }, + { + "epoch": 1.63, + "learning_rate": 1.374702005328846e-05, + "loss": 0.1824, + "step": 57950 + }, + { + "epoch": 1.63, + "learning_rate": 1.3732996774645913e-05, + "loss": 0.1393, + "step": 58000 + }, + { + "epoch": 1.63, + "eval_bleu": 96.0528, + "eval_gen_len": 64.256, + "eval_loss": 0.24644368886947632, + "eval_rouge1": 93.9108, + "eval_rouge2": 90.3273, + "eval_rougeL": 93.7332, + "eval_rougeLsum": 93.7113, + "eval_runtime": 3121.7481, + "eval_samples_per_second": 0.961, + "eval_steps_per_second": 0.24, + "step": 58000 + }, + { + "epoch": 1.63, + "learning_rate": 1.3718973496003366e-05, + "loss": 0.1601, + "step": 58050 + }, + { + "epoch": 1.63, + "learning_rate": 1.370495021736082e-05, + "loss": 0.1494, + "step": 58100 + }, + { + "epoch": 1.63, + "learning_rate": 1.3690926938718273e-05, + "loss": 0.1413, + "step": 58150 + }, + { + "epoch": 1.63, + "learning_rate": 1.3676903660075726e-05, + "loss": 0.1609, + "step": 58200 + }, + { + "epoch": 1.63, + "learning_rate": 1.366288038143318e-05, + "loss": 0.1683, + "step": 58250 + }, + { + "epoch": 1.64, + "learning_rate": 1.3648857102790633e-05, + "loss": 0.1556, + "step": 58300 + }, + { + "epoch": 1.64, + "learning_rate": 1.3634833824148086e-05, + "loss": 0.1479, + "step": 58350 + }, + { + "epoch": 1.64, + "learning_rate": 1.362081054550554e-05, + "loss": 0.169, + "step": 58400 + }, + { + "epoch": 1.64, + "learning_rate": 1.3606787266862993e-05, + "loss": 0.1521, + "step": 58450 + }, + { + "epoch": 1.64, + "learning_rate": 1.3592763988220446e-05, + "loss": 0.16, + "step": 58500 + }, + { + "epoch": 1.64, + "learning_rate": 1.35787407095779e-05, + "loss": 0.1453, + "step": 58550 + }, + { + "epoch": 1.64, + "learning_rate": 1.3564717430935353e-05, + "loss": 0.1439, + "step": 58600 + }, + { + "epoch": 1.64, + "learning_rate": 1.3550694152292806e-05, + "loss": 0.1546, + "step": 58650 + }, + { + "epoch": 1.65, + "learning_rate": 1.353667087365026e-05, + "loss": 0.157, + "step": 58700 + }, + { + "epoch": 1.65, + "learning_rate": 1.3522647595007715e-05, + "loss": 0.1769, + "step": 58750 + }, + { + "epoch": 1.65, + "learning_rate": 1.3508624316365166e-05, + "loss": 0.1591, + "step": 58800 + }, + { + "epoch": 1.65, + "learning_rate": 1.349460103772262e-05, + "loss": 0.1492, + "step": 58850 + }, + { + "epoch": 1.65, + "learning_rate": 1.3480577759080073e-05, + "loss": 0.1474, + "step": 58900 + }, + { + "epoch": 1.65, + "learning_rate": 1.3466554480437528e-05, + "loss": 0.1622, + "step": 58950 + }, + { + "epoch": 1.65, + "learning_rate": 1.345253120179498e-05, + "loss": 0.1691, + "step": 59000 + }, + { + "epoch": 1.65, + "eval_bleu": 96.0335, + "eval_gen_len": 64.2307, + "eval_loss": 0.24554915726184845, + "eval_rouge1": 93.8976, + "eval_rouge2": 90.3009, + "eval_rougeL": 93.7085, + "eval_rougeLsum": 93.6965, + "eval_runtime": 3154.4266, + "eval_samples_per_second": 0.951, + "eval_steps_per_second": 0.238, + "step": 59000 + }, + { + "epoch": 1.66, + "learning_rate": 1.3438507923152433e-05, + "loss": 0.157, + "step": 59050 + }, + { + "epoch": 1.66, + "learning_rate": 1.3424484644509886e-05, + "loss": 0.1341, + "step": 59100 + }, + { + "epoch": 1.66, + "learning_rate": 1.3410461365867342e-05, + "loss": 0.1638, + "step": 59150 + }, + { + "epoch": 1.66, + "learning_rate": 1.3396438087224793e-05, + "loss": 0.1523, + "step": 59200 + }, + { + "epoch": 1.66, + "learning_rate": 1.3382414808582247e-05, + "loss": 0.1301, + "step": 59250 + }, + { + "epoch": 1.66, + "learning_rate": 1.33683915299397e-05, + "loss": 0.1456, + "step": 59300 + }, + { + "epoch": 1.66, + "learning_rate": 1.3354368251297155e-05, + "loss": 0.1592, + "step": 59350 + }, + { + "epoch": 1.67, + "learning_rate": 1.3340344972654607e-05, + "loss": 0.1464, + "step": 59400 + }, + { + "epoch": 1.67, + "learning_rate": 1.332632169401206e-05, + "loss": 0.1383, + "step": 59450 + }, + { + "epoch": 1.67, + "learning_rate": 1.3312298415369513e-05, + "loss": 0.1372, + "step": 59500 + }, + { + "epoch": 1.67, + "learning_rate": 1.3298275136726968e-05, + "loss": 0.1515, + "step": 59550 + }, + { + "epoch": 1.67, + "learning_rate": 1.328425185808442e-05, + "loss": 0.1714, + "step": 59600 + }, + { + "epoch": 1.67, + "learning_rate": 1.3270228579441873e-05, + "loss": 0.1572, + "step": 59650 + }, + { + "epoch": 1.67, + "learning_rate": 1.3256205300799327e-05, + "loss": 0.1468, + "step": 59700 + }, + { + "epoch": 1.68, + "learning_rate": 1.3242182022156782e-05, + "loss": 0.16, + "step": 59750 + }, + { + "epoch": 1.68, + "learning_rate": 1.3228158743514233e-05, + "loss": 0.1402, + "step": 59800 + }, + { + "epoch": 1.68, + "learning_rate": 1.3214135464871687e-05, + "loss": 0.1427, + "step": 59850 + }, + { + "epoch": 1.68, + "learning_rate": 1.320011218622914e-05, + "loss": 0.1508, + "step": 59900 + }, + { + "epoch": 1.68, + "learning_rate": 1.3186088907586595e-05, + "loss": 0.1613, + "step": 59950 + }, + { + "epoch": 1.68, + "learning_rate": 1.3172065628944049e-05, + "loss": 0.1623, + "step": 60000 + }, + { + "epoch": 1.68, + "eval_bleu": 96.0534, + "eval_gen_len": 64.217, + "eval_loss": 0.245611310005188, + "eval_rouge1": 93.9428, + "eval_rouge2": 90.3851, + "eval_rougeL": 93.7507, + "eval_rougeLsum": 93.731, + "eval_runtime": 3078.7009, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.244, + "step": 60000 + }, + { + "epoch": 1.68, + "learning_rate": 1.31580423503015e-05, + "loss": 0.1554, + "step": 60050 + }, + { + "epoch": 1.69, + "learning_rate": 1.3144019071658954e-05, + "loss": 0.1618, + "step": 60100 + }, + { + "epoch": 1.69, + "learning_rate": 1.3129995793016409e-05, + "loss": 0.164, + "step": 60150 + }, + { + "epoch": 1.69, + "learning_rate": 1.3115972514373862e-05, + "loss": 0.1601, + "step": 60200 + }, + { + "epoch": 1.69, + "learning_rate": 1.3101949235731314e-05, + "loss": 0.1497, + "step": 60250 + }, + { + "epoch": 1.69, + "learning_rate": 1.3087925957088767e-05, + "loss": 0.1414, + "step": 60300 + }, + { + "epoch": 1.69, + "learning_rate": 1.3073902678446222e-05, + "loss": 0.1665, + "step": 60350 + }, + { + "epoch": 1.69, + "learning_rate": 1.3059879399803675e-05, + "loss": 0.1615, + "step": 60400 + }, + { + "epoch": 1.7, + "learning_rate": 1.3045856121161127e-05, + "loss": 0.1411, + "step": 60450 + }, + { + "epoch": 1.7, + "learning_rate": 1.303183284251858e-05, + "loss": 0.1527, + "step": 60500 + }, + { + "epoch": 1.7, + "learning_rate": 1.3017809563876036e-05, + "loss": 0.147, + "step": 60550 + }, + { + "epoch": 1.7, + "learning_rate": 1.3003786285233489e-05, + "loss": 0.1346, + "step": 60600 + }, + { + "epoch": 1.7, + "learning_rate": 1.298976300659094e-05, + "loss": 0.1512, + "step": 60650 + }, + { + "epoch": 1.7, + "learning_rate": 1.2975739727948394e-05, + "loss": 0.1363, + "step": 60700 + }, + { + "epoch": 1.7, + "learning_rate": 1.2961716449305849e-05, + "loss": 0.1592, + "step": 60750 + }, + { + "epoch": 1.71, + "learning_rate": 1.2947693170663302e-05, + "loss": 0.1357, + "step": 60800 + }, + { + "epoch": 1.71, + "learning_rate": 1.2933669892020754e-05, + "loss": 0.1617, + "step": 60850 + }, + { + "epoch": 1.71, + "learning_rate": 1.2919646613378207e-05, + "loss": 0.1425, + "step": 60900 + }, + { + "epoch": 1.71, + "learning_rate": 1.2905623334735662e-05, + "loss": 0.1334, + "step": 60950 + }, + { + "epoch": 1.71, + "learning_rate": 1.2891600056093116e-05, + "loss": 0.1577, + "step": 61000 + }, + { + "epoch": 1.71, + "eval_bleu": 96.0539, + "eval_gen_len": 64.2317, + "eval_loss": 0.24573621153831482, + "eval_rouge1": 93.8833, + "eval_rouge2": 90.3011, + "eval_rougeL": 93.7003, + "eval_rougeLsum": 93.6749, + "eval_runtime": 3094.1804, + "eval_samples_per_second": 0.97, + "eval_steps_per_second": 0.242, + "step": 61000 + }, + { + "epoch": 1.71, + "learning_rate": 1.2877576777450567e-05, + "loss": 0.1561, + "step": 61050 + }, + { + "epoch": 1.71, + "learning_rate": 1.286355349880802e-05, + "loss": 0.1526, + "step": 61100 + }, + { + "epoch": 1.72, + "learning_rate": 1.2849530220165476e-05, + "loss": 0.1516, + "step": 61150 + }, + { + "epoch": 1.72, + "learning_rate": 1.283550694152293e-05, + "loss": 0.1604, + "step": 61200 + }, + { + "epoch": 1.72, + "learning_rate": 1.2821483662880381e-05, + "loss": 0.1393, + "step": 61250 + }, + { + "epoch": 1.72, + "learning_rate": 1.2807460384237834e-05, + "loss": 0.1522, + "step": 61300 + }, + { + "epoch": 1.72, + "learning_rate": 1.279343710559529e-05, + "loss": 0.1605, + "step": 61350 + }, + { + "epoch": 1.72, + "learning_rate": 1.2779413826952743e-05, + "loss": 0.1483, + "step": 61400 + }, + { + "epoch": 1.72, + "learning_rate": 1.2765390548310196e-05, + "loss": 0.1866, + "step": 61450 + }, + { + "epoch": 1.72, + "learning_rate": 1.2751367269667648e-05, + "loss": 0.1714, + "step": 61500 + }, + { + "epoch": 1.73, + "learning_rate": 1.2737343991025103e-05, + "loss": 0.1616, + "step": 61550 + }, + { + "epoch": 1.73, + "learning_rate": 1.2723320712382556e-05, + "loss": 0.1377, + "step": 61600 + }, + { + "epoch": 1.73, + "learning_rate": 1.270929743374001e-05, + "loss": 0.1322, + "step": 61650 + }, + { + "epoch": 1.73, + "learning_rate": 1.2695274155097461e-05, + "loss": 0.1655, + "step": 61700 + }, + { + "epoch": 1.73, + "learning_rate": 1.2681250876454916e-05, + "loss": 0.1752, + "step": 61750 + }, + { + "epoch": 1.73, + "learning_rate": 1.266722759781237e-05, + "loss": 0.1452, + "step": 61800 + }, + { + "epoch": 1.73, + "learning_rate": 1.2653204319169823e-05, + "loss": 0.1592, + "step": 61850 + }, + { + "epoch": 1.74, + "learning_rate": 1.2639181040527274e-05, + "loss": 0.1676, + "step": 61900 + }, + { + "epoch": 1.74, + "learning_rate": 1.262515776188473e-05, + "loss": 0.1578, + "step": 61950 + }, + { + "epoch": 1.74, + "learning_rate": 1.2611134483242183e-05, + "loss": 0.1543, + "step": 62000 + }, + { + "epoch": 1.74, + "eval_bleu": 96.0497, + "eval_gen_len": 64.2223, + "eval_loss": 0.2444319725036621, + "eval_rouge1": 93.9499, + "eval_rouge2": 90.3622, + "eval_rougeL": 93.7659, + "eval_rougeLsum": 93.7507, + "eval_runtime": 3090.9521, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 62000 + }, + { + "epoch": 1.74, + "learning_rate": 1.2597111204599636e-05, + "loss": 0.1429, + "step": 62050 + }, + { + "epoch": 1.74, + "learning_rate": 1.2583087925957088e-05, + "loss": 0.1425, + "step": 62100 + }, + { + "epoch": 1.74, + "learning_rate": 1.2569064647314543e-05, + "loss": 0.1501, + "step": 62150 + }, + { + "epoch": 1.74, + "learning_rate": 1.2555041368671996e-05, + "loss": 0.1417, + "step": 62200 + }, + { + "epoch": 1.75, + "learning_rate": 1.254101809002945e-05, + "loss": 0.1495, + "step": 62250 + }, + { + "epoch": 1.75, + "learning_rate": 1.2526994811386901e-05, + "loss": 0.1493, + "step": 62300 + }, + { + "epoch": 1.75, + "learning_rate": 1.2512971532744356e-05, + "loss": 0.1437, + "step": 62350 + }, + { + "epoch": 1.75, + "learning_rate": 1.249894825410181e-05, + "loss": 0.1497, + "step": 62400 + }, + { + "epoch": 1.75, + "learning_rate": 1.2484924975459263e-05, + "loss": 0.1659, + "step": 62450 + }, + { + "epoch": 1.75, + "learning_rate": 1.2470901696816715e-05, + "loss": 0.1702, + "step": 62500 + }, + { + "epoch": 1.75, + "learning_rate": 1.245687841817417e-05, + "loss": 0.1591, + "step": 62550 + }, + { + "epoch": 1.76, + "learning_rate": 1.2442855139531623e-05, + "loss": 0.1472, + "step": 62600 + }, + { + "epoch": 1.76, + "learning_rate": 1.2428831860889077e-05, + "loss": 0.1638, + "step": 62650 + }, + { + "epoch": 1.76, + "learning_rate": 1.2414808582246528e-05, + "loss": 0.1669, + "step": 62700 + }, + { + "epoch": 1.76, + "learning_rate": 1.2400785303603983e-05, + "loss": 0.1435, + "step": 62750 + }, + { + "epoch": 1.76, + "learning_rate": 1.2386762024961437e-05, + "loss": 0.1513, + "step": 62800 + }, + { + "epoch": 1.76, + "learning_rate": 1.237273874631889e-05, + "loss": 0.1455, + "step": 62850 + }, + { + "epoch": 1.76, + "learning_rate": 1.2358715467676343e-05, + "loss": 0.1578, + "step": 62900 + }, + { + "epoch": 1.77, + "learning_rate": 1.2344692189033797e-05, + "loss": 0.1554, + "step": 62950 + }, + { + "epoch": 1.77, + "learning_rate": 1.233066891039125e-05, + "loss": 0.1544, + "step": 63000 + }, + { + "epoch": 1.77, + "eval_bleu": 96.0681, + "eval_gen_len": 64.241, + "eval_loss": 0.245001420378685, + "eval_rouge1": 93.9489, + "eval_rouge2": 90.383, + "eval_rougeL": 93.7697, + "eval_rougeLsum": 93.7483, + "eval_runtime": 3098.5549, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.242, + "step": 63000 + }, + { + "epoch": 1.77, + "learning_rate": 1.2316645631748703e-05, + "loss": 0.1269, + "step": 63050 + }, + { + "epoch": 1.77, + "learning_rate": 1.2302622353106157e-05, + "loss": 0.1689, + "step": 63100 + }, + { + "epoch": 1.77, + "learning_rate": 1.228859907446361e-05, + "loss": 0.1512, + "step": 63150 + }, + { + "epoch": 1.77, + "learning_rate": 1.2274575795821063e-05, + "loss": 0.1584, + "step": 63200 + }, + { + "epoch": 1.77, + "learning_rate": 1.2260552517178517e-05, + "loss": 0.142, + "step": 63250 + }, + { + "epoch": 1.78, + "learning_rate": 1.224652923853597e-05, + "loss": 0.1424, + "step": 63300 + }, + { + "epoch": 1.78, + "learning_rate": 1.2232505959893424e-05, + "loss": 0.144, + "step": 63350 + }, + { + "epoch": 1.78, + "learning_rate": 1.2218482681250877e-05, + "loss": 0.1608, + "step": 63400 + }, + { + "epoch": 1.78, + "learning_rate": 1.220445940260833e-05, + "loss": 0.1376, + "step": 63450 + }, + { + "epoch": 1.78, + "learning_rate": 1.2190436123965784e-05, + "loss": 0.17, + "step": 63500 + }, + { + "epoch": 1.78, + "learning_rate": 1.2176412845323237e-05, + "loss": 0.1587, + "step": 63550 + }, + { + "epoch": 1.78, + "learning_rate": 1.216238956668069e-05, + "loss": 0.1572, + "step": 63600 + }, + { + "epoch": 1.79, + "learning_rate": 1.2148366288038144e-05, + "loss": 0.1501, + "step": 63650 + }, + { + "epoch": 1.79, + "learning_rate": 1.2134343009395597e-05, + "loss": 0.1514, + "step": 63700 + }, + { + "epoch": 1.79, + "learning_rate": 1.212031973075305e-05, + "loss": 0.1656, + "step": 63750 + }, + { + "epoch": 1.79, + "learning_rate": 1.2106296452110504e-05, + "loss": 0.1684, + "step": 63800 + }, + { + "epoch": 1.79, + "learning_rate": 1.2092273173467957e-05, + "loss": 0.1481, + "step": 63850 + }, + { + "epoch": 1.79, + "learning_rate": 1.207824989482541e-05, + "loss": 0.1528, + "step": 63900 + }, + { + "epoch": 1.79, + "learning_rate": 1.2064226616182864e-05, + "loss": 0.1433, + "step": 63950 + }, + { + "epoch": 1.79, + "learning_rate": 1.2050203337540317e-05, + "loss": 0.148, + "step": 64000 + }, + { + "epoch": 1.79, + "eval_bleu": 96.1034, + "eval_gen_len": 64.2707, + "eval_loss": 0.24430668354034424, + "eval_rouge1": 93.969, + "eval_rouge2": 90.4279, + "eval_rougeL": 93.7797, + "eval_rougeLsum": 93.7478, + "eval_runtime": 3170.7735, + "eval_samples_per_second": 0.946, + "eval_steps_per_second": 0.237, + "step": 64000 + }, + { + "epoch": 1.8, + "learning_rate": 1.203618005889777e-05, + "loss": 0.1517, + "step": 64050 + }, + { + "epoch": 1.8, + "learning_rate": 1.2022156780255224e-05, + "loss": 0.1383, + "step": 64100 + }, + { + "epoch": 1.8, + "learning_rate": 1.2008133501612677e-05, + "loss": 0.1335, + "step": 64150 + }, + { + "epoch": 1.8, + "learning_rate": 1.199411022297013e-05, + "loss": 0.1427, + "step": 64200 + }, + { + "epoch": 1.8, + "learning_rate": 1.1980086944327584e-05, + "loss": 0.1617, + "step": 64250 + }, + { + "epoch": 1.8, + "learning_rate": 1.1966063665685037e-05, + "loss": 0.1247, + "step": 64300 + }, + { + "epoch": 1.8, + "learning_rate": 1.1952040387042492e-05, + "loss": 0.1574, + "step": 64350 + }, + { + "epoch": 1.81, + "learning_rate": 1.1938017108399944e-05, + "loss": 0.1431, + "step": 64400 + }, + { + "epoch": 1.81, + "learning_rate": 1.1923993829757397e-05, + "loss": 0.1423, + "step": 64450 + }, + { + "epoch": 1.81, + "learning_rate": 1.190997055111485e-05, + "loss": 0.1508, + "step": 64500 + }, + { + "epoch": 1.81, + "learning_rate": 1.1895947272472306e-05, + "loss": 0.1516, + "step": 64550 + }, + { + "epoch": 1.81, + "learning_rate": 1.1881923993829758e-05, + "loss": 0.1392, + "step": 64600 + }, + { + "epoch": 1.81, + "learning_rate": 1.1867900715187211e-05, + "loss": 0.1487, + "step": 64650 + }, + { + "epoch": 1.81, + "learning_rate": 1.1853877436544664e-05, + "loss": 0.1772, + "step": 64700 + }, + { + "epoch": 1.82, + "learning_rate": 1.183985415790212e-05, + "loss": 0.1416, + "step": 64750 + }, + { + "epoch": 1.82, + "learning_rate": 1.1825830879259571e-05, + "loss": 0.1644, + "step": 64800 + }, + { + "epoch": 1.82, + "learning_rate": 1.1811807600617024e-05, + "loss": 0.1524, + "step": 64850 + }, + { + "epoch": 1.82, + "learning_rate": 1.1797784321974478e-05, + "loss": 0.1573, + "step": 64900 + }, + { + "epoch": 1.82, + "learning_rate": 1.1783761043331933e-05, + "loss": 0.1497, + "step": 64950 + }, + { + "epoch": 1.82, + "learning_rate": 1.1769737764689384e-05, + "loss": 0.1676, + "step": 65000 + }, + { + "epoch": 1.82, + "eval_bleu": 96.0602, + "eval_gen_len": 64.2243, + "eval_loss": 0.24451076984405518, + "eval_rouge1": 93.9544, + "eval_rouge2": 90.3539, + "eval_rougeL": 93.7721, + "eval_rougeLsum": 93.7482, + "eval_runtime": 3116.5293, + "eval_samples_per_second": 0.963, + "eval_steps_per_second": 0.241, + "step": 65000 + }, + { + "epoch": 1.82, + "learning_rate": 1.1755714486046838e-05, + "loss": 0.1516, + "step": 65050 + }, + { + "epoch": 1.83, + "learning_rate": 1.1741691207404291e-05, + "loss": 0.1279, + "step": 65100 + }, + { + "epoch": 1.83, + "learning_rate": 1.1727667928761746e-05, + "loss": 0.1641, + "step": 65150 + }, + { + "epoch": 1.83, + "learning_rate": 1.1713644650119198e-05, + "loss": 0.1234, + "step": 65200 + }, + { + "epoch": 1.83, + "learning_rate": 1.1699621371476651e-05, + "loss": 0.1391, + "step": 65250 + }, + { + "epoch": 1.83, + "learning_rate": 1.1685598092834105e-05, + "loss": 0.1653, + "step": 65300 + }, + { + "epoch": 1.83, + "learning_rate": 1.167157481419156e-05, + "loss": 0.1587, + "step": 65350 + }, + { + "epoch": 1.83, + "learning_rate": 1.1657551535549011e-05, + "loss": 0.1459, + "step": 65400 + }, + { + "epoch": 1.84, + "learning_rate": 1.1643528256906465e-05, + "loss": 0.1583, + "step": 65450 + }, + { + "epoch": 1.84, + "learning_rate": 1.1629504978263918e-05, + "loss": 0.1477, + "step": 65500 + }, + { + "epoch": 1.84, + "learning_rate": 1.1615481699621373e-05, + "loss": 0.1549, + "step": 65550 + }, + { + "epoch": 1.84, + "learning_rate": 1.1601458420978825e-05, + "loss": 0.1531, + "step": 65600 + }, + { + "epoch": 1.84, + "learning_rate": 1.1587435142336278e-05, + "loss": 0.1594, + "step": 65650 + }, + { + "epoch": 1.84, + "learning_rate": 1.1573411863693731e-05, + "loss": 0.1635, + "step": 65700 + }, + { + "epoch": 1.84, + "learning_rate": 1.1559388585051186e-05, + "loss": 0.1508, + "step": 65750 + }, + { + "epoch": 1.85, + "learning_rate": 1.154536530640864e-05, + "loss": 0.1314, + "step": 65800 + }, + { + "epoch": 1.85, + "learning_rate": 1.1531342027766091e-05, + "loss": 0.1398, + "step": 65850 + }, + { + "epoch": 1.85, + "learning_rate": 1.1517318749123545e-05, + "loss": 0.1427, + "step": 65900 + }, + { + "epoch": 1.85, + "learning_rate": 1.1503295470481e-05, + "loss": 0.1338, + "step": 65950 + }, + { + "epoch": 1.85, + "learning_rate": 1.1489272191838453e-05, + "loss": 0.1542, + "step": 66000 + }, + { + "epoch": 1.85, + "eval_bleu": 96.0707, + "eval_gen_len": 64.2427, + "eval_loss": 0.24484632909297943, + "eval_rouge1": 93.9242, + "eval_rouge2": 90.3577, + "eval_rougeL": 93.7378, + "eval_rougeLsum": 93.7199, + "eval_runtime": 3063.6568, + "eval_samples_per_second": 0.979, + "eval_steps_per_second": 0.245, + "step": 66000 + }, + { + "epoch": 1.85, + "learning_rate": 1.1475248913195905e-05, + "loss": 0.1571, + "step": 66050 + }, + { + "epoch": 1.85, + "learning_rate": 1.1461225634553358e-05, + "loss": 0.1345, + "step": 66100 + }, + { + "epoch": 1.86, + "learning_rate": 1.1447202355910813e-05, + "loss": 0.1669, + "step": 66150 + }, + { + "epoch": 1.86, + "learning_rate": 1.1433179077268267e-05, + "loss": 0.1534, + "step": 66200 + }, + { + "epoch": 1.86, + "learning_rate": 1.1419155798625718e-05, + "loss": 0.1729, + "step": 66250 + }, + { + "epoch": 1.86, + "learning_rate": 1.1405132519983172e-05, + "loss": 0.1338, + "step": 66300 + }, + { + "epoch": 1.86, + "learning_rate": 1.1391109241340627e-05, + "loss": 0.1464, + "step": 66350 + }, + { + "epoch": 1.86, + "learning_rate": 1.137708596269808e-05, + "loss": 0.1445, + "step": 66400 + }, + { + "epoch": 1.86, + "learning_rate": 1.1363062684055532e-05, + "loss": 0.1372, + "step": 66450 + }, + { + "epoch": 1.87, + "learning_rate": 1.1349039405412985e-05, + "loss": 0.1819, + "step": 66500 + }, + { + "epoch": 1.87, + "learning_rate": 1.133501612677044e-05, + "loss": 0.1483, + "step": 66550 + }, + { + "epoch": 1.87, + "learning_rate": 1.1320992848127894e-05, + "loss": 0.1391, + "step": 66600 + }, + { + "epoch": 1.87, + "learning_rate": 1.1306969569485345e-05, + "loss": 0.1555, + "step": 66650 + }, + { + "epoch": 1.87, + "learning_rate": 1.1292946290842799e-05, + "loss": 0.1748, + "step": 66700 + }, + { + "epoch": 1.87, + "learning_rate": 1.1278923012200254e-05, + "loss": 0.1608, + "step": 66750 + }, + { + "epoch": 1.87, + "learning_rate": 1.1264899733557707e-05, + "loss": 0.1399, + "step": 66800 + }, + { + "epoch": 1.87, + "learning_rate": 1.1250876454915159e-05, + "loss": 0.1399, + "step": 66850 + }, + { + "epoch": 1.88, + "learning_rate": 1.1236853176272612e-05, + "loss": 0.1753, + "step": 66900 + }, + { + "epoch": 1.88, + "learning_rate": 1.1222829897630067e-05, + "loss": 0.1494, + "step": 66950 + }, + { + "epoch": 1.88, + "learning_rate": 1.120880661898752e-05, + "loss": 0.1507, + "step": 67000 + }, + { + "epoch": 1.88, + "eval_bleu": 96.0481, + "eval_gen_len": 64.2317, + "eval_loss": 0.24457640945911407, + "eval_rouge1": 93.9262, + "eval_rouge2": 90.3319, + "eval_rougeL": 93.7352, + "eval_rougeLsum": 93.7164, + "eval_runtime": 3094.0136, + "eval_samples_per_second": 0.97, + "eval_steps_per_second": 0.242, + "step": 67000 + }, + { + "epoch": 1.88, + "learning_rate": 1.1194783340344972e-05, + "loss": 0.1775, + "step": 67050 + }, + { + "epoch": 1.88, + "learning_rate": 1.1180760061702425e-05, + "loss": 0.1619, + "step": 67100 + }, + { + "epoch": 1.88, + "learning_rate": 1.116673678305988e-05, + "loss": 0.1589, + "step": 67150 + }, + { + "epoch": 1.88, + "learning_rate": 1.1152713504417334e-05, + "loss": 0.1598, + "step": 67200 + }, + { + "epoch": 1.89, + "learning_rate": 1.1138690225774787e-05, + "loss": 0.1652, + "step": 67250 + }, + { + "epoch": 1.89, + "learning_rate": 1.1124666947132239e-05, + "loss": 0.1336, + "step": 67300 + }, + { + "epoch": 1.89, + "learning_rate": 1.1110643668489694e-05, + "loss": 0.1574, + "step": 67350 + }, + { + "epoch": 1.89, + "learning_rate": 1.1096620389847147e-05, + "loss": 0.1644, + "step": 67400 + }, + { + "epoch": 1.89, + "learning_rate": 1.10825971112046e-05, + "loss": 0.1637, + "step": 67450 + }, + { + "epoch": 1.89, + "learning_rate": 1.1068573832562052e-05, + "loss": 0.1396, + "step": 67500 + }, + { + "epoch": 1.89, + "learning_rate": 1.1054550553919507e-05, + "loss": 0.1304, + "step": 67550 + }, + { + "epoch": 1.9, + "learning_rate": 1.104052727527696e-05, + "loss": 0.153, + "step": 67600 + }, + { + "epoch": 1.9, + "learning_rate": 1.1026503996634414e-05, + "loss": 0.1324, + "step": 67650 + }, + { + "epoch": 1.9, + "learning_rate": 1.1012480717991866e-05, + "loss": 0.1581, + "step": 67700 + }, + { + "epoch": 1.9, + "learning_rate": 1.099845743934932e-05, + "loss": 0.1593, + "step": 67750 + }, + { + "epoch": 1.9, + "learning_rate": 1.0984434160706774e-05, + "loss": 0.1422, + "step": 67800 + }, + { + "epoch": 1.9, + "learning_rate": 1.0970410882064227e-05, + "loss": 0.1491, + "step": 67850 + }, + { + "epoch": 1.9, + "learning_rate": 1.0956387603421679e-05, + "loss": 0.1727, + "step": 67900 + }, + { + "epoch": 1.91, + "learning_rate": 1.0942364324779134e-05, + "loss": 0.1576, + "step": 67950 + }, + { + "epoch": 1.91, + "learning_rate": 1.0928341046136588e-05, + "loss": 0.1701, + "step": 68000 + }, + { + "epoch": 1.91, + "eval_bleu": 96.0842, + "eval_gen_len": 64.2387, + "eval_loss": 0.24398915469646454, + "eval_rouge1": 93.9618, + "eval_rouge2": 90.4097, + "eval_rougeL": 93.7775, + "eval_rougeLsum": 93.7665, + "eval_runtime": 3135.9742, + "eval_samples_per_second": 0.957, + "eval_steps_per_second": 0.239, + "step": 68000 + }, + { + "epoch": 1.91, + "learning_rate": 1.0914317767494041e-05, + "loss": 0.1359, + "step": 68050 + }, + { + "epoch": 1.91, + "learning_rate": 1.0900294488851493e-05, + "loss": 0.1762, + "step": 68100 + }, + { + "epoch": 1.91, + "learning_rate": 1.0886271210208948e-05, + "loss": 0.1596, + "step": 68150 + }, + { + "epoch": 1.91, + "learning_rate": 1.0872247931566401e-05, + "loss": 0.1574, + "step": 68200 + }, + { + "epoch": 1.91, + "learning_rate": 1.0858224652923854e-05, + "loss": 0.1334, + "step": 68250 + }, + { + "epoch": 1.92, + "learning_rate": 1.0844201374281306e-05, + "loss": 0.1456, + "step": 68300 + }, + { + "epoch": 1.92, + "learning_rate": 1.0830178095638761e-05, + "loss": 0.1625, + "step": 68350 + }, + { + "epoch": 1.92, + "learning_rate": 1.0816154816996214e-05, + "loss": 0.1715, + "step": 68400 + }, + { + "epoch": 1.92, + "learning_rate": 1.0802131538353668e-05, + "loss": 0.1444, + "step": 68450 + }, + { + "epoch": 1.92, + "learning_rate": 1.078810825971112e-05, + "loss": 0.1748, + "step": 68500 + }, + { + "epoch": 1.92, + "learning_rate": 1.0774084981068574e-05, + "loss": 0.1522, + "step": 68550 + }, + { + "epoch": 1.92, + "learning_rate": 1.0760061702426028e-05, + "loss": 0.1364, + "step": 68600 + }, + { + "epoch": 1.93, + "learning_rate": 1.0746038423783481e-05, + "loss": 0.1557, + "step": 68650 + }, + { + "epoch": 1.93, + "learning_rate": 1.0732015145140935e-05, + "loss": 0.1399, + "step": 68700 + }, + { + "epoch": 1.93, + "learning_rate": 1.0717991866498388e-05, + "loss": 0.1478, + "step": 68750 + }, + { + "epoch": 1.93, + "learning_rate": 1.0703968587855841e-05, + "loss": 0.1444, + "step": 68800 + }, + { + "epoch": 1.93, + "learning_rate": 1.0689945309213295e-05, + "loss": 0.1419, + "step": 68850 + }, + { + "epoch": 1.93, + "learning_rate": 1.0675922030570748e-05, + "loss": 0.1642, + "step": 68900 + }, + { + "epoch": 1.93, + "learning_rate": 1.0661898751928201e-05, + "loss": 0.1646, + "step": 68950 + }, + { + "epoch": 1.94, + "learning_rate": 1.0647875473285655e-05, + "loss": 0.161, + "step": 69000 + }, + { + "epoch": 1.94, + "eval_bleu": 96.1059, + "eval_gen_len": 64.2643, + "eval_loss": 0.24390248954296112, + "eval_rouge1": 93.9311, + "eval_rouge2": 90.376, + "eval_rougeL": 93.746, + "eval_rougeLsum": 93.7269, + "eval_runtime": 3190.0005, + "eval_samples_per_second": 0.94, + "eval_steps_per_second": 0.235, + "step": 69000 + }, + { + "epoch": 1.94, + "learning_rate": 1.0633852194643108e-05, + "loss": 0.1595, + "step": 69050 + }, + { + "epoch": 1.94, + "learning_rate": 1.0619828916000561e-05, + "loss": 0.1468, + "step": 69100 + }, + { + "epoch": 1.94, + "learning_rate": 1.0605805637358015e-05, + "loss": 0.1417, + "step": 69150 + }, + { + "epoch": 1.94, + "learning_rate": 1.0591782358715468e-05, + "loss": 0.1555, + "step": 69200 + }, + { + "epoch": 1.94, + "learning_rate": 1.0577759080072921e-05, + "loss": 0.1611, + "step": 69250 + }, + { + "epoch": 1.94, + "learning_rate": 1.0563735801430375e-05, + "loss": 0.1587, + "step": 69300 + }, + { + "epoch": 1.95, + "learning_rate": 1.0549712522787828e-05, + "loss": 0.1521, + "step": 69350 + }, + { + "epoch": 1.95, + "learning_rate": 1.0535689244145282e-05, + "loss": 0.1356, + "step": 69400 + }, + { + "epoch": 1.95, + "learning_rate": 1.0521665965502735e-05, + "loss": 0.1494, + "step": 69450 + }, + { + "epoch": 1.95, + "learning_rate": 1.0507642686860188e-05, + "loss": 0.1391, + "step": 69500 + }, + { + "epoch": 1.95, + "learning_rate": 1.0493619408217642e-05, + "loss": 0.1631, + "step": 69550 + }, + { + "epoch": 1.95, + "learning_rate": 1.0479596129575095e-05, + "loss": 0.1526, + "step": 69600 + }, + { + "epoch": 1.95, + "learning_rate": 1.0465572850932548e-05, + "loss": 0.1431, + "step": 69650 + }, + { + "epoch": 1.95, + "learning_rate": 1.0451549572290002e-05, + "loss": 0.161, + "step": 69700 + }, + { + "epoch": 1.96, + "learning_rate": 1.0437526293647455e-05, + "loss": 0.1628, + "step": 69750 + }, + { + "epoch": 1.96, + "learning_rate": 1.0423503015004908e-05, + "loss": 0.1529, + "step": 69800 + }, + { + "epoch": 1.96, + "learning_rate": 1.0409479736362362e-05, + "loss": 0.1452, + "step": 69850 + }, + { + "epoch": 1.96, + "learning_rate": 1.0395456457719815e-05, + "loss": 0.1459, + "step": 69900 + }, + { + "epoch": 1.96, + "learning_rate": 1.0381433179077268e-05, + "loss": 0.1444, + "step": 69950 + }, + { + "epoch": 1.96, + "learning_rate": 1.0367409900434722e-05, + "loss": 0.1316, + "step": 70000 + }, + { + "epoch": 1.96, + "eval_bleu": 96.0919, + "eval_gen_len": 64.248, + "eval_loss": 0.24350972473621368, + "eval_rouge1": 93.9235, + "eval_rouge2": 90.3844, + "eval_rougeL": 93.751, + "eval_rougeLsum": 93.7278, + "eval_runtime": 3089.9712, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 70000 + }, + { + "epoch": 1.96, + "learning_rate": 1.0353386621792175e-05, + "loss": 0.1485, + "step": 70050 + }, + { + "epoch": 1.97, + "learning_rate": 1.0339363343149629e-05, + "loss": 0.1559, + "step": 70100 + }, + { + "epoch": 1.97, + "learning_rate": 1.0325340064507084e-05, + "loss": 0.1483, + "step": 70150 + }, + { + "epoch": 1.97, + "learning_rate": 1.0311316785864535e-05, + "loss": 0.1845, + "step": 70200 + }, + { + "epoch": 1.97, + "learning_rate": 1.0297293507221989e-05, + "loss": 0.1417, + "step": 70250 + }, + { + "epoch": 1.97, + "learning_rate": 1.0283270228579442e-05, + "loss": 0.1341, + "step": 70300 + }, + { + "epoch": 1.97, + "learning_rate": 1.0269246949936897e-05, + "loss": 0.1462, + "step": 70350 + }, + { + "epoch": 1.97, + "learning_rate": 1.0255223671294349e-05, + "loss": 0.1721, + "step": 70400 + }, + { + "epoch": 1.98, + "learning_rate": 1.0241200392651802e-05, + "loss": 0.1549, + "step": 70450 + }, + { + "epoch": 1.98, + "learning_rate": 1.0227177114009255e-05, + "loss": 0.16, + "step": 70500 + }, + { + "epoch": 1.98, + "learning_rate": 1.021315383536671e-05, + "loss": 0.1455, + "step": 70550 + }, + { + "epoch": 1.98, + "learning_rate": 1.0199130556724162e-05, + "loss": 0.1312, + "step": 70600 + }, + { + "epoch": 1.98, + "learning_rate": 1.0185107278081615e-05, + "loss": 0.1607, + "step": 70650 + }, + { + "epoch": 1.98, + "learning_rate": 1.0171083999439069e-05, + "loss": 0.1483, + "step": 70700 + }, + { + "epoch": 1.98, + "learning_rate": 1.0157060720796524e-05, + "loss": 0.1725, + "step": 70750 + }, + { + "epoch": 1.99, + "learning_rate": 1.0143037442153976e-05, + "loss": 0.1598, + "step": 70800 + }, + { + "epoch": 1.99, + "learning_rate": 1.0129014163511429e-05, + "loss": 0.1384, + "step": 70850 + }, + { + "epoch": 1.99, + "learning_rate": 1.0114990884868882e-05, + "loss": 0.1448, + "step": 70900 + }, + { + "epoch": 1.99, + "learning_rate": 1.0100967606226337e-05, + "loss": 0.1534, + "step": 70950 + }, + { + "epoch": 1.99, + "learning_rate": 1.0086944327583789e-05, + "loss": 0.136, + "step": 71000 + }, + { + "epoch": 1.99, + "eval_bleu": 96.0747, + "eval_gen_len": 64.2193, + "eval_loss": 0.24425600469112396, + "eval_rouge1": 93.9676, + "eval_rouge2": 90.4404, + "eval_rougeL": 93.7902, + "eval_rougeLsum": 93.7617, + "eval_runtime": 3086.9294, + "eval_samples_per_second": 0.972, + "eval_steps_per_second": 0.243, + "step": 71000 + }, + { + "epoch": 1.99, + "learning_rate": 1.0072921048941242e-05, + "loss": 0.163, + "step": 71050 + }, + { + "epoch": 1.99, + "learning_rate": 1.0058897770298696e-05, + "loss": 0.1373, + "step": 71100 + }, + { + "epoch": 2.0, + "learning_rate": 1.004487449165615e-05, + "loss": 0.1452, + "step": 71150 + }, + { + "epoch": 2.0, + "learning_rate": 1.0030851213013602e-05, + "loss": 0.1583, + "step": 71200 + }, + { + "epoch": 2.0, + "learning_rate": 1.0016827934371056e-05, + "loss": 0.1396, + "step": 71250 + }, + { + "epoch": 2.0, + "learning_rate": 1.0002804655728509e-05, + "loss": 0.1309, + "step": 71300 + }, + { + "epoch": 2.0, + "learning_rate": 9.988781377085964e-06, + "loss": 0.1628, + "step": 71350 + }, + { + "epoch": 2.0, + "learning_rate": 9.974758098443416e-06, + "loss": 0.1354, + "step": 71400 + }, + { + "epoch": 2.0, + "learning_rate": 9.96073481980087e-06, + "loss": 0.1565, + "step": 71450 + }, + { + "epoch": 2.01, + "learning_rate": 9.946711541158323e-06, + "loss": 0.1411, + "step": 71500 + }, + { + "epoch": 2.01, + "learning_rate": 9.932688262515778e-06, + "loss": 0.1681, + "step": 71550 + }, + { + "epoch": 2.01, + "learning_rate": 9.918664983873231e-06, + "loss": 0.1406, + "step": 71600 + }, + { + "epoch": 2.01, + "learning_rate": 9.904641705230683e-06, + "loss": 0.1308, + "step": 71650 + }, + { + "epoch": 2.01, + "learning_rate": 9.890618426588136e-06, + "loss": 0.1221, + "step": 71700 + }, + { + "epoch": 2.01, + "learning_rate": 9.876595147945591e-06, + "loss": 0.1325, + "step": 71750 + }, + { + "epoch": 2.01, + "learning_rate": 9.862571869303044e-06, + "loss": 0.1377, + "step": 71800 + }, + { + "epoch": 2.02, + "learning_rate": 9.848548590660496e-06, + "loss": 0.1437, + "step": 71850 + }, + { + "epoch": 2.02, + "learning_rate": 9.83452531201795e-06, + "loss": 0.153, + "step": 71900 + }, + { + "epoch": 2.02, + "learning_rate": 9.820502033375404e-06, + "loss": 0.1462, + "step": 71950 + }, + { + "epoch": 2.02, + "learning_rate": 9.806478754732858e-06, + "loss": 0.1651, + "step": 72000 + }, + { + "epoch": 2.02, + "eval_bleu": 96.1009, + "eval_gen_len": 64.2463, + "eval_loss": 0.24310755729675293, + "eval_rouge1": 94.0005, + "eval_rouge2": 90.4979, + "eval_rougeL": 93.8136, + "eval_rougeLsum": 93.7931, + "eval_runtime": 3096.8678, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 72000 + }, + { + "epoch": 2.02, + "learning_rate": 9.79245547609031e-06, + "loss": 0.1675, + "step": 72050 + }, + { + "epoch": 2.02, + "learning_rate": 9.778432197447763e-06, + "loss": 0.15, + "step": 72100 + }, + { + "epoch": 2.02, + "learning_rate": 9.764408918805218e-06, + "loss": 0.1351, + "step": 72150 + }, + { + "epoch": 2.02, + "learning_rate": 9.750385640162671e-06, + "loss": 0.1424, + "step": 72200 + }, + { + "epoch": 2.03, + "learning_rate": 9.736362361520123e-06, + "loss": 0.1351, + "step": 72250 + }, + { + "epoch": 2.03, + "learning_rate": 9.722339082877576e-06, + "loss": 0.1265, + "step": 72300 + }, + { + "epoch": 2.03, + "learning_rate": 9.708315804235031e-06, + "loss": 0.1696, + "step": 72350 + }, + { + "epoch": 2.03, + "learning_rate": 9.694292525592485e-06, + "loss": 0.1451, + "step": 72400 + }, + { + "epoch": 2.03, + "learning_rate": 9.680269246949936e-06, + "loss": 0.1417, + "step": 72450 + }, + { + "epoch": 2.03, + "learning_rate": 9.66624596830739e-06, + "loss": 0.1383, + "step": 72500 + }, + { + "epoch": 2.03, + "learning_rate": 9.652222689664845e-06, + "loss": 0.1393, + "step": 72550 + }, + { + "epoch": 2.04, + "learning_rate": 9.638199411022298e-06, + "loss": 0.1378, + "step": 72600 + }, + { + "epoch": 2.04, + "learning_rate": 9.62417613237975e-06, + "loss": 0.1632, + "step": 72650 + }, + { + "epoch": 2.04, + "learning_rate": 9.610152853737203e-06, + "loss": 0.152, + "step": 72700 + }, + { + "epoch": 2.04, + "learning_rate": 9.596129575094658e-06, + "loss": 0.1392, + "step": 72750 + }, + { + "epoch": 2.04, + "learning_rate": 9.582106296452112e-06, + "loss": 0.1381, + "step": 72800 + }, + { + "epoch": 2.04, + "learning_rate": 9.568083017809565e-06, + "loss": 0.1341, + "step": 72850 + }, + { + "epoch": 2.04, + "learning_rate": 9.554059739167017e-06, + "loss": 0.1372, + "step": 72900 + }, + { + "epoch": 2.05, + "learning_rate": 9.540036460524472e-06, + "loss": 0.1521, + "step": 72950 + }, + { + "epoch": 2.05, + "learning_rate": 9.526013181881925e-06, + "loss": 0.1552, + "step": 73000 + }, + { + "epoch": 2.05, + "eval_bleu": 96.0944, + "eval_gen_len": 64.2553, + "eval_loss": 0.2440294772386551, + "eval_rouge1": 93.9391, + "eval_rouge2": 90.4451, + "eval_rougeL": 93.7575, + "eval_rougeLsum": 93.7421, + "eval_runtime": 3104.3345, + "eval_samples_per_second": 0.966, + "eval_steps_per_second": 0.242, + "step": 73000 + }, + { + "epoch": 2.05, + "learning_rate": 9.511989903239378e-06, + "loss": 0.1374, + "step": 73050 + }, + { + "epoch": 2.05, + "learning_rate": 9.49796662459683e-06, + "loss": 0.1416, + "step": 73100 + }, + { + "epoch": 2.05, + "learning_rate": 9.483943345954285e-06, + "loss": 0.1511, + "step": 73150 + }, + { + "epoch": 2.05, + "learning_rate": 9.469920067311738e-06, + "loss": 0.1287, + "step": 73200 + }, + { + "epoch": 2.05, + "learning_rate": 9.455896788669192e-06, + "loss": 0.154, + "step": 73250 + }, + { + "epoch": 2.06, + "learning_rate": 9.441873510026643e-06, + "loss": 0.1399, + "step": 73300 + }, + { + "epoch": 2.06, + "learning_rate": 9.427850231384098e-06, + "loss": 0.1371, + "step": 73350 + }, + { + "epoch": 2.06, + "learning_rate": 9.413826952741552e-06, + "loss": 0.1424, + "step": 73400 + }, + { + "epoch": 2.06, + "learning_rate": 9.399803674099005e-06, + "loss": 0.1441, + "step": 73450 + }, + { + "epoch": 2.06, + "learning_rate": 9.385780395456457e-06, + "loss": 0.1276, + "step": 73500 + }, + { + "epoch": 2.06, + "learning_rate": 9.371757116813912e-06, + "loss": 0.1268, + "step": 73550 + }, + { + "epoch": 2.06, + "learning_rate": 9.357733838171365e-06, + "loss": 0.1587, + "step": 73600 + }, + { + "epoch": 2.07, + "learning_rate": 9.343710559528819e-06, + "loss": 0.1475, + "step": 73650 + }, + { + "epoch": 2.07, + "learning_rate": 9.32968728088627e-06, + "loss": 0.1288, + "step": 73700 + }, + { + "epoch": 2.07, + "learning_rate": 9.315664002243725e-06, + "loss": 0.1432, + "step": 73750 + }, + { + "epoch": 2.07, + "learning_rate": 9.301640723601179e-06, + "loss": 0.148, + "step": 73800 + }, + { + "epoch": 2.07, + "learning_rate": 9.287617444958632e-06, + "loss": 0.1506, + "step": 73850 + }, + { + "epoch": 2.07, + "learning_rate": 9.273594166316084e-06, + "loss": 0.1345, + "step": 73900 + }, + { + "epoch": 2.07, + "learning_rate": 9.259570887673539e-06, + "loss": 0.1276, + "step": 73950 + }, + { + "epoch": 2.08, + "learning_rate": 9.245547609030992e-06, + "loss": 0.1491, + "step": 74000 + }, + { + "epoch": 2.08, + "eval_bleu": 96.07, + "eval_gen_len": 64.2557, + "eval_loss": 0.24399851262569427, + "eval_rouge1": 93.9143, + "eval_rouge2": 90.3682, + "eval_rougeL": 93.7204, + "eval_rougeLsum": 93.712, + "eval_runtime": 3103.8893, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 74000 + }, + { + "epoch": 2.08, + "learning_rate": 9.231524330388445e-06, + "loss": 0.1277, + "step": 74050 + }, + { + "epoch": 2.08, + "learning_rate": 9.217501051745897e-06, + "loss": 0.1306, + "step": 74100 + }, + { + "epoch": 2.08, + "learning_rate": 9.203477773103352e-06, + "loss": 0.1371, + "step": 74150 + }, + { + "epoch": 2.08, + "learning_rate": 9.189454494460806e-06, + "loss": 0.1384, + "step": 74200 + }, + { + "epoch": 2.08, + "learning_rate": 9.175431215818259e-06, + "loss": 0.1405, + "step": 74250 + }, + { + "epoch": 2.08, + "learning_rate": 9.161407937175712e-06, + "loss": 0.1373, + "step": 74300 + }, + { + "epoch": 2.09, + "learning_rate": 9.147384658533166e-06, + "loss": 0.1418, + "step": 74350 + }, + { + "epoch": 2.09, + "learning_rate": 9.133361379890619e-06, + "loss": 0.126, + "step": 74400 + }, + { + "epoch": 2.09, + "learning_rate": 9.119338101248072e-06, + "loss": 0.1738, + "step": 74450 + }, + { + "epoch": 2.09, + "learning_rate": 9.105314822605526e-06, + "loss": 0.1466, + "step": 74500 + }, + { + "epoch": 2.09, + "learning_rate": 9.091291543962979e-06, + "loss": 0.1381, + "step": 74550 + }, + { + "epoch": 2.09, + "learning_rate": 9.077268265320432e-06, + "loss": 0.1384, + "step": 74600 + }, + { + "epoch": 2.09, + "learning_rate": 9.063244986677886e-06, + "loss": 0.1442, + "step": 74650 + }, + { + "epoch": 2.1, + "learning_rate": 9.049221708035339e-06, + "loss": 0.1371, + "step": 74700 + }, + { + "epoch": 2.1, + "learning_rate": 9.035198429392793e-06, + "loss": 0.1331, + "step": 74750 + }, + { + "epoch": 2.1, + "learning_rate": 9.021175150750246e-06, + "loss": 0.136, + "step": 74800 + }, + { + "epoch": 2.1, + "learning_rate": 9.0071518721077e-06, + "loss": 0.1422, + "step": 74850 + }, + { + "epoch": 2.1, + "learning_rate": 8.993128593465153e-06, + "loss": 0.1386, + "step": 74900 + }, + { + "epoch": 2.1, + "learning_rate": 8.979105314822606e-06, + "loss": 0.1247, + "step": 74950 + }, + { + "epoch": 2.1, + "learning_rate": 8.96508203618006e-06, + "loss": 0.1567, + "step": 75000 + }, + { + "epoch": 2.1, + "eval_bleu": 96.1043, + "eval_gen_len": 64.258, + "eval_loss": 0.24418920278549194, + "eval_rouge1": 93.9448, + "eval_rouge2": 90.3671, + "eval_rougeL": 93.7538, + "eval_rougeLsum": 93.7435, + "eval_runtime": 3102.9976, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 75000 + }, + { + "epoch": 2.1, + "learning_rate": 8.951058757537513e-06, + "loss": 0.1308, + "step": 75050 + }, + { + "epoch": 2.11, + "learning_rate": 8.937035478894966e-06, + "loss": 0.1387, + "step": 75100 + }, + { + "epoch": 2.11, + "learning_rate": 8.92301220025242e-06, + "loss": 0.1356, + "step": 75150 + }, + { + "epoch": 2.11, + "learning_rate": 8.908988921609873e-06, + "loss": 0.1397, + "step": 75200 + }, + { + "epoch": 2.11, + "learning_rate": 8.894965642967326e-06, + "loss": 0.1276, + "step": 75250 + }, + { + "epoch": 2.11, + "learning_rate": 8.88094236432478e-06, + "loss": 0.1354, + "step": 75300 + }, + { + "epoch": 2.11, + "learning_rate": 8.866919085682233e-06, + "loss": 0.1537, + "step": 75350 + }, + { + "epoch": 2.11, + "learning_rate": 8.852895807039686e-06, + "loss": 0.1508, + "step": 75400 + }, + { + "epoch": 2.12, + "learning_rate": 8.83887252839714e-06, + "loss": 0.1659, + "step": 75450 + }, + { + "epoch": 2.12, + "learning_rate": 8.824849249754593e-06, + "loss": 0.1433, + "step": 75500 + }, + { + "epoch": 2.12, + "learning_rate": 8.810825971112046e-06, + "loss": 0.1315, + "step": 75550 + }, + { + "epoch": 2.12, + "learning_rate": 8.7968026924695e-06, + "loss": 0.1257, + "step": 75600 + }, + { + "epoch": 2.12, + "learning_rate": 8.782779413826953e-06, + "loss": 0.1226, + "step": 75650 + }, + { + "epoch": 2.12, + "learning_rate": 8.768756135184406e-06, + "loss": 0.1397, + "step": 75700 + }, + { + "epoch": 2.12, + "learning_rate": 8.754732856541861e-06, + "loss": 0.1245, + "step": 75750 + }, + { + "epoch": 2.13, + "learning_rate": 8.740709577899313e-06, + "loss": 0.1508, + "step": 75800 + }, + { + "epoch": 2.13, + "learning_rate": 8.726686299256766e-06, + "loss": 0.152, + "step": 75850 + }, + { + "epoch": 2.13, + "learning_rate": 8.71266302061422e-06, + "loss": 0.1475, + "step": 75900 + }, + { + "epoch": 2.13, + "learning_rate": 8.698639741971675e-06, + "loss": 0.1448, + "step": 75950 + }, + { + "epoch": 2.13, + "learning_rate": 8.684616463329126e-06, + "loss": 0.1444, + "step": 76000 + }, + { + "epoch": 2.13, + "eval_bleu": 96.0981, + "eval_gen_len": 64.2277, + "eval_loss": 0.24386686086654663, + "eval_rouge1": 93.9613, + "eval_rouge2": 90.4186, + "eval_rougeL": 93.7791, + "eval_rougeLsum": 93.7578, + "eval_runtime": 3114.9073, + "eval_samples_per_second": 0.963, + "eval_steps_per_second": 0.241, + "step": 76000 + }, + { + "epoch": 2.13, + "learning_rate": 8.67059318468658e-06, + "loss": 0.1552, + "step": 76050 + }, + { + "epoch": 2.13, + "learning_rate": 8.656569906044033e-06, + "loss": 0.151, + "step": 76100 + }, + { + "epoch": 2.14, + "learning_rate": 8.642546627401488e-06, + "loss": 0.1493, + "step": 76150 + }, + { + "epoch": 2.14, + "learning_rate": 8.62852334875894e-06, + "loss": 0.1279, + "step": 76200 + }, + { + "epoch": 2.14, + "learning_rate": 8.614500070116393e-06, + "loss": 0.1521, + "step": 76250 + }, + { + "epoch": 2.14, + "learning_rate": 8.600476791473847e-06, + "loss": 0.1565, + "step": 76300 + }, + { + "epoch": 2.14, + "learning_rate": 8.586453512831302e-06, + "loss": 0.1412, + "step": 76350 + }, + { + "epoch": 2.14, + "learning_rate": 8.572430234188753e-06, + "loss": 0.138, + "step": 76400 + }, + { + "epoch": 2.14, + "learning_rate": 8.558406955546207e-06, + "loss": 0.1302, + "step": 76450 + }, + { + "epoch": 2.15, + "learning_rate": 8.54438367690366e-06, + "loss": 0.1542, + "step": 76500 + }, + { + "epoch": 2.15, + "learning_rate": 8.530360398261115e-06, + "loss": 0.1341, + "step": 76550 + }, + { + "epoch": 2.15, + "learning_rate": 8.516337119618567e-06, + "loss": 0.1643, + "step": 76600 + }, + { + "epoch": 2.15, + "learning_rate": 8.50231384097602e-06, + "loss": 0.1574, + "step": 76650 + }, + { + "epoch": 2.15, + "learning_rate": 8.488290562333473e-06, + "loss": 0.1382, + "step": 76700 + }, + { + "epoch": 2.15, + "learning_rate": 8.474267283690929e-06, + "loss": 0.1355, + "step": 76750 + }, + { + "epoch": 2.15, + "learning_rate": 8.46024400504838e-06, + "loss": 0.1396, + "step": 76800 + }, + { + "epoch": 2.16, + "learning_rate": 8.446220726405834e-06, + "loss": 0.1244, + "step": 76850 + }, + { + "epoch": 2.16, + "learning_rate": 8.432197447763287e-06, + "loss": 0.1291, + "step": 76900 + }, + { + "epoch": 2.16, + "learning_rate": 8.418174169120742e-06, + "loss": 0.1402, + "step": 76950 + }, + { + "epoch": 2.16, + "learning_rate": 8.404150890478194e-06, + "loss": 0.151, + "step": 77000 + }, + { + "epoch": 2.16, + "eval_bleu": 96.1028, + "eval_gen_len": 64.2047, + "eval_loss": 0.243386372923851, + "eval_rouge1": 93.9323, + "eval_rouge2": 90.4104, + "eval_rougeL": 93.7353, + "eval_rougeLsum": 93.7303, + "eval_runtime": 3082.1904, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 77000 + }, + { + "epoch": 2.16, + "learning_rate": 8.390127611835647e-06, + "loss": 0.1471, + "step": 77050 + }, + { + "epoch": 2.16, + "learning_rate": 8.3761043331931e-06, + "loss": 0.1363, + "step": 77100 + }, + { + "epoch": 2.16, + "learning_rate": 8.362081054550555e-06, + "loss": 0.1174, + "step": 77150 + }, + { + "epoch": 2.17, + "learning_rate": 8.348057775908009e-06, + "loss": 0.1249, + "step": 77200 + }, + { + "epoch": 2.17, + "learning_rate": 8.33403449726546e-06, + "loss": 0.1323, + "step": 77250 + }, + { + "epoch": 2.17, + "learning_rate": 8.320011218622914e-06, + "loss": 0.1441, + "step": 77300 + }, + { + "epoch": 2.17, + "learning_rate": 8.305987939980369e-06, + "loss": 0.1354, + "step": 77350 + }, + { + "epoch": 2.17, + "learning_rate": 8.291964661337822e-06, + "loss": 0.1299, + "step": 77400 + }, + { + "epoch": 2.17, + "learning_rate": 8.277941382695274e-06, + "loss": 0.16, + "step": 77450 + }, + { + "epoch": 2.17, + "learning_rate": 8.263918104052727e-06, + "loss": 0.1471, + "step": 77500 + }, + { + "epoch": 2.18, + "learning_rate": 8.249894825410182e-06, + "loss": 0.1507, + "step": 77550 + }, + { + "epoch": 2.18, + "learning_rate": 8.235871546767636e-06, + "loss": 0.1345, + "step": 77600 + }, + { + "epoch": 2.18, + "learning_rate": 8.221848268125087e-06, + "loss": 0.1597, + "step": 77650 + }, + { + "epoch": 2.18, + "learning_rate": 8.20782498948254e-06, + "loss": 0.1361, + "step": 77700 + }, + { + "epoch": 2.18, + "learning_rate": 8.193801710839996e-06, + "loss": 0.1437, + "step": 77750 + }, + { + "epoch": 2.18, + "learning_rate": 8.179778432197449e-06, + "loss": 0.1438, + "step": 77800 + }, + { + "epoch": 2.18, + "learning_rate": 8.1657551535549e-06, + "loss": 0.1442, + "step": 77850 + }, + { + "epoch": 2.18, + "learning_rate": 8.151731874912354e-06, + "loss": 0.1451, + "step": 77900 + }, + { + "epoch": 2.19, + "learning_rate": 8.137708596269809e-06, + "loss": 0.1455, + "step": 77950 + }, + { + "epoch": 2.19, + "learning_rate": 8.123685317627262e-06, + "loss": 0.1331, + "step": 78000 + }, + { + "epoch": 2.19, + "eval_bleu": 96.0923, + "eval_gen_len": 64.236, + "eval_loss": 0.24420137703418732, + "eval_rouge1": 93.9515, + "eval_rouge2": 90.4414, + "eval_rougeL": 93.7733, + "eval_rougeLsum": 93.7553, + "eval_runtime": 3102.8715, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 78000 + }, + { + "epoch": 2.19, + "learning_rate": 8.109662038984714e-06, + "loss": 0.1332, + "step": 78050 + }, + { + "epoch": 2.19, + "learning_rate": 8.095638760342167e-06, + "loss": 0.1361, + "step": 78100 + }, + { + "epoch": 2.19, + "learning_rate": 8.081615481699623e-06, + "loss": 0.1394, + "step": 78150 + }, + { + "epoch": 2.19, + "learning_rate": 8.067592203057076e-06, + "loss": 0.1387, + "step": 78200 + }, + { + "epoch": 2.19, + "learning_rate": 8.053568924414528e-06, + "loss": 0.1264, + "step": 78250 + }, + { + "epoch": 2.2, + "learning_rate": 8.039545645771981e-06, + "loss": 0.1464, + "step": 78300 + }, + { + "epoch": 2.2, + "learning_rate": 8.025522367129436e-06, + "loss": 0.1574, + "step": 78350 + }, + { + "epoch": 2.2, + "learning_rate": 8.01149908848689e-06, + "loss": 0.1449, + "step": 78400 + }, + { + "epoch": 2.2, + "learning_rate": 7.997475809844341e-06, + "loss": 0.1361, + "step": 78450 + }, + { + "epoch": 2.2, + "learning_rate": 7.983452531201794e-06, + "loss": 0.1254, + "step": 78500 + }, + { + "epoch": 2.2, + "learning_rate": 7.96942925255925e-06, + "loss": 0.1396, + "step": 78550 + }, + { + "epoch": 2.2, + "learning_rate": 7.955405973916703e-06, + "loss": 0.1437, + "step": 78600 + }, + { + "epoch": 2.21, + "learning_rate": 7.941382695274156e-06, + "loss": 0.1434, + "step": 78650 + }, + { + "epoch": 2.21, + "learning_rate": 7.927359416631608e-06, + "loss": 0.1273, + "step": 78700 + }, + { + "epoch": 2.21, + "learning_rate": 7.913336137989063e-06, + "loss": 0.1393, + "step": 78750 + }, + { + "epoch": 2.21, + "learning_rate": 7.899312859346516e-06, + "loss": 0.1356, + "step": 78800 + }, + { + "epoch": 2.21, + "learning_rate": 7.88528958070397e-06, + "loss": 0.1554, + "step": 78850 + }, + { + "epoch": 2.21, + "learning_rate": 7.871266302061421e-06, + "loss": 0.1372, + "step": 78900 + }, + { + "epoch": 2.21, + "learning_rate": 7.857243023418876e-06, + "loss": 0.1407, + "step": 78950 + }, + { + "epoch": 2.22, + "learning_rate": 7.84321974477633e-06, + "loss": 0.1658, + "step": 79000 + }, + { + "epoch": 2.22, + "eval_bleu": 96.0964, + "eval_gen_len": 64.256, + "eval_loss": 0.24366088211536407, + "eval_rouge1": 93.9674, + "eval_rouge2": 90.4223, + "eval_rougeL": 93.7734, + "eval_rougeLsum": 93.7712, + "eval_runtime": 3096.7975, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 79000 + }, + { + "epoch": 2.22, + "learning_rate": 7.829196466133783e-06, + "loss": 0.1379, + "step": 79050 + }, + { + "epoch": 2.22, + "learning_rate": 7.815173187491235e-06, + "loss": 0.1474, + "step": 79100 + }, + { + "epoch": 2.22, + "learning_rate": 7.80114990884869e-06, + "loss": 0.1297, + "step": 79150 + }, + { + "epoch": 2.22, + "learning_rate": 7.787126630206143e-06, + "loss": 0.145, + "step": 79200 + }, + { + "epoch": 2.22, + "learning_rate": 7.773103351563596e-06, + "loss": 0.1223, + "step": 79250 + }, + { + "epoch": 2.22, + "learning_rate": 7.759080072921048e-06, + "loss": 0.1425, + "step": 79300 + }, + { + "epoch": 2.23, + "learning_rate": 7.745056794278503e-06, + "loss": 0.1358, + "step": 79350 + }, + { + "epoch": 2.23, + "learning_rate": 7.731033515635956e-06, + "loss": 0.1391, + "step": 79400 + }, + { + "epoch": 2.23, + "learning_rate": 7.71701023699341e-06, + "loss": 0.1581, + "step": 79450 + }, + { + "epoch": 2.23, + "learning_rate": 7.702986958350861e-06, + "loss": 0.1425, + "step": 79500 + }, + { + "epoch": 2.23, + "learning_rate": 7.688963679708317e-06, + "loss": 0.15, + "step": 79550 + }, + { + "epoch": 2.23, + "learning_rate": 7.67494040106577e-06, + "loss": 0.1496, + "step": 79600 + }, + { + "epoch": 2.23, + "learning_rate": 7.660917122423223e-06, + "loss": 0.1609, + "step": 79650 + }, + { + "epoch": 2.24, + "learning_rate": 7.646893843780675e-06, + "loss": 0.1502, + "step": 79700 + }, + { + "epoch": 2.24, + "learning_rate": 7.63287056513813e-06, + "loss": 0.1345, + "step": 79750 + }, + { + "epoch": 2.24, + "learning_rate": 7.618847286495583e-06, + "loss": 0.1429, + "step": 79800 + }, + { + "epoch": 2.24, + "learning_rate": 7.604824007853036e-06, + "loss": 0.1408, + "step": 79850 + }, + { + "epoch": 2.24, + "learning_rate": 7.590800729210489e-06, + "loss": 0.1613, + "step": 79900 + }, + { + "epoch": 2.24, + "learning_rate": 7.576777450567943e-06, + "loss": 0.1605, + "step": 79950 + }, + { + "epoch": 2.24, + "learning_rate": 7.562754171925397e-06, + "loss": 0.1396, + "step": 80000 + }, + { + "epoch": 2.24, + "eval_bleu": 96.1134, + "eval_gen_len": 64.2447, + "eval_loss": 0.2430116832256317, + "eval_rouge1": 93.9967, + "eval_rouge2": 90.463, + "eval_rougeL": 93.817, + "eval_rougeLsum": 93.8046, + "eval_runtime": 3075.4099, + "eval_samples_per_second": 0.975, + "eval_steps_per_second": 0.244, + "step": 80000 + }, + { + "epoch": 2.25, + "learning_rate": 7.54873089328285e-06, + "loss": 0.147, + "step": 80050 + }, + { + "epoch": 2.25, + "learning_rate": 7.534707614640303e-06, + "loss": 0.1501, + "step": 80100 + }, + { + "epoch": 2.25, + "learning_rate": 7.520684335997758e-06, + "loss": 0.1215, + "step": 80150 + }, + { + "epoch": 2.25, + "learning_rate": 7.50666105735521e-06, + "loss": 0.1323, + "step": 80200 + }, + { + "epoch": 2.25, + "learning_rate": 7.4926377787126635e-06, + "loss": 0.1307, + "step": 80250 + }, + { + "epoch": 2.25, + "learning_rate": 7.478614500070117e-06, + "loss": 0.1369, + "step": 80300 + }, + { + "epoch": 2.25, + "learning_rate": 7.46459122142757e-06, + "loss": 0.1502, + "step": 80350 + }, + { + "epoch": 2.25, + "learning_rate": 7.450567942785024e-06, + "loss": 0.14, + "step": 80400 + }, + { + "epoch": 2.26, + "learning_rate": 7.436544664142477e-06, + "loss": 0.1411, + "step": 80450 + }, + { + "epoch": 2.26, + "learning_rate": 7.42252138549993e-06, + "loss": 0.1401, + "step": 80500 + }, + { + "epoch": 2.26, + "learning_rate": 7.408498106857384e-06, + "loss": 0.1713, + "step": 80550 + }, + { + "epoch": 2.26, + "learning_rate": 7.394474828214837e-06, + "loss": 0.1678, + "step": 80600 + }, + { + "epoch": 2.26, + "learning_rate": 7.38045154957229e-06, + "loss": 0.1203, + "step": 80650 + }, + { + "epoch": 2.26, + "learning_rate": 7.366428270929744e-06, + "loss": 0.1635, + "step": 80700 + }, + { + "epoch": 2.26, + "learning_rate": 7.352404992287197e-06, + "loss": 0.1318, + "step": 80750 + }, + { + "epoch": 2.27, + "learning_rate": 7.3383817136446505e-06, + "loss": 0.1292, + "step": 80800 + }, + { + "epoch": 2.27, + "learning_rate": 7.324358435002104e-06, + "loss": 0.1433, + "step": 80850 + }, + { + "epoch": 2.27, + "learning_rate": 7.310335156359557e-06, + "loss": 0.1572, + "step": 80900 + }, + { + "epoch": 2.27, + "learning_rate": 7.2963118777170105e-06, + "loss": 0.1338, + "step": 80950 + }, + { + "epoch": 2.27, + "learning_rate": 7.282288599074464e-06, + "loss": 0.147, + "step": 81000 + }, + { + "epoch": 2.27, + "eval_bleu": 96.1115, + "eval_gen_len": 64.2613, + "eval_loss": 0.24394148588180542, + "eval_rouge1": 93.9741, + "eval_rouge2": 90.4331, + "eval_rougeL": 93.775, + "eval_rougeLsum": 93.7668, + "eval_runtime": 3119.4921, + "eval_samples_per_second": 0.962, + "eval_steps_per_second": 0.24, + "step": 81000 + }, + { + "epoch": 2.27, + "learning_rate": 7.268265320431917e-06, + "loss": 0.1329, + "step": 81050 + }, + { + "epoch": 2.27, + "learning_rate": 7.254242041789371e-06, + "loss": 0.1416, + "step": 81100 + }, + { + "epoch": 2.28, + "learning_rate": 7.240218763146824e-06, + "loss": 0.1443, + "step": 81150 + }, + { + "epoch": 2.28, + "learning_rate": 7.226195484504277e-06, + "loss": 0.1519, + "step": 81200 + }, + { + "epoch": 2.28, + "learning_rate": 7.212172205861731e-06, + "loss": 0.1501, + "step": 81250 + }, + { + "epoch": 2.28, + "learning_rate": 7.198148927219184e-06, + "loss": 0.1465, + "step": 81300 + }, + { + "epoch": 2.28, + "learning_rate": 7.184125648576637e-06, + "loss": 0.1639, + "step": 81350 + }, + { + "epoch": 2.28, + "learning_rate": 7.170102369934091e-06, + "loss": 0.1335, + "step": 81400 + }, + { + "epoch": 2.28, + "learning_rate": 7.156079091291544e-06, + "loss": 0.1588, + "step": 81450 + }, + { + "epoch": 2.29, + "learning_rate": 7.142055812648998e-06, + "loss": 0.1513, + "step": 81500 + }, + { + "epoch": 2.29, + "learning_rate": 7.128032534006451e-06, + "loss": 0.1523, + "step": 81550 + }, + { + "epoch": 2.29, + "learning_rate": 7.114009255363905e-06, + "loss": 0.1477, + "step": 81600 + }, + { + "epoch": 2.29, + "learning_rate": 7.0999859767213576e-06, + "loss": 0.1269, + "step": 81650 + }, + { + "epoch": 2.29, + "learning_rate": 7.085962698078812e-06, + "loss": 0.1501, + "step": 81700 + }, + { + "epoch": 2.29, + "learning_rate": 7.071939419436264e-06, + "loss": 0.1421, + "step": 81750 + }, + { + "epoch": 2.29, + "learning_rate": 7.0579161407937185e-06, + "loss": 0.151, + "step": 81800 + }, + { + "epoch": 2.3, + "learning_rate": 7.043892862151171e-06, + "loss": 0.1383, + "step": 81850 + }, + { + "epoch": 2.3, + "learning_rate": 7.029869583508625e-06, + "loss": 0.142, + "step": 81900 + }, + { + "epoch": 2.3, + "learning_rate": 7.015846304866078e-06, + "loss": 0.1574, + "step": 81950 + }, + { + "epoch": 2.3, + "learning_rate": 7.001823026223532e-06, + "loss": 0.1601, + "step": 82000 + }, + { + "epoch": 2.3, + "eval_bleu": 96.0955, + "eval_gen_len": 64.251, + "eval_loss": 0.24331849813461304, + "eval_rouge1": 93.9796, + "eval_rouge2": 90.4277, + "eval_rougeL": 93.7849, + "eval_rougeLsum": 93.7712, + "eval_runtime": 3157.1975, + "eval_samples_per_second": 0.95, + "eval_steps_per_second": 0.238, + "step": 82000 + }, + { + "epoch": 2.3, + "learning_rate": 6.987799747580984e-06, + "loss": 0.1371, + "step": 82050 + }, + { + "epoch": 2.3, + "learning_rate": 6.973776468938439e-06, + "loss": 0.1392, + "step": 82100 + }, + { + "epoch": 2.3, + "learning_rate": 6.959753190295891e-06, + "loss": 0.1309, + "step": 82150 + }, + { + "epoch": 2.31, + "learning_rate": 6.945729911653345e-06, + "loss": 0.1614, + "step": 82200 + }, + { + "epoch": 2.31, + "learning_rate": 6.931706633010798e-06, + "loss": 0.1377, + "step": 82250 + }, + { + "epoch": 2.31, + "learning_rate": 6.917683354368251e-06, + "loss": 0.1473, + "step": 82300 + }, + { + "epoch": 2.31, + "learning_rate": 6.9036600757257046e-06, + "loss": 0.145, + "step": 82350 + }, + { + "epoch": 2.31, + "learning_rate": 6.889636797083158e-06, + "loss": 0.1601, + "step": 82400 + }, + { + "epoch": 2.31, + "learning_rate": 6.875613518440611e-06, + "loss": 0.1367, + "step": 82450 + }, + { + "epoch": 2.31, + "learning_rate": 6.861590239798065e-06, + "loss": 0.1492, + "step": 82500 + }, + { + "epoch": 2.32, + "learning_rate": 6.847566961155518e-06, + "loss": 0.1403, + "step": 82550 + }, + { + "epoch": 2.32, + "learning_rate": 6.833543682512971e-06, + "loss": 0.1419, + "step": 82600 + }, + { + "epoch": 2.32, + "learning_rate": 6.819520403870425e-06, + "loss": 0.1287, + "step": 82650 + }, + { + "epoch": 2.32, + "learning_rate": 6.805497125227878e-06, + "loss": 0.14, + "step": 82700 + }, + { + "epoch": 2.32, + "learning_rate": 6.7914738465853314e-06, + "loss": 0.1348, + "step": 82750 + }, + { + "epoch": 2.32, + "learning_rate": 6.777450567942785e-06, + "loss": 0.1375, + "step": 82800 + }, + { + "epoch": 2.32, + "learning_rate": 6.763427289300239e-06, + "loss": 0.1324, + "step": 82850 + }, + { + "epoch": 2.33, + "learning_rate": 6.7494040106576915e-06, + "loss": 0.1366, + "step": 82900 + }, + { + "epoch": 2.33, + "learning_rate": 6.735380732015146e-06, + "loss": 0.1333, + "step": 82950 + }, + { + "epoch": 2.33, + "learning_rate": 6.721357453372598e-06, + "loss": 0.1389, + "step": 83000 + }, + { + "epoch": 2.33, + "eval_bleu": 96.1354, + "eval_gen_len": 64.263, + "eval_loss": 0.24297164380550385, + "eval_rouge1": 93.9976, + "eval_rouge2": 90.4522, + "eval_rougeL": 93.8079, + "eval_rougeLsum": 93.7903, + "eval_runtime": 3153.994, + "eval_samples_per_second": 0.951, + "eval_steps_per_second": 0.238, + "step": 83000 + }, + { + "epoch": 2.33, + "learning_rate": 6.707334174730052e-06, + "loss": 0.1571, + "step": 83050 + }, + { + "epoch": 2.33, + "learning_rate": 6.693310896087505e-06, + "loss": 0.1383, + "step": 83100 + }, + { + "epoch": 2.33, + "learning_rate": 6.679287617444959e-06, + "loss": 0.1524, + "step": 83150 + }, + { + "epoch": 2.33, + "learning_rate": 6.665264338802412e-06, + "loss": 0.1568, + "step": 83200 + }, + { + "epoch": 2.33, + "learning_rate": 6.651241060159866e-06, + "loss": 0.1226, + "step": 83250 + }, + { + "epoch": 2.34, + "learning_rate": 6.637217781517318e-06, + "loss": 0.1259, + "step": 83300 + }, + { + "epoch": 2.34, + "learning_rate": 6.6231945028747726e-06, + "loss": 0.1466, + "step": 83350 + }, + { + "epoch": 2.34, + "learning_rate": 6.609171224232225e-06, + "loss": 0.1464, + "step": 83400 + }, + { + "epoch": 2.34, + "learning_rate": 6.595147945589679e-06, + "loss": 0.1355, + "step": 83450 + }, + { + "epoch": 2.34, + "learning_rate": 6.581124666947132e-06, + "loss": 0.1387, + "step": 83500 + }, + { + "epoch": 2.34, + "learning_rate": 6.567101388304586e-06, + "loss": 0.1477, + "step": 83550 + }, + { + "epoch": 2.34, + "learning_rate": 6.5530781096620385e-06, + "loss": 0.1507, + "step": 83600 + }, + { + "epoch": 2.35, + "learning_rate": 6.539054831019493e-06, + "loss": 0.1477, + "step": 83650 + }, + { + "epoch": 2.35, + "learning_rate": 6.525031552376945e-06, + "loss": 0.1483, + "step": 83700 + }, + { + "epoch": 2.35, + "learning_rate": 6.5110082737343994e-06, + "loss": 0.1524, + "step": 83750 + }, + { + "epoch": 2.35, + "learning_rate": 6.496984995091852e-06, + "loss": 0.1522, + "step": 83800 + }, + { + "epoch": 2.35, + "learning_rate": 6.482961716449306e-06, + "loss": 0.1347, + "step": 83850 + }, + { + "epoch": 2.35, + "learning_rate": 6.468938437806759e-06, + "loss": 0.1562, + "step": 83900 + }, + { + "epoch": 2.35, + "learning_rate": 6.454915159164213e-06, + "loss": 0.1554, + "step": 83950 + }, + { + "epoch": 2.36, + "learning_rate": 6.440891880521665e-06, + "loss": 0.1416, + "step": 84000 + }, + { + "epoch": 2.36, + "eval_bleu": 96.1053, + "eval_gen_len": 64.255, + "eval_loss": 0.2427954524755478, + "eval_rouge1": 93.9492, + "eval_rouge2": 90.4232, + "eval_rougeL": 93.7656, + "eval_rougeLsum": 93.7482, + "eval_runtime": 3082.0924, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 84000 + }, + { + "epoch": 2.36, + "learning_rate": 6.4268686018791196e-06, + "loss": 0.1348, + "step": 84050 + }, + { + "epoch": 2.36, + "learning_rate": 6.412845323236572e-06, + "loss": 0.1689, + "step": 84100 + }, + { + "epoch": 2.36, + "learning_rate": 6.398822044594026e-06, + "loss": 0.139, + "step": 84150 + }, + { + "epoch": 2.36, + "learning_rate": 6.384798765951479e-06, + "loss": 0.1409, + "step": 84200 + }, + { + "epoch": 2.36, + "learning_rate": 6.370775487308933e-06, + "loss": 0.1619, + "step": 84250 + }, + { + "epoch": 2.36, + "learning_rate": 6.356752208666386e-06, + "loss": 0.1342, + "step": 84300 + }, + { + "epoch": 2.37, + "learning_rate": 6.34272893002384e-06, + "loss": 0.1418, + "step": 84350 + }, + { + "epoch": 2.37, + "learning_rate": 6.328705651381293e-06, + "loss": 0.1504, + "step": 84400 + }, + { + "epoch": 2.37, + "learning_rate": 6.3146823727387464e-06, + "loss": 0.1529, + "step": 84450 + }, + { + "epoch": 2.37, + "learning_rate": 6.3006590940962e-06, + "loss": 0.1363, + "step": 84500 + }, + { + "epoch": 2.37, + "learning_rate": 6.286635815453653e-06, + "loss": 0.1557, + "step": 84550 + }, + { + "epoch": 2.37, + "learning_rate": 6.2726125368111065e-06, + "loss": 0.14, + "step": 84600 + }, + { + "epoch": 2.37, + "learning_rate": 6.25858925816856e-06, + "loss": 0.139, + "step": 84650 + }, + { + "epoch": 2.38, + "learning_rate": 6.244565979526013e-06, + "loss": 0.1357, + "step": 84700 + }, + { + "epoch": 2.38, + "learning_rate": 6.230542700883467e-06, + "loss": 0.154, + "step": 84750 + }, + { + "epoch": 2.38, + "learning_rate": 6.21651942224092e-06, + "loss": 0.1294, + "step": 84800 + }, + { + "epoch": 2.38, + "learning_rate": 6.202496143598373e-06, + "loss": 0.1392, + "step": 84850 + }, + { + "epoch": 2.38, + "learning_rate": 6.188472864955827e-06, + "loss": 0.1495, + "step": 84900 + }, + { + "epoch": 2.38, + "learning_rate": 6.17444958631328e-06, + "loss": 0.147, + "step": 84950 + }, + { + "epoch": 2.38, + "learning_rate": 6.160426307670733e-06, + "loss": 0.1411, + "step": 85000 + }, + { + "epoch": 2.38, + "eval_bleu": 96.1115, + "eval_gen_len": 64.2543, + "eval_loss": 0.24300958216190338, + "eval_rouge1": 93.9688, + "eval_rouge2": 90.4201, + "eval_rougeL": 93.7772, + "eval_rougeLsum": 93.7569, + "eval_runtime": 3128.8052, + "eval_samples_per_second": 0.959, + "eval_steps_per_second": 0.24, + "step": 85000 + }, + { + "epoch": 2.39, + "learning_rate": 6.146403029028187e-06, + "loss": 0.1596, + "step": 85050 + }, + { + "epoch": 2.39, + "learning_rate": 6.13237975038564e-06, + "loss": 0.1376, + "step": 85100 + }, + { + "epoch": 2.39, + "learning_rate": 6.1183564717430934e-06, + "loss": 0.1409, + "step": 85150 + }, + { + "epoch": 2.39, + "learning_rate": 6.104333193100547e-06, + "loss": 0.1576, + "step": 85200 + }, + { + "epoch": 2.39, + "learning_rate": 6.090309914458e-06, + "loss": 0.1442, + "step": 85250 + }, + { + "epoch": 2.39, + "learning_rate": 6.0762866358154535e-06, + "loss": 0.1338, + "step": 85300 + }, + { + "epoch": 2.39, + "learning_rate": 6.062263357172907e-06, + "loss": 0.1561, + "step": 85350 + }, + { + "epoch": 2.4, + "learning_rate": 6.04824007853036e-06, + "loss": 0.1302, + "step": 85400 + }, + { + "epoch": 2.4, + "learning_rate": 6.034216799887814e-06, + "loss": 0.1793, + "step": 85450 + }, + { + "epoch": 2.4, + "learning_rate": 6.020193521245267e-06, + "loss": 0.1602, + "step": 85500 + }, + { + "epoch": 2.4, + "learning_rate": 6.00617024260272e-06, + "loss": 0.143, + "step": 85550 + }, + { + "epoch": 2.4, + "learning_rate": 5.992146963960174e-06, + "loss": 0.1253, + "step": 85600 + }, + { + "epoch": 2.4, + "learning_rate": 5.978123685317627e-06, + "loss": 0.141, + "step": 85650 + }, + { + "epoch": 2.4, + "learning_rate": 5.96410040667508e-06, + "loss": 0.1298, + "step": 85700 + }, + { + "epoch": 2.4, + "learning_rate": 5.950077128032535e-06, + "loss": 0.1495, + "step": 85750 + }, + { + "epoch": 2.41, + "learning_rate": 5.936053849389987e-06, + "loss": 0.1397, + "step": 85800 + }, + { + "epoch": 2.41, + "learning_rate": 5.922030570747441e-06, + "loss": 0.118, + "step": 85850 + }, + { + "epoch": 2.41, + "learning_rate": 5.908007292104894e-06, + "loss": 0.1418, + "step": 85900 + }, + { + "epoch": 2.41, + "learning_rate": 5.893984013462348e-06, + "loss": 0.1633, + "step": 85950 + }, + { + "epoch": 2.41, + "learning_rate": 5.8799607348198005e-06, + "loss": 0.1351, + "step": 86000 + }, + { + "epoch": 2.41, + "eval_bleu": 96.1109, + "eval_gen_len": 64.2517, + "eval_loss": 0.24320924282073975, + "eval_rouge1": 93.9905, + "eval_rouge2": 90.4711, + "eval_rougeL": 93.7884, + "eval_rougeLsum": 93.7735, + "eval_runtime": 3131.4139, + "eval_samples_per_second": 0.958, + "eval_steps_per_second": 0.24, + "step": 86000 + }, + { + "epoch": 2.41, + "learning_rate": 5.865937456177255e-06, + "loss": 0.1365, + "step": 86050 + }, + { + "epoch": 2.41, + "learning_rate": 5.851914177534707e-06, + "loss": 0.1397, + "step": 86100 + }, + { + "epoch": 2.42, + "learning_rate": 5.8378908988921614e-06, + "loss": 0.1393, + "step": 86150 + }, + { + "epoch": 2.42, + "learning_rate": 5.823867620249614e-06, + "loss": 0.1381, + "step": 86200 + }, + { + "epoch": 2.42, + "learning_rate": 5.809844341607068e-06, + "loss": 0.1503, + "step": 86250 + }, + { + "epoch": 2.42, + "learning_rate": 5.795821062964521e-06, + "loss": 0.1352, + "step": 86300 + }, + { + "epoch": 2.42, + "learning_rate": 5.781797784321975e-06, + "loss": 0.1371, + "step": 86350 + }, + { + "epoch": 2.42, + "learning_rate": 5.767774505679427e-06, + "loss": 0.1468, + "step": 86400 + }, + { + "epoch": 2.42, + "learning_rate": 5.753751227036882e-06, + "loss": 0.1402, + "step": 86450 + }, + { + "epoch": 2.43, + "learning_rate": 5.739727948394334e-06, + "loss": 0.1472, + "step": 86500 + }, + { + "epoch": 2.43, + "learning_rate": 5.725704669751788e-06, + "loss": 0.1517, + "step": 86550 + }, + { + "epoch": 2.43, + "learning_rate": 5.711681391109241e-06, + "loss": 0.1518, + "step": 86600 + }, + { + "epoch": 2.43, + "learning_rate": 5.697658112466695e-06, + "loss": 0.1366, + "step": 86650 + }, + { + "epoch": 2.43, + "learning_rate": 5.6836348338241475e-06, + "loss": 0.1325, + "step": 86700 + }, + { + "epoch": 2.43, + "learning_rate": 5.669611555181602e-06, + "loss": 0.1415, + "step": 86750 + }, + { + "epoch": 2.43, + "learning_rate": 5.655588276539054e-06, + "loss": 0.155, + "step": 86800 + }, + { + "epoch": 2.44, + "learning_rate": 5.6415649978965084e-06, + "loss": 0.1388, + "step": 86850 + }, + { + "epoch": 2.44, + "learning_rate": 5.627541719253961e-06, + "loss": 0.1539, + "step": 86900 + }, + { + "epoch": 2.44, + "learning_rate": 5.613518440611415e-06, + "loss": 0.1373, + "step": 86950 + }, + { + "epoch": 2.44, + "learning_rate": 5.599495161968868e-06, + "loss": 0.1272, + "step": 87000 + }, + { + "epoch": 2.44, + "eval_bleu": 96.107, + "eval_gen_len": 64.2437, + "eval_loss": 0.2423429638147354, + "eval_rouge1": 93.9715, + "eval_rouge2": 90.4552, + "eval_rougeL": 93.783, + "eval_rougeLsum": 93.7701, + "eval_runtime": 3108.876, + "eval_samples_per_second": 0.965, + "eval_steps_per_second": 0.241, + "step": 87000 + }, + { + "epoch": 2.44, + "learning_rate": 5.585471883326322e-06, + "loss": 0.1392, + "step": 87050 + }, + { + "epoch": 2.44, + "learning_rate": 5.571448604683775e-06, + "loss": 0.1521, + "step": 87100 + }, + { + "epoch": 2.44, + "learning_rate": 5.557425326041229e-06, + "loss": 0.1498, + "step": 87150 + }, + { + "epoch": 2.45, + "learning_rate": 5.543402047398682e-06, + "loss": 0.1395, + "step": 87200 + }, + { + "epoch": 2.45, + "learning_rate": 5.529378768756135e-06, + "loss": 0.1411, + "step": 87250 + }, + { + "epoch": 2.45, + "learning_rate": 5.515355490113589e-06, + "loss": 0.141, + "step": 87300 + }, + { + "epoch": 2.45, + "learning_rate": 5.501332211471042e-06, + "loss": 0.1477, + "step": 87350 + }, + { + "epoch": 2.45, + "learning_rate": 5.487308932828495e-06, + "loss": 0.1756, + "step": 87400 + }, + { + "epoch": 2.45, + "learning_rate": 5.473285654185949e-06, + "loss": 0.1696, + "step": 87450 + }, + { + "epoch": 2.45, + "learning_rate": 5.459262375543402e-06, + "loss": 0.146, + "step": 87500 + }, + { + "epoch": 2.46, + "learning_rate": 5.4452390969008555e-06, + "loss": 0.1241, + "step": 87550 + }, + { + "epoch": 2.46, + "learning_rate": 5.431215818258309e-06, + "loss": 0.147, + "step": 87600 + }, + { + "epoch": 2.46, + "learning_rate": 5.417192539615762e-06, + "loss": 0.1288, + "step": 87650 + }, + { + "epoch": 2.46, + "learning_rate": 5.4031692609732155e-06, + "loss": 0.14, + "step": 87700 + }, + { + "epoch": 2.46, + "learning_rate": 5.389145982330669e-06, + "loss": 0.1577, + "step": 87750 + }, + { + "epoch": 2.46, + "learning_rate": 5.375122703688122e-06, + "loss": 0.1302, + "step": 87800 + }, + { + "epoch": 2.46, + "learning_rate": 5.361099425045576e-06, + "loss": 0.1343, + "step": 87850 + }, + { + "epoch": 2.47, + "learning_rate": 5.347076146403029e-06, + "loss": 0.1218, + "step": 87900 + }, + { + "epoch": 2.47, + "learning_rate": 5.333052867760482e-06, + "loss": 0.1592, + "step": 87950 + }, + { + "epoch": 2.47, + "learning_rate": 5.319029589117936e-06, + "loss": 0.1501, + "step": 88000 + }, + { + "epoch": 2.47, + "eval_bleu": 96.0986, + "eval_gen_len": 64.2647, + "eval_loss": 0.2428046017885208, + "eval_rouge1": 93.9542, + "eval_rouge2": 90.3776, + "eval_rougeL": 93.7639, + "eval_rougeLsum": 93.7476, + "eval_runtime": 3098.6862, + "eval_samples_per_second": 0.968, + "eval_steps_per_second": 0.242, + "step": 88000 + }, + { + "epoch": 2.47, + "learning_rate": 5.305006310475389e-06, + "loss": 0.1199, + "step": 88050 + }, + { + "epoch": 2.47, + "learning_rate": 5.290983031832842e-06, + "loss": 0.1412, + "step": 88100 + }, + { + "epoch": 2.47, + "learning_rate": 5.276959753190296e-06, + "loss": 0.1489, + "step": 88150 + }, + { + "epoch": 2.47, + "learning_rate": 5.262936474547749e-06, + "loss": 0.1319, + "step": 88200 + }, + { + "epoch": 2.48, + "learning_rate": 5.2489131959052025e-06, + "loss": 0.1308, + "step": 88250 + }, + { + "epoch": 2.48, + "learning_rate": 5.234889917262656e-06, + "loss": 0.116, + "step": 88300 + }, + { + "epoch": 2.48, + "learning_rate": 5.220866638620109e-06, + "loss": 0.1496, + "step": 88350 + }, + { + "epoch": 2.48, + "learning_rate": 5.2068433599775625e-06, + "loss": 0.1404, + "step": 88400 + }, + { + "epoch": 2.48, + "learning_rate": 5.192820081335016e-06, + "loss": 0.1378, + "step": 88450 + }, + { + "epoch": 2.48, + "learning_rate": 5.178796802692469e-06, + "loss": 0.1391, + "step": 88500 + }, + { + "epoch": 2.48, + "learning_rate": 5.1647735240499235e-06, + "loss": 0.1307, + "step": 88550 + }, + { + "epoch": 2.48, + "learning_rate": 5.150750245407376e-06, + "loss": 0.1454, + "step": 88600 + }, + { + "epoch": 2.49, + "learning_rate": 5.13672696676483e-06, + "loss": 0.1421, + "step": 88650 + }, + { + "epoch": 2.49, + "learning_rate": 5.122703688122283e-06, + "loss": 0.1483, + "step": 88700 + }, + { + "epoch": 2.49, + "learning_rate": 5.108680409479737e-06, + "loss": 0.1311, + "step": 88750 + }, + { + "epoch": 2.49, + "learning_rate": 5.094657130837189e-06, + "loss": 0.1403, + "step": 88800 + }, + { + "epoch": 2.49, + "learning_rate": 5.080633852194644e-06, + "loss": 0.137, + "step": 88850 + }, + { + "epoch": 2.49, + "learning_rate": 5.066610573552096e-06, + "loss": 0.1562, + "step": 88900 + }, + { + "epoch": 2.49, + "learning_rate": 5.05258729490955e-06, + "loss": 0.1512, + "step": 88950 + }, + { + "epoch": 2.5, + "learning_rate": 5.038564016267003e-06, + "loss": 0.1537, + "step": 89000 + }, + { + "epoch": 2.5, + "eval_bleu": 96.0983, + "eval_gen_len": 64.2437, + "eval_loss": 0.2420242875814438, + "eval_rouge1": 93.9781, + "eval_rouge2": 90.4393, + "eval_rougeL": 93.7904, + "eval_rougeLsum": 93.78, + "eval_runtime": 3102.7126, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 89000 + }, + { + "epoch": 2.5, + "learning_rate": 5.024540737624457e-06, + "loss": 0.1406, + "step": 89050 + }, + { + "epoch": 2.5, + "learning_rate": 5.0105174589819095e-06, + "loss": 0.1533, + "step": 89100 + }, + { + "epoch": 2.5, + "learning_rate": 4.996494180339364e-06, + "loss": 0.1394, + "step": 89150 + }, + { + "epoch": 2.5, + "learning_rate": 4.982470901696816e-06, + "loss": 0.1335, + "step": 89200 + }, + { + "epoch": 2.5, + "learning_rate": 4.9684476230542705e-06, + "loss": 0.126, + "step": 89250 + }, + { + "epoch": 2.5, + "learning_rate": 4.954424344411723e-06, + "loss": 0.1383, + "step": 89300 + }, + { + "epoch": 2.51, + "learning_rate": 4.940401065769177e-06, + "loss": 0.1488, + "step": 89350 + }, + { + "epoch": 2.51, + "learning_rate": 4.92637778712663e-06, + "loss": 0.151, + "step": 89400 + }, + { + "epoch": 2.51, + "learning_rate": 4.912354508484084e-06, + "loss": 0.137, + "step": 89450 + }, + { + "epoch": 2.51, + "learning_rate": 4.898331229841536e-06, + "loss": 0.1622, + "step": 89500 + }, + { + "epoch": 2.51, + "learning_rate": 4.884307951198991e-06, + "loss": 0.1332, + "step": 89550 + }, + { + "epoch": 2.51, + "learning_rate": 4.870284672556443e-06, + "loss": 0.159, + "step": 89600 + }, + { + "epoch": 2.51, + "learning_rate": 4.856261393913897e-06, + "loss": 0.1437, + "step": 89650 + }, + { + "epoch": 2.52, + "learning_rate": 4.84223811527135e-06, + "loss": 0.1521, + "step": 89700 + }, + { + "epoch": 2.52, + "learning_rate": 4.828214836628804e-06, + "loss": 0.1497, + "step": 89750 + }, + { + "epoch": 2.52, + "learning_rate": 4.8141915579862566e-06, + "loss": 0.1333, + "step": 89800 + }, + { + "epoch": 2.52, + "learning_rate": 4.800168279343711e-06, + "loss": 0.1464, + "step": 89850 + }, + { + "epoch": 2.52, + "learning_rate": 4.786145000701163e-06, + "loss": 0.1362, + "step": 89900 + }, + { + "epoch": 2.52, + "learning_rate": 4.7721217220586175e-06, + "loss": 0.1379, + "step": 89950 + }, + { + "epoch": 2.52, + "learning_rate": 4.758098443416071e-06, + "loss": 0.1404, + "step": 90000 + }, + { + "epoch": 2.52, + "eval_bleu": 96.1002, + "eval_gen_len": 64.232, + "eval_loss": 0.2419024407863617, + "eval_rouge1": 94.0091, + "eval_rouge2": 90.4435, + "eval_rougeL": 93.8185, + "eval_rougeLsum": 93.8034, + "eval_runtime": 3103.5189, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 90000 + }, + { + "epoch": 2.53, + "learning_rate": 4.744075164773524e-06, + "loss": 0.1659, + "step": 90050 + }, + { + "epoch": 2.53, + "learning_rate": 4.7300518861309775e-06, + "loss": 0.1377, + "step": 90100 + }, + { + "epoch": 2.53, + "learning_rate": 4.716028607488431e-06, + "loss": 0.1398, + "step": 90150 + }, + { + "epoch": 2.53, + "learning_rate": 4.702005328845884e-06, + "loss": 0.1551, + "step": 90200 + }, + { + "epoch": 2.53, + "learning_rate": 4.687982050203338e-06, + "loss": 0.1371, + "step": 90250 + }, + { + "epoch": 2.53, + "learning_rate": 4.673958771560791e-06, + "loss": 0.1332, + "step": 90300 + }, + { + "epoch": 2.53, + "learning_rate": 4.659935492918244e-06, + "loss": 0.1401, + "step": 90350 + }, + { + "epoch": 2.54, + "learning_rate": 4.645912214275698e-06, + "loss": 0.1191, + "step": 90400 + }, + { + "epoch": 2.54, + "learning_rate": 4.631888935633151e-06, + "loss": 0.1421, + "step": 90450 + }, + { + "epoch": 2.54, + "learning_rate": 4.617865656990604e-06, + "loss": 0.1362, + "step": 90500 + }, + { + "epoch": 2.54, + "learning_rate": 4.603842378348058e-06, + "loss": 0.1488, + "step": 90550 + }, + { + "epoch": 2.54, + "learning_rate": 4.589819099705511e-06, + "loss": 0.1591, + "step": 90600 + }, + { + "epoch": 2.54, + "learning_rate": 4.5757958210629645e-06, + "loss": 0.1331, + "step": 90650 + }, + { + "epoch": 2.54, + "learning_rate": 4.561772542420418e-06, + "loss": 0.1337, + "step": 90700 + }, + { + "epoch": 2.55, + "learning_rate": 4.547749263777871e-06, + "loss": 0.1487, + "step": 90750 + }, + { + "epoch": 2.55, + "learning_rate": 4.5337259851353246e-06, + "loss": 0.1401, + "step": 90800 + }, + { + "epoch": 2.55, + "learning_rate": 4.519702706492778e-06, + "loss": 0.1403, + "step": 90850 + }, + { + "epoch": 2.55, + "learning_rate": 4.505679427850231e-06, + "loss": 0.141, + "step": 90900 + }, + { + "epoch": 2.55, + "learning_rate": 4.491656149207685e-06, + "loss": 0.1422, + "step": 90950 + }, + { + "epoch": 2.55, + "learning_rate": 4.477632870565138e-06, + "loss": 0.1415, + "step": 91000 + }, + { + "epoch": 2.55, + "eval_bleu": 96.1065, + "eval_gen_len": 64.2577, + "eval_loss": 0.24246443808078766, + "eval_rouge1": 94.0034, + "eval_rouge2": 90.453, + "eval_rougeL": 93.8071, + "eval_rougeLsum": 93.7875, + "eval_runtime": 3093.3898, + "eval_samples_per_second": 0.97, + "eval_steps_per_second": 0.242, + "step": 91000 + }, + { + "epoch": 2.55, + "learning_rate": 4.463609591922591e-06, + "loss": 0.1411, + "step": 91050 + }, + { + "epoch": 2.56, + "learning_rate": 4.449586313280045e-06, + "loss": 0.1617, + "step": 91100 + }, + { + "epoch": 2.56, + "learning_rate": 4.435563034637498e-06, + "loss": 0.1657, + "step": 91150 + }, + { + "epoch": 2.56, + "learning_rate": 4.421539755994951e-06, + "loss": 0.1337, + "step": 91200 + }, + { + "epoch": 2.56, + "learning_rate": 4.407516477352405e-06, + "loss": 0.1298, + "step": 91250 + }, + { + "epoch": 2.56, + "learning_rate": 4.393493198709858e-06, + "loss": 0.1551, + "step": 91300 + }, + { + "epoch": 2.56, + "learning_rate": 4.3794699200673115e-06, + "loss": 0.1551, + "step": 91350 + }, + { + "epoch": 2.56, + "learning_rate": 4.365446641424765e-06, + "loss": 0.1484, + "step": 91400 + }, + { + "epoch": 2.56, + "learning_rate": 4.351423362782219e-06, + "loss": 0.1345, + "step": 91450 + }, + { + "epoch": 2.57, + "learning_rate": 4.3374000841396716e-06, + "loss": 0.1532, + "step": 91500 + }, + { + "epoch": 2.57, + "learning_rate": 4.323376805497126e-06, + "loss": 0.1451, + "step": 91550 + }, + { + "epoch": 2.57, + "learning_rate": 4.309353526854578e-06, + "loss": 0.1437, + "step": 91600 + }, + { + "epoch": 2.57, + "learning_rate": 4.2953302482120325e-06, + "loss": 0.1572, + "step": 91650 + }, + { + "epoch": 2.57, + "learning_rate": 4.281306969569485e-06, + "loss": 0.1351, + "step": 91700 + }, + { + "epoch": 2.57, + "learning_rate": 4.267283690926939e-06, + "loss": 0.134, + "step": 91750 + }, + { + "epoch": 2.57, + "learning_rate": 4.253260412284392e-06, + "loss": 0.1358, + "step": 91800 + }, + { + "epoch": 2.58, + "learning_rate": 4.239237133641846e-06, + "loss": 0.138, + "step": 91850 + }, + { + "epoch": 2.58, + "learning_rate": 4.225213854999298e-06, + "loss": 0.1377, + "step": 91900 + }, + { + "epoch": 2.58, + "learning_rate": 4.211190576356753e-06, + "loss": 0.1473, + "step": 91950 + }, + { + "epoch": 2.58, + "learning_rate": 4.197167297714205e-06, + "loss": 0.1288, + "step": 92000 + }, + { + "epoch": 2.58, + "eval_bleu": 96.1036, + "eval_gen_len": 64.2483, + "eval_loss": 0.24192480742931366, + "eval_rouge1": 93.9846, + "eval_rouge2": 90.4605, + "eval_rougeL": 93.8057, + "eval_rougeLsum": 93.7865, + "eval_runtime": 3083.7598, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 92000 + }, + { + "epoch": 2.58, + "learning_rate": 4.183144019071659e-06, + "loss": 0.1448, + "step": 92050 + }, + { + "epoch": 2.58, + "learning_rate": 4.169120740429112e-06, + "loss": 0.1436, + "step": 92100 + }, + { + "epoch": 2.58, + "learning_rate": 4.155097461786566e-06, + "loss": 0.1567, + "step": 92150 + }, + { + "epoch": 2.59, + "learning_rate": 4.1410741831440186e-06, + "loss": 0.1441, + "step": 92200 + }, + { + "epoch": 2.59, + "learning_rate": 4.127050904501473e-06, + "loss": 0.1365, + "step": 92250 + }, + { + "epoch": 2.59, + "learning_rate": 4.113027625858925e-06, + "loss": 0.1466, + "step": 92300 + }, + { + "epoch": 2.59, + "learning_rate": 4.0990043472163795e-06, + "loss": 0.1398, + "step": 92350 + }, + { + "epoch": 2.59, + "learning_rate": 4.084981068573832e-06, + "loss": 0.1404, + "step": 92400 + }, + { + "epoch": 2.59, + "learning_rate": 4.070957789931286e-06, + "loss": 0.149, + "step": 92450 + }, + { + "epoch": 2.59, + "learning_rate": 4.056934511288739e-06, + "loss": 0.1322, + "step": 92500 + }, + { + "epoch": 2.6, + "learning_rate": 4.042911232646193e-06, + "loss": 0.1344, + "step": 92550 + }, + { + "epoch": 2.6, + "learning_rate": 4.0288879540036454e-06, + "loss": 0.1471, + "step": 92600 + }, + { + "epoch": 2.6, + "learning_rate": 4.0148646753611e-06, + "loss": 0.1535, + "step": 92650 + }, + { + "epoch": 2.6, + "learning_rate": 4.000841396718552e-06, + "loss": 0.1403, + "step": 92700 + }, + { + "epoch": 2.6, + "learning_rate": 3.986818118076006e-06, + "loss": 0.1265, + "step": 92750 + }, + { + "epoch": 2.6, + "learning_rate": 3.972794839433459e-06, + "loss": 0.1376, + "step": 92800 + }, + { + "epoch": 2.6, + "learning_rate": 3.958771560790913e-06, + "loss": 0.1421, + "step": 92850 + }, + { + "epoch": 2.61, + "learning_rate": 3.944748282148366e-06, + "loss": 0.1379, + "step": 92900 + }, + { + "epoch": 2.61, + "learning_rate": 3.93072500350582e-06, + "loss": 0.1493, + "step": 92950 + }, + { + "epoch": 2.61, + "learning_rate": 3.916701724863273e-06, + "loss": 0.1392, + "step": 93000 + }, + { + "epoch": 2.61, + "eval_bleu": 96.1104, + "eval_gen_len": 64.2603, + "eval_loss": 0.24193215370178223, + "eval_rouge1": 94.0378, + "eval_rouge2": 90.4824, + "eval_rougeL": 93.8304, + "eval_rougeLsum": 93.8203, + "eval_runtime": 3103.7806, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 93000 + }, + { + "epoch": 2.61, + "learning_rate": 3.9026784462207265e-06, + "loss": 0.1626, + "step": 93050 + }, + { + "epoch": 2.61, + "learning_rate": 3.88865516757818e-06, + "loss": 0.1596, + "step": 93100 + }, + { + "epoch": 2.61, + "learning_rate": 3.874631888935633e-06, + "loss": 0.1582, + "step": 93150 + }, + { + "epoch": 2.61, + "learning_rate": 3.8606086102930866e-06, + "loss": 0.1369, + "step": 93200 + }, + { + "epoch": 2.62, + "learning_rate": 3.84658533165054e-06, + "loss": 0.1522, + "step": 93250 + }, + { + "epoch": 2.62, + "learning_rate": 3.832562053007993e-06, + "loss": 0.1698, + "step": 93300 + }, + { + "epoch": 2.62, + "learning_rate": 3.818538774365447e-06, + "loss": 0.1352, + "step": 93350 + }, + { + "epoch": 2.62, + "learning_rate": 3.8045154957228996e-06, + "loss": 0.1307, + "step": 93400 + }, + { + "epoch": 2.62, + "learning_rate": 3.7904922170803534e-06, + "loss": 0.1264, + "step": 93450 + }, + { + "epoch": 2.62, + "learning_rate": 3.7764689384378063e-06, + "loss": 0.1512, + "step": 93500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7624456597952605e-06, + "loss": 0.1284, + "step": 93550 + }, + { + "epoch": 2.63, + "learning_rate": 3.748422381152714e-06, + "loss": 0.1413, + "step": 93600 + }, + { + "epoch": 2.63, + "learning_rate": 3.734399102510167e-06, + "loss": 0.1374, + "step": 93650 + }, + { + "epoch": 2.63, + "learning_rate": 3.7203758238676206e-06, + "loss": 0.144, + "step": 93700 + }, + { + "epoch": 2.63, + "learning_rate": 3.706352545225074e-06, + "loss": 0.1633, + "step": 93750 + }, + { + "epoch": 2.63, + "learning_rate": 3.6923292665825273e-06, + "loss": 0.1542, + "step": 93800 + }, + { + "epoch": 2.63, + "learning_rate": 3.6783059879399806e-06, + "loss": 0.1286, + "step": 93850 + }, + { + "epoch": 2.63, + "learning_rate": 3.664282709297434e-06, + "loss": 0.1526, + "step": 93900 + }, + { + "epoch": 2.63, + "learning_rate": 3.6502594306548874e-06, + "loss": 0.1431, + "step": 93950 + }, + { + "epoch": 2.64, + "learning_rate": 3.6362361520123407e-06, + "loss": 0.141, + "step": 94000 + }, + { + "epoch": 2.64, + "eval_bleu": 96.1146, + "eval_gen_len": 64.2413, + "eval_loss": 0.24184368550777435, + "eval_rouge1": 94.0178, + "eval_rouge2": 90.4805, + "eval_rougeL": 93.8232, + "eval_rougeLsum": 93.8125, + "eval_runtime": 3081.3777, + "eval_samples_per_second": 0.974, + "eval_steps_per_second": 0.243, + "step": 94000 + }, + { + "epoch": 2.64, + "learning_rate": 3.622212873369794e-06, + "loss": 0.1481, + "step": 94050 + }, + { + "epoch": 2.64, + "learning_rate": 3.6081895947272474e-06, + "loss": 0.1359, + "step": 94100 + }, + { + "epoch": 2.64, + "learning_rate": 3.5941663160847008e-06, + "loss": 0.1494, + "step": 94150 + }, + { + "epoch": 2.64, + "learning_rate": 3.580143037442154e-06, + "loss": 0.1372, + "step": 94200 + }, + { + "epoch": 2.64, + "learning_rate": 3.5661197587996075e-06, + "loss": 0.1474, + "step": 94250 + }, + { + "epoch": 2.64, + "learning_rate": 3.552096480157061e-06, + "loss": 0.1232, + "step": 94300 + }, + { + "epoch": 2.65, + "learning_rate": 3.5380732015145142e-06, + "loss": 0.1355, + "step": 94350 + }, + { + "epoch": 2.65, + "learning_rate": 3.5240499228719676e-06, + "loss": 0.1243, + "step": 94400 + }, + { + "epoch": 2.65, + "learning_rate": 3.510026644229421e-06, + "loss": 0.1414, + "step": 94450 + }, + { + "epoch": 2.65, + "learning_rate": 3.4960033655868743e-06, + "loss": 0.1344, + "step": 94500 + }, + { + "epoch": 2.65, + "learning_rate": 3.4819800869443276e-06, + "loss": 0.1251, + "step": 94550 + }, + { + "epoch": 2.65, + "learning_rate": 3.467956808301781e-06, + "loss": 0.1469, + "step": 94600 + }, + { + "epoch": 2.65, + "learning_rate": 3.4539335296592344e-06, + "loss": 0.1671, + "step": 94650 + }, + { + "epoch": 2.66, + "learning_rate": 3.4399102510166877e-06, + "loss": 0.1343, + "step": 94700 + }, + { + "epoch": 2.66, + "learning_rate": 3.425886972374141e-06, + "loss": 0.1493, + "step": 94750 + }, + { + "epoch": 2.66, + "learning_rate": 3.4118636937315944e-06, + "loss": 0.1354, + "step": 94800 + }, + { + "epoch": 2.66, + "learning_rate": 3.397840415089048e-06, + "loss": 0.1397, + "step": 94850 + }, + { + "epoch": 2.66, + "learning_rate": 3.383817136446501e-06, + "loss": 0.1304, + "step": 94900 + }, + { + "epoch": 2.66, + "learning_rate": 3.3697938578039545e-06, + "loss": 0.1198, + "step": 94950 + }, + { + "epoch": 2.66, + "learning_rate": 3.3557705791614083e-06, + "loss": 0.1372, + "step": 95000 + }, + { + "epoch": 2.66, + "eval_bleu": 96.1034, + "eval_gen_len": 64.2807, + "eval_loss": 0.24183472990989685, + "eval_rouge1": 93.9826, + "eval_rouge2": 90.409, + "eval_rougeL": 93.7782, + "eval_rougeLsum": 93.7723, + "eval_runtime": 3083.1749, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 95000 + }, + { + "epoch": 2.67, + "learning_rate": 3.3417473005188616e-06, + "loss": 0.1558, + "step": 95050 + }, + { + "epoch": 2.67, + "learning_rate": 3.327724021876315e-06, + "loss": 0.1282, + "step": 95100 + }, + { + "epoch": 2.67, + "learning_rate": 3.3137007432337684e-06, + "loss": 0.1595, + "step": 95150 + }, + { + "epoch": 2.67, + "learning_rate": 3.2996774645912217e-06, + "loss": 0.1403, + "step": 95200 + }, + { + "epoch": 2.67, + "learning_rate": 3.285654185948675e-06, + "loss": 0.1429, + "step": 95250 + }, + { + "epoch": 2.67, + "learning_rate": 3.2716309073061284e-06, + "loss": 0.1364, + "step": 95300 + }, + { + "epoch": 2.67, + "learning_rate": 3.257607628663582e-06, + "loss": 0.1406, + "step": 95350 + }, + { + "epoch": 2.68, + "learning_rate": 3.243584350021035e-06, + "loss": 0.1495, + "step": 95400 + }, + { + "epoch": 2.68, + "learning_rate": 3.2295610713784885e-06, + "loss": 0.1608, + "step": 95450 + }, + { + "epoch": 2.68, + "learning_rate": 3.215537792735942e-06, + "loss": 0.1503, + "step": 95500 + }, + { + "epoch": 2.68, + "learning_rate": 3.2015145140933952e-06, + "loss": 0.1371, + "step": 95550 + }, + { + "epoch": 2.68, + "learning_rate": 3.1874912354508486e-06, + "loss": 0.1465, + "step": 95600 + }, + { + "epoch": 2.68, + "learning_rate": 3.173467956808302e-06, + "loss": 0.1321, + "step": 95650 + }, + { + "epoch": 2.68, + "learning_rate": 3.1594446781657553e-06, + "loss": 0.1382, + "step": 95700 + }, + { + "epoch": 2.69, + "learning_rate": 3.1454213995232087e-06, + "loss": 0.1441, + "step": 95750 + }, + { + "epoch": 2.69, + "learning_rate": 3.131398120880662e-06, + "loss": 0.1558, + "step": 95800 + }, + { + "epoch": 2.69, + "learning_rate": 3.1173748422381154e-06, + "loss": 0.1395, + "step": 95850 + }, + { + "epoch": 2.69, + "learning_rate": 3.1033515635955687e-06, + "loss": 0.1308, + "step": 95900 + }, + { + "epoch": 2.69, + "learning_rate": 3.089328284953022e-06, + "loss": 0.1322, + "step": 95950 + }, + { + "epoch": 2.69, + "learning_rate": 3.0753050063104754e-06, + "loss": 0.1299, + "step": 96000 + }, + { + "epoch": 2.69, + "eval_bleu": 96.1016, + "eval_gen_len": 64.2387, + "eval_loss": 0.24173256754875183, + "eval_rouge1": 93.9954, + "eval_rouge2": 90.4732, + "eval_rougeL": 93.8053, + "eval_rougeLsum": 93.7909, + "eval_runtime": 3082.3137, + "eval_samples_per_second": 0.973, + "eval_steps_per_second": 0.243, + "step": 96000 + }, + { + "epoch": 2.69, + "learning_rate": 3.061281727667929e-06, + "loss": 0.1424, + "step": 96050 + }, + { + "epoch": 2.7, + "learning_rate": 3.047258449025382e-06, + "loss": 0.1332, + "step": 96100 + }, + { + "epoch": 2.7, + "learning_rate": 3.0332351703828355e-06, + "loss": 0.1298, + "step": 96150 + }, + { + "epoch": 2.7, + "learning_rate": 3.019211891740289e-06, + "loss": 0.1383, + "step": 96200 + }, + { + "epoch": 2.7, + "learning_rate": 3.0051886130977422e-06, + "loss": 0.1434, + "step": 96250 + }, + { + "epoch": 2.7, + "learning_rate": 2.9911653344551956e-06, + "loss": 0.1406, + "step": 96300 + }, + { + "epoch": 2.7, + "learning_rate": 2.977142055812649e-06, + "loss": 0.1343, + "step": 96350 + }, + { + "epoch": 2.7, + "learning_rate": 2.9631187771701023e-06, + "loss": 0.1335, + "step": 96400 + }, + { + "epoch": 2.71, + "learning_rate": 2.949095498527556e-06, + "loss": 0.1457, + "step": 96450 + }, + { + "epoch": 2.71, + "learning_rate": 2.9350722198850094e-06, + "loss": 0.1458, + "step": 96500 + }, + { + "epoch": 2.71, + "learning_rate": 2.921048941242463e-06, + "loss": 0.1494, + "step": 96550 + }, + { + "epoch": 2.71, + "learning_rate": 2.907025662599916e-06, + "loss": 0.1453, + "step": 96600 + }, + { + "epoch": 2.71, + "learning_rate": 2.8930023839573695e-06, + "loss": 0.1452, + "step": 96650 + }, + { + "epoch": 2.71, + "learning_rate": 2.878979105314823e-06, + "loss": 0.1607, + "step": 96700 + }, + { + "epoch": 2.71, + "learning_rate": 2.8649558266722762e-06, + "loss": 0.149, + "step": 96750 + }, + { + "epoch": 2.71, + "learning_rate": 2.8509325480297296e-06, + "loss": 0.1419, + "step": 96800 + }, + { + "epoch": 2.72, + "learning_rate": 2.836909269387183e-06, + "loss": 0.1308, + "step": 96850 + }, + { + "epoch": 2.72, + "learning_rate": 2.8228859907446363e-06, + "loss": 0.143, + "step": 96900 + }, + { + "epoch": 2.72, + "learning_rate": 2.8088627121020897e-06, + "loss": 0.134, + "step": 96950 + }, + { + "epoch": 2.72, + "learning_rate": 2.794839433459543e-06, + "loss": 0.1375, + "step": 97000 + }, + { + "epoch": 2.72, + "eval_bleu": 96.1201, + "eval_gen_len": 64.2793, + "eval_loss": 0.24178442358970642, + "eval_rouge1": 93.9882, + "eval_rouge2": 90.4529, + "eval_rougeL": 93.7934, + "eval_rougeLsum": 93.7813, + "eval_runtime": 3072.8346, + "eval_samples_per_second": 0.976, + "eval_steps_per_second": 0.244, + "step": 97000 + }, + { + "epoch": 2.72, + "learning_rate": 2.7808161548169964e-06, + "loss": 0.1406, + "step": 97050 + }, + { + "epoch": 2.72, + "learning_rate": 2.7667928761744497e-06, + "loss": 0.1449, + "step": 97100 + }, + { + "epoch": 2.72, + "learning_rate": 2.752769597531903e-06, + "loss": 0.137, + "step": 97150 + }, + { + "epoch": 2.73, + "learning_rate": 2.7387463188893565e-06, + "loss": 0.1525, + "step": 97200 + }, + { + "epoch": 2.73, + "learning_rate": 2.72472304024681e-06, + "loss": 0.1456, + "step": 97250 + }, + { + "epoch": 2.73, + "learning_rate": 2.710699761604263e-06, + "loss": 0.1472, + "step": 97300 + }, + { + "epoch": 2.73, + "learning_rate": 2.6966764829617165e-06, + "loss": 0.1447, + "step": 97350 + }, + { + "epoch": 2.73, + "learning_rate": 2.68265320431917e-06, + "loss": 0.1359, + "step": 97400 + }, + { + "epoch": 2.73, + "learning_rate": 2.6686299256766232e-06, + "loss": 0.1593, + "step": 97450 + }, + { + "epoch": 2.73, + "learning_rate": 2.6546066470340766e-06, + "loss": 0.136, + "step": 97500 + }, + { + "epoch": 2.74, + "learning_rate": 2.64058336839153e-06, + "loss": 0.1496, + "step": 97550 + }, + { + "epoch": 2.74, + "learning_rate": 2.6265600897489833e-06, + "loss": 0.1401, + "step": 97600 + }, + { + "epoch": 2.74, + "learning_rate": 2.6125368111064367e-06, + "loss": 0.1427, + "step": 97650 + }, + { + "epoch": 2.74, + "learning_rate": 2.59851353246389e-06, + "loss": 0.1495, + "step": 97700 + }, + { + "epoch": 2.74, + "learning_rate": 2.5844902538213434e-06, + "loss": 0.1276, + "step": 97750 + }, + { + "epoch": 2.74, + "learning_rate": 2.5704669751787967e-06, + "loss": 0.1434, + "step": 97800 + }, + { + "epoch": 2.74, + "learning_rate": 2.5564436965362505e-06, + "loss": 0.13, + "step": 97850 + }, + { + "epoch": 2.75, + "learning_rate": 2.542420417893704e-06, + "loss": 0.1309, + "step": 97900 + }, + { + "epoch": 2.75, + "learning_rate": 2.5283971392511572e-06, + "loss": 0.1659, + "step": 97950 + }, + { + "epoch": 2.75, + "learning_rate": 2.5143738606086106e-06, + "loss": 0.1282, + "step": 98000 + }, + { + "epoch": 2.75, + "eval_bleu": 96.1244, + "eval_gen_len": 64.2753, + "eval_loss": 0.24166151881217957, + "eval_rouge1": 93.9974, + "eval_rouge2": 90.4493, + "eval_rougeL": 93.8008, + "eval_rougeLsum": 93.7908, + "eval_runtime": 3089.0272, + "eval_samples_per_second": 0.971, + "eval_steps_per_second": 0.243, + "step": 98000 + }, + { + "epoch": 2.75, + "learning_rate": 2.500350581966064e-06, + "loss": 0.1406, + "step": 98050 + }, + { + "epoch": 2.75, + "learning_rate": 2.4863273033235173e-06, + "loss": 0.1402, + "step": 98100 + }, + { + "epoch": 2.75, + "learning_rate": 2.4723040246809707e-06, + "loss": 0.1543, + "step": 98150 + }, + { + "epoch": 2.75, + "learning_rate": 2.458280746038424e-06, + "loss": 0.1434, + "step": 98200 + }, + { + "epoch": 2.76, + "learning_rate": 2.4442574673958774e-06, + "loss": 0.1338, + "step": 98250 + }, + { + "epoch": 2.76, + "learning_rate": 2.4302341887533307e-06, + "loss": 0.1202, + "step": 98300 + }, + { + "epoch": 2.76, + "learning_rate": 2.416210910110784e-06, + "loss": 0.1641, + "step": 98350 + }, + { + "epoch": 2.76, + "learning_rate": 2.4021876314682375e-06, + "loss": 0.1365, + "step": 98400 + }, + { + "epoch": 2.76, + "learning_rate": 2.388164352825691e-06, + "loss": 0.1366, + "step": 98450 + }, + { + "epoch": 2.76, + "learning_rate": 2.374141074183144e-06, + "loss": 0.1485, + "step": 98500 + }, + { + "epoch": 2.76, + "learning_rate": 2.3601177955405975e-06, + "loss": 0.1366, + "step": 98550 + }, + { + "epoch": 2.77, + "learning_rate": 2.346094516898051e-06, + "loss": 0.1644, + "step": 98600 + }, + { + "epoch": 2.77, + "learning_rate": 2.3320712382555042e-06, + "loss": 0.1359, + "step": 98650 + }, + { + "epoch": 2.77, + "learning_rate": 2.3180479596129576e-06, + "loss": 0.1452, + "step": 98700 + }, + { + "epoch": 2.77, + "learning_rate": 2.304024680970411e-06, + "loss": 0.1391, + "step": 98750 + }, + { + "epoch": 2.77, + "learning_rate": 2.2900014023278643e-06, + "loss": 0.1375, + "step": 98800 + }, + { + "epoch": 2.77, + "learning_rate": 2.2759781236853177e-06, + "loss": 0.1517, + "step": 98850 + }, + { + "epoch": 2.77, + "learning_rate": 2.261954845042771e-06, + "loss": 0.149, + "step": 98900 + }, + { + "epoch": 2.78, + "learning_rate": 2.2479315664002244e-06, + "loss": 0.1239, + "step": 98950 + }, + { + "epoch": 2.78, + "learning_rate": 2.2339082877576778e-06, + "loss": 0.1374, + "step": 99000 + }, + { + "epoch": 2.78, + "eval_bleu": 96.1272, + "eval_gen_len": 64.262, + "eval_loss": 0.24160811305046082, + "eval_rouge1": 93.9828, + "eval_rouge2": 90.4481, + "eval_rougeL": 93.7941, + "eval_rougeLsum": 93.7877, + "eval_runtime": 3114.2761, + "eval_samples_per_second": 0.963, + "eval_steps_per_second": 0.241, + "step": 99000 + }, + { + "epoch": 2.78, + "learning_rate": 2.219885009115131e-06, + "loss": 0.1295, + "step": 99050 + }, + { + "epoch": 2.78, + "learning_rate": 2.2058617304725845e-06, + "loss": 0.1544, + "step": 99100 + }, + { + "epoch": 2.78, + "learning_rate": 2.191838451830038e-06, + "loss": 0.1506, + "step": 99150 + }, + { + "epoch": 2.78, + "learning_rate": 2.177815173187491e-06, + "loss": 0.1497, + "step": 99200 + }, + { + "epoch": 2.78, + "learning_rate": 2.1637918945449445e-06, + "loss": 0.1591, + "step": 99250 + }, + { + "epoch": 2.79, + "learning_rate": 2.1497686159023983e-06, + "loss": 0.1467, + "step": 99300 + }, + { + "epoch": 2.79, + "learning_rate": 2.1357453372598517e-06, + "loss": 0.131, + "step": 99350 + }, + { + "epoch": 2.79, + "learning_rate": 2.121722058617305e-06, + "loss": 0.1436, + "step": 99400 + }, + { + "epoch": 2.79, + "learning_rate": 2.1076987799747584e-06, + "loss": 0.1561, + "step": 99450 + }, + { + "epoch": 2.79, + "learning_rate": 2.0936755013322118e-06, + "loss": 0.1469, + "step": 99500 + }, + { + "epoch": 2.79, + "learning_rate": 2.079652222689665e-06, + "loss": 0.1375, + "step": 99550 + }, + { + "epoch": 2.79, + "learning_rate": 2.0656289440471185e-06, + "loss": 0.1539, + "step": 99600 + }, + { + "epoch": 2.79, + "learning_rate": 2.051605665404572e-06, + "loss": 0.1314, + "step": 99650 + }, + { + "epoch": 2.8, + "learning_rate": 2.037582386762025e-06, + "loss": 0.1277, + "step": 99700 + }, + { + "epoch": 2.8, + "learning_rate": 2.0235591081194785e-06, + "loss": 0.1627, + "step": 99750 + }, + { + "epoch": 2.8, + "learning_rate": 2.009535829476932e-06, + "loss": 0.139, + "step": 99800 + }, + { + "epoch": 2.8, + "learning_rate": 1.9955125508343853e-06, + "loss": 0.137, + "step": 99850 + }, + { + "epoch": 2.8, + "learning_rate": 1.9814892721918386e-06, + "loss": 0.125, + "step": 99900 + }, + { + "epoch": 2.8, + "learning_rate": 1.967465993549292e-06, + "loss": 0.1275, + "step": 99950 + }, + { + "epoch": 2.8, + "learning_rate": 1.9534427149067453e-06, + "loss": 0.1327, + "step": 100000 + }, + { + "epoch": 2.8, + "eval_bleu": 96.1239, + "eval_gen_len": 64.253, + "eval_loss": 0.2416970431804657, + "eval_rouge1": 94.0048, + "eval_rouge2": 90.4634, + "eval_rougeL": 93.8114, + "eval_rougeLsum": 93.8025, + "eval_runtime": 3102.0331, + "eval_samples_per_second": 0.967, + "eval_steps_per_second": 0.242, + "step": 100000 + }, + { + "epoch": 2.81, + "learning_rate": 1.9394194362641987e-06, + "loss": 0.1341, + "step": 100050 + }, + { + "epoch": 2.81, + "learning_rate": 1.925396157621652e-06, + "loss": 0.13, + "step": 100100 + }, + { + "epoch": 2.81, + "learning_rate": 1.9113728789791054e-06, + "loss": 0.1335, + "step": 100150 + }, + { + "epoch": 2.81, + "learning_rate": 1.8973496003365588e-06, + "loss": 0.1611, + "step": 100200 + }, + { + "epoch": 2.81, + "learning_rate": 1.8833263216940121e-06, + "loss": 0.1226, + "step": 100250 + }, + { + "epoch": 2.81, + "learning_rate": 1.8693030430514655e-06, + "loss": 0.1349, + "step": 100300 + }, + { + "epoch": 2.81, + "learning_rate": 1.855279764408919e-06, + "loss": 0.1462, + "step": 100350 + }, + { + "epoch": 2.82, + "learning_rate": 1.8412564857663724e-06, + "loss": 0.1378, + "step": 100400 + }, + { + "epoch": 2.82, + "learning_rate": 1.8272332071238258e-06, + "loss": 0.1636, + "step": 100450 + }, + { + "epoch": 2.82, + "learning_rate": 1.8132099284812791e-06, + "loss": 0.1499, + "step": 100500 + }, + { + "epoch": 2.82, + "learning_rate": 1.7991866498387325e-06, + "loss": 0.1489, + "step": 100550 + }, + { + "epoch": 2.82, + "learning_rate": 1.7851633711961858e-06, + "loss": 0.1582, + "step": 100600 + }, + { + "epoch": 2.82, + "learning_rate": 1.7711400925536392e-06, + "loss": 0.1305, + "step": 100650 + }, + { + "epoch": 2.82, + "learning_rate": 1.7571168139110925e-06, + "loss": 0.1596, + "step": 100700 + }, + { + "epoch": 2.83, + "learning_rate": 1.743093535268546e-06, + "loss": 0.1275, + "step": 100750 + }, + { + "epoch": 2.83, + "learning_rate": 1.7290702566259993e-06, + "loss": 0.1398, + "step": 100800 + }, + { + "epoch": 2.83, + "learning_rate": 1.7150469779834526e-06, + "loss": 0.1348, + "step": 100850 + }, + { + "epoch": 2.83, + "learning_rate": 1.701023699340906e-06, + "loss": 0.147, + "step": 100900 + }, + { + "epoch": 2.83, + "learning_rate": 1.6870004206983593e-06, + "loss": 0.1471, + "step": 100950 + }, + { + "epoch": 2.83, + "learning_rate": 1.6729771420558127e-06, + "loss": 0.1433, + "step": 101000 + }, + { + "epoch": 2.83, + "eval_bleu": 96.1231, + "eval_gen_len": 64.2653, + "eval_loss": 0.24147701263427734, + "eval_rouge1": 93.9973, + "eval_rouge2": 90.4389, + "eval_rougeL": 93.8007, + "eval_rougeLsum": 93.7953, + "eval_runtime": 3076.5825, + "eval_samples_per_second": 0.975, + "eval_steps_per_second": 0.244, + "step": 101000 + }, + { + "epoch": 2.83, + "learning_rate": 1.6589538634132663e-06, + "loss": 0.1294, + "step": 101050 + }, + { + "epoch": 2.84, + "learning_rate": 1.6449305847707196e-06, + "loss": 0.1555, + "step": 101100 + }, + { + "epoch": 2.84, + "learning_rate": 1.630907306128173e-06, + "loss": 0.1633, + "step": 101150 + }, + { + "epoch": 2.84, + "learning_rate": 1.6168840274856263e-06, + "loss": 0.1383, + "step": 101200 + }, + { + "epoch": 2.84, + "learning_rate": 1.6028607488430797e-06, + "loss": 0.1397, + "step": 101250 + }, + { + "epoch": 2.84, + "learning_rate": 1.588837470200533e-06, + "loss": 0.1473, + "step": 101300 + }, + { + "epoch": 2.84, + "learning_rate": 1.5748141915579864e-06, + "loss": 0.1379, + "step": 101350 + }, + { + "epoch": 2.84, + "learning_rate": 1.5607909129154398e-06, + "loss": 0.1373, + "step": 101400 + }, + { + "epoch": 2.85, + "learning_rate": 1.5467676342728931e-06, + "loss": 0.1438, + "step": 101450 + }, + { + "epoch": 2.85, + "learning_rate": 1.5327443556303465e-06, + "loss": 0.146, + "step": 101500 + }, + { + "epoch": 2.85, + "learning_rate": 1.5187210769877998e-06, + "loss": 0.1479, + "step": 101550 + }, + { + "epoch": 2.85, + "learning_rate": 1.5046977983452532e-06, + "loss": 0.1447, + "step": 101600 + }, + { + "epoch": 2.85, + "learning_rate": 1.4906745197027066e-06, + "loss": 0.1461, + "step": 101650 + }, + { + "epoch": 2.85, + "learning_rate": 1.47665124106016e-06, + "loss": 0.1559, + "step": 101700 + }, + { + "epoch": 2.85, + "learning_rate": 1.4626279624176133e-06, + "loss": 0.162, + "step": 101750 + }, + { + "epoch": 2.86, + "learning_rate": 1.4486046837750668e-06, + "loss": 0.1347, + "step": 101800 + }, + { + "epoch": 2.86, + "learning_rate": 1.4345814051325202e-06, + "loss": 0.1394, + "step": 101850 + }, + { + "epoch": 2.86, + "learning_rate": 1.4205581264899736e-06, + "loss": 0.1429, + "step": 101900 + }, + { + "epoch": 2.86, + "learning_rate": 1.406534847847427e-06, + "loss": 0.1298, + "step": 101950 + }, + { + "epoch": 2.86, + "learning_rate": 1.3925115692048803e-06, + "loss": 0.1688, + "step": 102000 + }, + { + "epoch": 2.86, + "eval_bleu": 96.1287, + "eval_gen_len": 64.2707, + "eval_loss": 0.24163706600666046, + "eval_rouge1": 94.0131, + "eval_rouge2": 90.4767, + "eval_rougeL": 93.8162, + "eval_rougeLsum": 93.8057, + "eval_runtime": 3096.7851, + "eval_samples_per_second": 0.969, + "eval_steps_per_second": 0.242, + "step": 102000 + }, + { + "epoch": 2.86, + "learning_rate": 1.3784882905623336e-06, + "loss": 0.162, + "step": 102050 + }, + { + "epoch": 2.86, + "learning_rate": 1.364465011919787e-06, + "loss": 0.1441, + "step": 102100 + }, + { + "epoch": 2.86, + "learning_rate": 1.3504417332772403e-06, + "loss": 0.1396, + "step": 102150 + }, + { + "epoch": 2.87, + "learning_rate": 1.3364184546346937e-06, + "loss": 0.1423, + "step": 102200 + }, + { + "epoch": 2.87, + "learning_rate": 1.322395175992147e-06, + "loss": 0.1324, + "step": 102250 + }, + { + "epoch": 2.87, + "learning_rate": 1.3083718973496004e-06, + "loss": 0.16, + "step": 102300 + }, + { + "epoch": 2.87, + "learning_rate": 1.2943486187070538e-06, + "loss": 0.135, + "step": 102350 + }, + { + "epoch": 2.87, + "learning_rate": 1.2803253400645071e-06, + "loss": 0.1298, + "step": 102400 + }, + { + "epoch": 2.87, + "learning_rate": 1.2663020614219605e-06, + "loss": 0.1523, + "step": 102450 + }, + { + "epoch": 2.87, + "learning_rate": 1.252278782779414e-06, + "loss": 0.1286, + "step": 102500 + }, + { + "epoch": 2.88, + "learning_rate": 1.2382555041368674e-06, + "loss": 0.1439, + "step": 102550 + }, + { + "epoch": 2.88, + "learning_rate": 1.2242322254943208e-06, + "loss": 0.1283, + "step": 102600 + }, + { + "epoch": 2.88, + "learning_rate": 1.2102089468517741e-06, + "loss": 0.1216, + "step": 102650 + }, + { + "epoch": 2.88, + "learning_rate": 1.1961856682092275e-06, + "loss": 0.1485, + "step": 102700 + }, + { + "epoch": 2.88, + "learning_rate": 1.1821623895666808e-06, + "loss": 0.1455, + "step": 102750 + }, + { + "epoch": 2.88, + "learning_rate": 1.1681391109241342e-06, + "loss": 0.1384, + "step": 102800 + }, + { + "epoch": 2.88, + "learning_rate": 1.1541158322815876e-06, + "loss": 0.14, + "step": 102850 + }, + { + "epoch": 2.89, + "learning_rate": 1.1400925536390407e-06, + "loss": 0.1367, + "step": 102900 + }, + { + "epoch": 2.89, + "learning_rate": 1.126069274996494e-06, + "loss": 0.1558, + "step": 102950 + }, + { + "epoch": 2.89, + "learning_rate": 1.1120459963539474e-06, + "loss": 0.1377, + "step": 103000 + }, + { + "epoch": 2.89, + "eval_bleu": 96.1203, + "eval_gen_len": 64.258, + "eval_loss": 0.24138423800468445, + "eval_rouge1": 94.0261, + "eval_rouge2": 90.4778, + "eval_rougeL": 93.8237, + "eval_rougeLsum": 93.815, + "eval_runtime": 3149.2273, + "eval_samples_per_second": 0.953, + "eval_steps_per_second": 0.238, + "step": 103000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0980227177114008e-06, + "loss": 0.1283, + "step": 103050 + }, + { + "epoch": 2.89, + "learning_rate": 1.0839994390688541e-06, + "loss": 0.1487, + "step": 103100 + }, + { + "epoch": 2.89, + "learning_rate": 1.0699761604263075e-06, + "loss": 0.144, + "step": 103150 + }, + { + "epoch": 2.89, + "learning_rate": 1.055952881783761e-06, + "loss": 0.1369, + "step": 103200 + }, + { + "epoch": 2.9, + "learning_rate": 1.0419296031412144e-06, + "loss": 0.1451, + "step": 103250 + }, + { + "epoch": 2.9, + "learning_rate": 1.0279063244986678e-06, + "loss": 0.1536, + "step": 103300 + }, + { + "epoch": 2.9, + "learning_rate": 1.0138830458561211e-06, + "loss": 0.1434, + "step": 103350 + }, + { + "epoch": 2.9, + "learning_rate": 9.998597672135745e-07, + "loss": 0.144, + "step": 103400 + }, + { + "epoch": 2.9, + "learning_rate": 9.858364885710279e-07, + "loss": 0.142, + "step": 103450 + }, + { + "epoch": 2.9, + "learning_rate": 9.718132099284812e-07, + "loss": 0.1209, + "step": 103500 + }, + { + "epoch": 2.9, + "learning_rate": 9.577899312859346e-07, + "loss": 0.127, + "step": 103550 + }, + { + "epoch": 2.91, + "learning_rate": 9.437666526433879e-07, + "loss": 0.1505, + "step": 103600 + }, + { + "epoch": 2.91, + "learning_rate": 9.297433740008414e-07, + "loss": 0.1377, + "step": 103650 + }, + { + "epoch": 2.91, + "learning_rate": 9.157200953582947e-07, + "loss": 0.1458, + "step": 103700 + }, + { + "epoch": 2.91, + "learning_rate": 9.016968167157482e-07, + "loss": 0.1445, + "step": 103750 + }, + { + "epoch": 2.91, + "learning_rate": 8.876735380732016e-07, + "loss": 0.1363, + "step": 103800 + }, + { + "epoch": 2.91, + "learning_rate": 8.736502594306549e-07, + "loss": 0.152, + "step": 103850 + }, + { + "epoch": 2.91, + "learning_rate": 8.596269807881083e-07, + "loss": 0.1565, + "step": 103900 + }, + { + "epoch": 2.92, + "learning_rate": 8.456037021455616e-07, + "loss": 0.142, + "step": 103950 + }, + { + "epoch": 2.92, + "learning_rate": 8.31580423503015e-07, + "loss": 0.1482, + "step": 104000 + }, + { + "epoch": 2.92, + "eval_bleu": 96.1208, + "eval_gen_len": 64.2503, + "eval_loss": 0.24138183891773224, + "eval_rouge1": 94.0225, + "eval_rouge2": 90.488, + "eval_rougeL": 93.8289, + "eval_rougeLsum": 93.8131, + "eval_runtime": 3071.8283, + "eval_samples_per_second": 0.977, + "eval_steps_per_second": 0.244, + "step": 104000 + }, + { + "epoch": 2.92, + "learning_rate": 8.175571448604684e-07, + "loss": 0.1504, + "step": 104050 + }, + { + "epoch": 2.92, + "learning_rate": 8.035338662179218e-07, + "loss": 0.1263, + "step": 104100 + }, + { + "epoch": 2.92, + "learning_rate": 7.895105875753752e-07, + "loss": 0.1346, + "step": 104150 + }, + { + "epoch": 2.92, + "learning_rate": 7.754873089328285e-07, + "loss": 0.1471, + "step": 104200 + }, + { + "epoch": 2.92, + "learning_rate": 7.614640302902819e-07, + "loss": 0.1356, + "step": 104250 + }, + { + "epoch": 2.93, + "learning_rate": 7.474407516477353e-07, + "loss": 0.1457, + "step": 104300 + }, + { + "epoch": 2.93, + "learning_rate": 7.334174730051886e-07, + "loss": 0.1458, + "step": 104350 + }, + { + "epoch": 2.93, + "learning_rate": 7.19394194362642e-07, + "loss": 0.1373, + "step": 104400 + }, + { + "epoch": 2.93, + "learning_rate": 7.053709157200954e-07, + "loss": 0.1362, + "step": 104450 + }, + { + "epoch": 2.93, + "learning_rate": 6.913476370775488e-07, + "loss": 0.1548, + "step": 104500 + }, + { + "epoch": 2.93, + "learning_rate": 6.773243584350021e-07, + "loss": 0.1398, + "step": 104550 + }, + { + "epoch": 2.93, + "learning_rate": 6.633010797924555e-07, + "loss": 0.1604, + "step": 104600 + }, + { + "epoch": 2.94, + "learning_rate": 6.492778011499089e-07, + "loss": 0.146, + "step": 104650 + }, + { + "epoch": 2.94, + "learning_rate": 6.352545225073622e-07, + "loss": 0.1272, + "step": 104700 + }, + { + "epoch": 2.94, + "learning_rate": 6.212312438648156e-07, + "loss": 0.1495, + "step": 104750 + }, + { + "epoch": 2.94, + "learning_rate": 6.07207965222269e-07, + "loss": 0.1408, + "step": 104800 + }, + { + "epoch": 2.94, + "learning_rate": 5.931846865797224e-07, + "loss": 0.1502, + "step": 104850 + }, + { + "epoch": 2.94, + "learning_rate": 5.791614079371758e-07, + "loss": 0.1471, + "step": 104900 + }, + { + "epoch": 2.94, + "learning_rate": 5.651381292946291e-07, + "loss": 0.16, + "step": 104950 + }, + { + "epoch": 2.94, + "learning_rate": 5.511148506520825e-07, + "loss": 0.1286, + "step": 105000 + }, + { + "epoch": 2.94, + "eval_bleu": 96.128, + "eval_gen_len": 64.2513, + "eval_loss": 0.24144940078258514, + "eval_rouge1": 94.0054, + "eval_rouge2": 90.4653, + "eval_rougeL": 93.8155, + "eval_rougeLsum": 93.8029, + "eval_runtime": 3085.2312, + "eval_samples_per_second": 0.972, + "eval_steps_per_second": 0.243, + "step": 105000 + }, + { + "epoch": 2.95, + "learning_rate": 5.370915720095358e-07, + "loss": 0.1509, + "step": 105050 + }, + { + "epoch": 2.95, + "learning_rate": 5.230682933669892e-07, + "loss": 0.1342, + "step": 105100 + }, + { + "epoch": 2.95, + "learning_rate": 5.090450147244427e-07, + "loss": 0.144, + "step": 105150 + }, + { + "epoch": 2.95, + "learning_rate": 4.95021736081896e-07, + "loss": 0.166, + "step": 105200 + }, + { + "epoch": 2.95, + "learning_rate": 4.809984574393494e-07, + "loss": 0.1405, + "step": 105250 + }, + { + "epoch": 2.95, + "learning_rate": 4.669751787968027e-07, + "loss": 0.1551, + "step": 105300 + }, + { + "epoch": 2.95, + "learning_rate": 4.529519001542561e-07, + "loss": 0.1417, + "step": 105350 + }, + { + "epoch": 2.96, + "learning_rate": 4.3892862151170944e-07, + "loss": 0.1233, + "step": 105400 + }, + { + "epoch": 2.96, + "learning_rate": 4.2490534286916285e-07, + "loss": 0.1406, + "step": 105450 + }, + { + "epoch": 2.96, + "learning_rate": 4.108820642266162e-07, + "loss": 0.1347, + "step": 105500 + }, + { + "epoch": 2.96, + "learning_rate": 3.9685878558406956e-07, + "loss": 0.1533, + "step": 105550 + }, + { + "epoch": 2.96, + "learning_rate": 3.82835506941523e-07, + "loss": 0.1462, + "step": 105600 + }, + { + "epoch": 2.96, + "learning_rate": 3.6881222829897633e-07, + "loss": 0.1404, + "step": 105650 + }, + { + "epoch": 2.96, + "learning_rate": 3.547889496564297e-07, + "loss": 0.1323, + "step": 105700 + }, + { + "epoch": 2.97, + "learning_rate": 3.4076567101388305e-07, + "loss": 0.1564, + "step": 105750 + }, + { + "epoch": 2.97, + "learning_rate": 3.2674239237133646e-07, + "loss": 0.1508, + "step": 105800 + }, + { + "epoch": 2.97, + "learning_rate": 3.127191137287898e-07, + "loss": 0.1467, + "step": 105850 + }, + { + "epoch": 2.97, + "learning_rate": 2.986958350862432e-07, + "loss": 0.1574, + "step": 105900 + }, + { + "epoch": 2.97, + "learning_rate": 2.846725564436966e-07, + "loss": 0.132, + "step": 105950 + }, + { + "epoch": 2.97, + "learning_rate": 2.7064927780114994e-07, + "loss": 0.1392, + "step": 106000 + }, + { + "epoch": 2.97, + "eval_bleu": 96.1228, + "eval_gen_len": 64.2473, + "eval_loss": 0.2414444237947464, + "eval_rouge1": 94.0048, + "eval_rouge2": 90.4742, + "eval_rougeL": 93.8171, + "eval_rougeLsum": 93.8047, + "eval_runtime": 3077.7276, + "eval_samples_per_second": 0.975, + "eval_steps_per_second": 0.244, + "step": 106000 + }, + { + "epoch": 2.97, + "learning_rate": 2.566259991586033e-07, + "loss": 0.1544, + "step": 106050 + }, + { + "epoch": 2.98, + "learning_rate": 2.4260272051605666e-07, + "loss": 0.1498, + "step": 106100 + }, + { + "epoch": 2.98, + "learning_rate": 2.2857944187351002e-07, + "loss": 0.1291, + "step": 106150 + }, + { + "epoch": 2.98, + "learning_rate": 2.145561632309634e-07, + "loss": 0.1411, + "step": 106200 + }, + { + "epoch": 2.98, + "learning_rate": 2.0053288458841676e-07, + "loss": 0.1445, + "step": 106250 + }, + { + "epoch": 2.98, + "learning_rate": 1.8650960594587014e-07, + "loss": 0.1499, + "step": 106300 + }, + { + "epoch": 2.98, + "learning_rate": 1.7248632730332353e-07, + "loss": 0.1442, + "step": 106350 + }, + { + "epoch": 2.98, + "learning_rate": 1.5846304866077688e-07, + "loss": 0.1387, + "step": 106400 + }, + { + "epoch": 2.99, + "learning_rate": 1.4443977001823027e-07, + "loss": 0.138, + "step": 106450 + }, + { + "epoch": 2.99, + "learning_rate": 1.3041649137568363e-07, + "loss": 0.1559, + "step": 106500 + }, + { + "epoch": 2.99, + "learning_rate": 1.1639321273313701e-07, + "loss": 0.1437, + "step": 106550 + }, + { + "epoch": 2.99, + "learning_rate": 1.023699340905904e-07, + "loss": 0.1597, + "step": 106600 + }, + { + "epoch": 2.99, + "learning_rate": 8.834665544804376e-08, + "loss": 0.1455, + "step": 106650 + }, + { + "epoch": 2.99, + "learning_rate": 7.432337680549714e-08, + "loss": 0.1505, + "step": 106700 + }, + { + "epoch": 2.99, + "learning_rate": 6.030009816295049e-08, + "loss": 0.1633, + "step": 106750 + }, + { + "epoch": 3.0, + "learning_rate": 4.627681952040387e-08, + "loss": 0.135, + "step": 106800 + }, + { + "epoch": 3.0, + "learning_rate": 3.225354087785724e-08, + "loss": 0.1483, + "step": 106850 + }, + { + "epoch": 3.0, + "learning_rate": 1.8230262235310616e-08, + "loss": 0.1375, + "step": 106900 + }, + { + "epoch": 3.0, + "learning_rate": 4.2069835927639876e-09, + "loss": 0.1489, + "step": 106950 + }, + { + "epoch": 3.0, + "step": 106965, + "total_flos": 7.416839339283579e+17, + "train_loss": 0.1832969891809344, + "train_runtime": 592063.0002, + "train_samples_per_second": 2.891, + "train_steps_per_second": 0.181 + } + ], + "max_steps": 106965, + "num_train_epochs": 3, + "total_flos": 7.416839339283579e+17, + "trial_name": null, + "trial_params": null +}