{ "best_metric": 2.237640380859375, "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14", "epoch": 10.991304347826087, "eval_steps": 500, "global_step": 158, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.0005, "loss": 0.3076, "step": 2 }, { "epoch": 0.28, "learning_rate": 0.0005, "loss": 0.3217, "step": 4 }, { "epoch": 0.42, "learning_rate": 0.0005, "loss": 0.3519, "step": 6 }, { "epoch": 0.56, "learning_rate": 0.0005, "loss": 0.3449, "step": 8 }, { "epoch": 0.7, "learning_rate": 0.0005, "loss": 0.3197, "step": 10 }, { "epoch": 0.83, "learning_rate": 0.0005, "loss": 0.3342, "step": 12 }, { "epoch": 0.97, "learning_rate": 0.0005, "loss": 0.324, "step": 14 }, { "epoch": 0.97, "eval_gen_len": 246.7396449704142, "eval_loss": 2.237640380859375, "eval_rouge1": 40.4388, "eval_rouge2": 16.4662, "eval_rougeL": 28.0771, "eval_rougeLsum": 38.3405, "eval_runtime": 1709.2671, "eval_samples_per_second": 0.198, "eval_steps_per_second": 0.025, "step": 14 }, { "epoch": 1.11, "learning_rate": 0.0005, "loss": 0.2825, "step": 16 }, { "epoch": 1.25, "learning_rate": 0.0005, "loss": 0.2835, "step": 18 }, { "epoch": 1.39, "learning_rate": 0.0005, "loss": 0.2732, "step": 20 }, { "epoch": 1.53, "learning_rate": 0.0005, "loss": 0.2928, "step": 22 }, { "epoch": 1.67, "learning_rate": 0.0005, "loss": 0.2912, "step": 24 }, { "epoch": 1.81, "learning_rate": 0.0005, "loss": 0.2914, "step": 26 }, { "epoch": 1.95, "learning_rate": 0.0005, "loss": 0.2707, "step": 28 }, { "epoch": 1.95, "eval_gen_len": 307.3786982248521, "eval_loss": 2.320437431335449, "eval_rouge1": 40.2873, "eval_rouge2": 16.7641, "eval_rougeL": 27.3895, "eval_rougeLsum": 38.2689, "eval_runtime": 1775.692, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 28 }, { "epoch": 2.09, "learning_rate": 0.0005, "loss": 0.2342, "step": 30 }, { "epoch": 2.23, "learning_rate": 0.0005, "loss": 0.2326, "step": 32 }, { "epoch": 2.37, "learning_rate": 0.0005, "loss": 0.2334, "step": 34 }, { "epoch": 2.5, "learning_rate": 0.0005, "loss": 0.2212, "step": 36 }, { "epoch": 2.64, "learning_rate": 0.0005, "loss": 0.2161, "step": 38 }, { "epoch": 2.78, "learning_rate": 0.0005, "loss": 0.2236, "step": 40 }, { "epoch": 2.92, "learning_rate": 0.0005, "loss": 0.2217, "step": 42 }, { "epoch": 2.99, "eval_gen_len": 501.93195266272187, "eval_loss": 2.5281190872192383, "eval_rouge1": 31.9916, "eval_rouge2": 13.8136, "eval_rougeL": 22.1895, "eval_rougeLsum": 30.623, "eval_runtime": 1780.5368, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 43 }, { "epoch": 3.06, "learning_rate": 0.0005, "loss": 0.2055, "step": 44 }, { "epoch": 3.2, "learning_rate": 0.0005, "loss": 0.1943, "step": 46 }, { "epoch": 3.34, "learning_rate": 0.0005, "loss": 0.1875, "step": 48 }, { "epoch": 3.48, "learning_rate": 0.0005, "loss": 0.1909, "step": 50 }, { "epoch": 3.62, "learning_rate": 0.0005, "loss": 0.1881, "step": 52 }, { "epoch": 3.76, "learning_rate": 0.0005, "loss": 0.1541, "step": 54 }, { "epoch": 3.9, "learning_rate": 0.0005, "loss": 0.1776, "step": 56 }, { "epoch": 3.97, "eval_gen_len": 489.6183431952663, "eval_loss": 2.7530014514923096, "eval_rouge1": 31.7535, "eval_rouge2": 13.8852, "eval_rougeL": 22.8653, "eval_rougeLsum": 30.3796, "eval_runtime": 1780.5033, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 57 }, { "epoch": 4.03, "learning_rate": 0.0005, "loss": 0.1876, "step": 58 }, { "epoch": 4.17, "learning_rate": 0.0005, "loss": 0.1589, "step": 60 }, { "epoch": 4.31, "learning_rate": 0.0005, "loss": 0.1529, "step": 62 }, { "epoch": 4.45, "learning_rate": 0.0005, "loss": 0.1464, "step": 64 }, { "epoch": 4.59, "learning_rate": 0.0005, "loss": 0.1689, "step": 66 }, { "epoch": 4.73, "learning_rate": 0.0005, "loss": 0.1492, "step": 68 }, { "epoch": 4.87, "learning_rate": 0.0005, "loss": 0.1424, "step": 70 }, { "epoch": 4.94, "eval_gen_len": 502.11242603550295, "eval_loss": 2.657783031463623, "eval_rouge1": 32.117, "eval_rouge2": 14.2141, "eval_rougeL": 22.3733, "eval_rougeLsum": 30.8328, "eval_runtime": 1780.0926, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 71 }, { "epoch": 5.01, "learning_rate": 0.0005, "loss": 0.1494, "step": 72 }, { "epoch": 5.15, "learning_rate": 0.0005, "loss": 0.118, "step": 74 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 0.1499, "step": 76 }, { "epoch": 5.43, "learning_rate": 0.0005, "loss": 0.1369, "step": 78 }, { "epoch": 5.57, "learning_rate": 0.0005, "loss": 0.1084, "step": 80 }, { "epoch": 5.7, "learning_rate": 0.0005, "loss": 0.117, "step": 82 }, { "epoch": 5.84, "learning_rate": 0.0005, "loss": 0.144, "step": 84 }, { "epoch": 5.98, "learning_rate": 0.0005, "loss": 0.1449, "step": 86 }, { "epoch": 5.98, "eval_gen_len": 357.31360946745565, "eval_loss": 2.5508346557617188, "eval_rouge1": 35.3448, "eval_rouge2": 13.8478, "eval_rougeL": 24.9044, "eval_rougeLsum": 33.6108, "eval_runtime": 1768.966, "eval_samples_per_second": 0.191, "eval_steps_per_second": 0.024, "step": 86 }, { "epoch": 6.12, "learning_rate": 0.0005, "loss": 0.1101, "step": 88 }, { "epoch": 6.26, "learning_rate": 0.0005, "loss": 0.0985, "step": 90 }, { "epoch": 6.4, "learning_rate": 0.0005, "loss": 0.1101, "step": 92 }, { "epoch": 6.54, "learning_rate": 0.0005, "loss": 0.1013, "step": 94 }, { "epoch": 6.68, "learning_rate": 0.0005, "loss": 0.1057, "step": 96 }, { "epoch": 6.82, "learning_rate": 0.0005, "loss": 0.1102, "step": 98 }, { "epoch": 6.96, "learning_rate": 0.0005, "loss": 0.1191, "step": 100 }, { "epoch": 6.96, "eval_gen_len": 408.86686390532543, "eval_loss": 3.162177324295044, "eval_rouge1": 37.2189, "eval_rouge2": 16.0076, "eval_rougeL": 25.7011, "eval_rougeLsum": 35.294, "eval_runtime": 1778.8704, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 100 }, { "epoch": 7.1, "learning_rate": 0.0005, "loss": 0.1, "step": 102 }, { "epoch": 7.23, "learning_rate": 0.0005, "loss": 0.0949, "step": 104 }, { "epoch": 7.37, "learning_rate": 0.0005, "loss": 0.0974, "step": 106 }, { "epoch": 7.51, "learning_rate": 0.0005, "loss": 0.096, "step": 108 }, { "epoch": 7.65, "learning_rate": 0.0005, "loss": 0.1023, "step": 110 }, { "epoch": 7.79, "learning_rate": 0.0005, "loss": 0.0829, "step": 112 }, { "epoch": 7.93, "learning_rate": 0.0005, "loss": 0.0879, "step": 114 }, { "epoch": 8.0, "eval_gen_len": 318.2278106508876, "eval_loss": 2.8510310649871826, "eval_rouge1": 39.8825, "eval_rouge2": 16.8073, "eval_rougeL": 27.2428, "eval_rougeLsum": 37.9568, "eval_runtime": 1776.4341, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 115 }, { "epoch": 8.07, "learning_rate": 0.0005, "loss": 0.0957, "step": 116 }, { "epoch": 8.21, "learning_rate": 0.0005, "loss": 0.0991, "step": 118 }, { "epoch": 8.35, "learning_rate": 0.0005, "loss": 0.095, "step": 120 }, { "epoch": 8.49, "learning_rate": 0.0005, "loss": 0.0999, "step": 122 }, { "epoch": 8.63, "learning_rate": 0.0005, "loss": 0.0787, "step": 124 }, { "epoch": 8.77, "learning_rate": 0.0005, "loss": 0.0716, "step": 126 }, { "epoch": 8.9, "learning_rate": 0.0005, "loss": 0.0899, "step": 128 }, { "epoch": 8.97, "eval_gen_len": 500.405325443787, "eval_loss": 2.9137675762176514, "eval_rouge1": 31.7139, "eval_rouge2": 13.7066, "eval_rougeL": 21.8844, "eval_rougeLsum": 30.5075, "eval_runtime": 1780.3113, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 129 }, { "epoch": 9.04, "learning_rate": 0.0005, "loss": 0.0945, "step": 130 }, { "epoch": 9.18, "learning_rate": 0.0005, "loss": 0.0597, "step": 132 }, { "epoch": 9.32, "learning_rate": 0.0005, "loss": 0.0586, "step": 134 }, { "epoch": 9.46, "learning_rate": 0.0005, "loss": 0.0757, "step": 136 }, { "epoch": 9.6, "learning_rate": 0.0005, "loss": 0.0665, "step": 138 }, { "epoch": 9.74, "learning_rate": 0.0005, "loss": 0.0594, "step": 140 }, { "epoch": 9.88, "learning_rate": 0.0005, "loss": 0.0656, "step": 142 }, { "epoch": 9.95, "eval_gen_len": 488.1686390532544, "eval_loss": 3.1616334915161133, "eval_rouge1": 33.055, "eval_rouge2": 14.5841, "eval_rougeL": 22.5883, "eval_rougeLsum": 31.7565, "eval_runtime": 1782.2406, "eval_samples_per_second": 0.19, "eval_steps_per_second": 0.024, "step": 143 }, { "epoch": 10.02, "learning_rate": 0.0005, "loss": 0.0613, "step": 144 }, { "epoch": 10.16, "learning_rate": 0.0005, "loss": 0.0621, "step": 146 }, { "epoch": 10.3, "learning_rate": 0.0005, "loss": 0.0739, "step": 148 }, { "epoch": 10.43, "learning_rate": 0.0005, "loss": 0.0841, "step": 150 }, { "epoch": 10.57, "learning_rate": 0.0005, "loss": 0.0562, "step": 152 }, { "epoch": 10.71, "learning_rate": 0.0005, "loss": 0.0611, "step": 154 }, { "epoch": 10.85, "learning_rate": 0.0005, "loss": 0.0607, "step": 156 }, { "epoch": 10.99, "learning_rate": 0.0005, "loss": 0.0542, "step": 158 }, { "epoch": 10.99, "eval_gen_len": 198.80769230769232, "eval_loss": 3.3630056381225586, "eval_rouge1": 43.7514, "eval_rouge2": 18.9011, "eval_rougeL": 29.9017, "eval_rougeLsum": 41.6887, "eval_runtime": 1460.3937, "eval_samples_per_second": 0.231, "eval_steps_per_second": 0.029, "step": 158 } ], "logging_steps": 2, "max_steps": 210, "num_train_epochs": 15, "save_steps": 500, "total_flos": 2.764359930466935e+18, "trial_name": null, "trial_params": null }