longt5_xl_summ_screen_bp_only_30 / trainer_state.json
learn3r's picture
Upload 14 files
e03e6a3
raw
history blame contribute delete
No virus
12.8 kB
{
"best_metric": 2.237640380859375,
"best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_bp_only_30/checkpoint-14",
"epoch": 10.991304347826087,
"eval_steps": 500,
"global_step": 158,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.14,
"learning_rate": 0.0005,
"loss": 0.3076,
"step": 2
},
{
"epoch": 0.28,
"learning_rate": 0.0005,
"loss": 0.3217,
"step": 4
},
{
"epoch": 0.42,
"learning_rate": 0.0005,
"loss": 0.3519,
"step": 6
},
{
"epoch": 0.56,
"learning_rate": 0.0005,
"loss": 0.3449,
"step": 8
},
{
"epoch": 0.7,
"learning_rate": 0.0005,
"loss": 0.3197,
"step": 10
},
{
"epoch": 0.83,
"learning_rate": 0.0005,
"loss": 0.3342,
"step": 12
},
{
"epoch": 0.97,
"learning_rate": 0.0005,
"loss": 0.324,
"step": 14
},
{
"epoch": 0.97,
"eval_gen_len": 246.7396449704142,
"eval_loss": 2.237640380859375,
"eval_rouge1": 40.4388,
"eval_rouge2": 16.4662,
"eval_rougeL": 28.0771,
"eval_rougeLsum": 38.3405,
"eval_runtime": 1709.2671,
"eval_samples_per_second": 0.198,
"eval_steps_per_second": 0.025,
"step": 14
},
{
"epoch": 1.11,
"learning_rate": 0.0005,
"loss": 0.2825,
"step": 16
},
{
"epoch": 1.25,
"learning_rate": 0.0005,
"loss": 0.2835,
"step": 18
},
{
"epoch": 1.39,
"learning_rate": 0.0005,
"loss": 0.2732,
"step": 20
},
{
"epoch": 1.53,
"learning_rate": 0.0005,
"loss": 0.2928,
"step": 22
},
{
"epoch": 1.67,
"learning_rate": 0.0005,
"loss": 0.2912,
"step": 24
},
{
"epoch": 1.81,
"learning_rate": 0.0005,
"loss": 0.2914,
"step": 26
},
{
"epoch": 1.95,
"learning_rate": 0.0005,
"loss": 0.2707,
"step": 28
},
{
"epoch": 1.95,
"eval_gen_len": 307.3786982248521,
"eval_loss": 2.320437431335449,
"eval_rouge1": 40.2873,
"eval_rouge2": 16.7641,
"eval_rougeL": 27.3895,
"eval_rougeLsum": 38.2689,
"eval_runtime": 1775.692,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 28
},
{
"epoch": 2.09,
"learning_rate": 0.0005,
"loss": 0.2342,
"step": 30
},
{
"epoch": 2.23,
"learning_rate": 0.0005,
"loss": 0.2326,
"step": 32
},
{
"epoch": 2.37,
"learning_rate": 0.0005,
"loss": 0.2334,
"step": 34
},
{
"epoch": 2.5,
"learning_rate": 0.0005,
"loss": 0.2212,
"step": 36
},
{
"epoch": 2.64,
"learning_rate": 0.0005,
"loss": 0.2161,
"step": 38
},
{
"epoch": 2.78,
"learning_rate": 0.0005,
"loss": 0.2236,
"step": 40
},
{
"epoch": 2.92,
"learning_rate": 0.0005,
"loss": 0.2217,
"step": 42
},
{
"epoch": 2.99,
"eval_gen_len": 501.93195266272187,
"eval_loss": 2.5281190872192383,
"eval_rouge1": 31.9916,
"eval_rouge2": 13.8136,
"eval_rougeL": 22.1895,
"eval_rougeLsum": 30.623,
"eval_runtime": 1780.5368,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 43
},
{
"epoch": 3.06,
"learning_rate": 0.0005,
"loss": 0.2055,
"step": 44
},
{
"epoch": 3.2,
"learning_rate": 0.0005,
"loss": 0.1943,
"step": 46
},
{
"epoch": 3.34,
"learning_rate": 0.0005,
"loss": 0.1875,
"step": 48
},
{
"epoch": 3.48,
"learning_rate": 0.0005,
"loss": 0.1909,
"step": 50
},
{
"epoch": 3.62,
"learning_rate": 0.0005,
"loss": 0.1881,
"step": 52
},
{
"epoch": 3.76,
"learning_rate": 0.0005,
"loss": 0.1541,
"step": 54
},
{
"epoch": 3.9,
"learning_rate": 0.0005,
"loss": 0.1776,
"step": 56
},
{
"epoch": 3.97,
"eval_gen_len": 489.6183431952663,
"eval_loss": 2.7530014514923096,
"eval_rouge1": 31.7535,
"eval_rouge2": 13.8852,
"eval_rougeL": 22.8653,
"eval_rougeLsum": 30.3796,
"eval_runtime": 1780.5033,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 57
},
{
"epoch": 4.03,
"learning_rate": 0.0005,
"loss": 0.1876,
"step": 58
},
{
"epoch": 4.17,
"learning_rate": 0.0005,
"loss": 0.1589,
"step": 60
},
{
"epoch": 4.31,
"learning_rate": 0.0005,
"loss": 0.1529,
"step": 62
},
{
"epoch": 4.45,
"learning_rate": 0.0005,
"loss": 0.1464,
"step": 64
},
{
"epoch": 4.59,
"learning_rate": 0.0005,
"loss": 0.1689,
"step": 66
},
{
"epoch": 4.73,
"learning_rate": 0.0005,
"loss": 0.1492,
"step": 68
},
{
"epoch": 4.87,
"learning_rate": 0.0005,
"loss": 0.1424,
"step": 70
},
{
"epoch": 4.94,
"eval_gen_len": 502.11242603550295,
"eval_loss": 2.657783031463623,
"eval_rouge1": 32.117,
"eval_rouge2": 14.2141,
"eval_rougeL": 22.3733,
"eval_rougeLsum": 30.8328,
"eval_runtime": 1780.0926,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 71
},
{
"epoch": 5.01,
"learning_rate": 0.0005,
"loss": 0.1494,
"step": 72
},
{
"epoch": 5.15,
"learning_rate": 0.0005,
"loss": 0.118,
"step": 74
},
{
"epoch": 5.29,
"learning_rate": 0.0005,
"loss": 0.1499,
"step": 76
},
{
"epoch": 5.43,
"learning_rate": 0.0005,
"loss": 0.1369,
"step": 78
},
{
"epoch": 5.57,
"learning_rate": 0.0005,
"loss": 0.1084,
"step": 80
},
{
"epoch": 5.7,
"learning_rate": 0.0005,
"loss": 0.117,
"step": 82
},
{
"epoch": 5.84,
"learning_rate": 0.0005,
"loss": 0.144,
"step": 84
},
{
"epoch": 5.98,
"learning_rate": 0.0005,
"loss": 0.1449,
"step": 86
},
{
"epoch": 5.98,
"eval_gen_len": 357.31360946745565,
"eval_loss": 2.5508346557617188,
"eval_rouge1": 35.3448,
"eval_rouge2": 13.8478,
"eval_rougeL": 24.9044,
"eval_rougeLsum": 33.6108,
"eval_runtime": 1768.966,
"eval_samples_per_second": 0.191,
"eval_steps_per_second": 0.024,
"step": 86
},
{
"epoch": 6.12,
"learning_rate": 0.0005,
"loss": 0.1101,
"step": 88
},
{
"epoch": 6.26,
"learning_rate": 0.0005,
"loss": 0.0985,
"step": 90
},
{
"epoch": 6.4,
"learning_rate": 0.0005,
"loss": 0.1101,
"step": 92
},
{
"epoch": 6.54,
"learning_rate": 0.0005,
"loss": 0.1013,
"step": 94
},
{
"epoch": 6.68,
"learning_rate": 0.0005,
"loss": 0.1057,
"step": 96
},
{
"epoch": 6.82,
"learning_rate": 0.0005,
"loss": 0.1102,
"step": 98
},
{
"epoch": 6.96,
"learning_rate": 0.0005,
"loss": 0.1191,
"step": 100
},
{
"epoch": 6.96,
"eval_gen_len": 408.86686390532543,
"eval_loss": 3.162177324295044,
"eval_rouge1": 37.2189,
"eval_rouge2": 16.0076,
"eval_rougeL": 25.7011,
"eval_rougeLsum": 35.294,
"eval_runtime": 1778.8704,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 100
},
{
"epoch": 7.1,
"learning_rate": 0.0005,
"loss": 0.1,
"step": 102
},
{
"epoch": 7.23,
"learning_rate": 0.0005,
"loss": 0.0949,
"step": 104
},
{
"epoch": 7.37,
"learning_rate": 0.0005,
"loss": 0.0974,
"step": 106
},
{
"epoch": 7.51,
"learning_rate": 0.0005,
"loss": 0.096,
"step": 108
},
{
"epoch": 7.65,
"learning_rate": 0.0005,
"loss": 0.1023,
"step": 110
},
{
"epoch": 7.79,
"learning_rate": 0.0005,
"loss": 0.0829,
"step": 112
},
{
"epoch": 7.93,
"learning_rate": 0.0005,
"loss": 0.0879,
"step": 114
},
{
"epoch": 8.0,
"eval_gen_len": 318.2278106508876,
"eval_loss": 2.8510310649871826,
"eval_rouge1": 39.8825,
"eval_rouge2": 16.8073,
"eval_rougeL": 27.2428,
"eval_rougeLsum": 37.9568,
"eval_runtime": 1776.4341,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 115
},
{
"epoch": 8.07,
"learning_rate": 0.0005,
"loss": 0.0957,
"step": 116
},
{
"epoch": 8.21,
"learning_rate": 0.0005,
"loss": 0.0991,
"step": 118
},
{
"epoch": 8.35,
"learning_rate": 0.0005,
"loss": 0.095,
"step": 120
},
{
"epoch": 8.49,
"learning_rate": 0.0005,
"loss": 0.0999,
"step": 122
},
{
"epoch": 8.63,
"learning_rate": 0.0005,
"loss": 0.0787,
"step": 124
},
{
"epoch": 8.77,
"learning_rate": 0.0005,
"loss": 0.0716,
"step": 126
},
{
"epoch": 8.9,
"learning_rate": 0.0005,
"loss": 0.0899,
"step": 128
},
{
"epoch": 8.97,
"eval_gen_len": 500.405325443787,
"eval_loss": 2.9137675762176514,
"eval_rouge1": 31.7139,
"eval_rouge2": 13.7066,
"eval_rougeL": 21.8844,
"eval_rougeLsum": 30.5075,
"eval_runtime": 1780.3113,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 129
},
{
"epoch": 9.04,
"learning_rate": 0.0005,
"loss": 0.0945,
"step": 130
},
{
"epoch": 9.18,
"learning_rate": 0.0005,
"loss": 0.0597,
"step": 132
},
{
"epoch": 9.32,
"learning_rate": 0.0005,
"loss": 0.0586,
"step": 134
},
{
"epoch": 9.46,
"learning_rate": 0.0005,
"loss": 0.0757,
"step": 136
},
{
"epoch": 9.6,
"learning_rate": 0.0005,
"loss": 0.0665,
"step": 138
},
{
"epoch": 9.74,
"learning_rate": 0.0005,
"loss": 0.0594,
"step": 140
},
{
"epoch": 9.88,
"learning_rate": 0.0005,
"loss": 0.0656,
"step": 142
},
{
"epoch": 9.95,
"eval_gen_len": 488.1686390532544,
"eval_loss": 3.1616334915161133,
"eval_rouge1": 33.055,
"eval_rouge2": 14.5841,
"eval_rougeL": 22.5883,
"eval_rougeLsum": 31.7565,
"eval_runtime": 1782.2406,
"eval_samples_per_second": 0.19,
"eval_steps_per_second": 0.024,
"step": 143
},
{
"epoch": 10.02,
"learning_rate": 0.0005,
"loss": 0.0613,
"step": 144
},
{
"epoch": 10.16,
"learning_rate": 0.0005,
"loss": 0.0621,
"step": 146
},
{
"epoch": 10.3,
"learning_rate": 0.0005,
"loss": 0.0739,
"step": 148
},
{
"epoch": 10.43,
"learning_rate": 0.0005,
"loss": 0.0841,
"step": 150
},
{
"epoch": 10.57,
"learning_rate": 0.0005,
"loss": 0.0562,
"step": 152
},
{
"epoch": 10.71,
"learning_rate": 0.0005,
"loss": 0.0611,
"step": 154
},
{
"epoch": 10.85,
"learning_rate": 0.0005,
"loss": 0.0607,
"step": 156
},
{
"epoch": 10.99,
"learning_rate": 0.0005,
"loss": 0.0542,
"step": 158
},
{
"epoch": 10.99,
"eval_gen_len": 198.80769230769232,
"eval_loss": 3.3630056381225586,
"eval_rouge1": 43.7514,
"eval_rouge2": 18.9011,
"eval_rougeL": 29.9017,
"eval_rougeLsum": 41.6887,
"eval_runtime": 1460.3937,
"eval_samples_per_second": 0.231,
"eval_steps_per_second": 0.029,
"step": 158
}
],
"logging_steps": 2,
"max_steps": 210,
"num_train_epochs": 15,
"save_steps": 500,
"total_flos": 2.764359930466935e+18,
"trial_name": null,
"trial_params": null
}