|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 24.0, |
|
"eval_steps": 500, |
|
"global_step": 12504, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.880038387715931e-05, |
|
"loss": 1.836, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8971, |
|
"eval_gen_len": 19.974545454545453, |
|
"eval_loss": 1.5560153722763062, |
|
"eval_precision": 0.9105, |
|
"eval_recall": 0.8843, |
|
"eval_rouge1": 0.4155, |
|
"eval_rouge2": 0.2028, |
|
"eval_rougeL": 0.3561, |
|
"eval_rougeLsum": 0.3559, |
|
"eval_runtime": 315.2437, |
|
"eval_samples_per_second": 8.723, |
|
"eval_steps_per_second": 0.546, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.760076775431862e-05, |
|
"loss": 1.5951, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8997, |
|
"eval_gen_len": 19.93527272727273, |
|
"eval_loss": 1.5003960132598877, |
|
"eval_precision": 0.9115, |
|
"eval_recall": 0.8886, |
|
"eval_rouge1": 0.4333, |
|
"eval_rouge2": 0.2136, |
|
"eval_rougeL": 0.3695, |
|
"eval_rougeLsum": 0.3694, |
|
"eval_runtime": 311.8452, |
|
"eval_samples_per_second": 8.818, |
|
"eval_steps_per_second": 0.552, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.6401151631477927e-05, |
|
"loss": 1.469, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9001, |
|
"eval_gen_len": 19.938545454545455, |
|
"eval_loss": 1.4690784215927124, |
|
"eval_precision": 0.912, |
|
"eval_recall": 0.8888, |
|
"eval_rouge1": 0.4355, |
|
"eval_rouge2": 0.2176, |
|
"eval_rougeL": 0.3729, |
|
"eval_rougeLsum": 0.3728, |
|
"eval_runtime": 312.4642, |
|
"eval_samples_per_second": 8.801, |
|
"eval_steps_per_second": 0.55, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.5201535508637238e-05, |
|
"loss": 1.373, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9003, |
|
"eval_gen_len": 19.964727272727274, |
|
"eval_loss": 1.4657667875289917, |
|
"eval_precision": 0.9137, |
|
"eval_recall": 0.8877, |
|
"eval_rouge1": 0.4311, |
|
"eval_rouge2": 0.2164, |
|
"eval_rougeL": 0.3706, |
|
"eval_rougeLsum": 0.3704, |
|
"eval_runtime": 313.2326, |
|
"eval_samples_per_second": 8.779, |
|
"eval_steps_per_second": 0.549, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.4001919385796546e-05, |
|
"loss": 1.2902, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9008, |
|
"eval_gen_len": 19.94981818181818, |
|
"eval_loss": 1.4541645050048828, |
|
"eval_precision": 0.9136, |
|
"eval_recall": 0.8887, |
|
"eval_rouge1": 0.4368, |
|
"eval_rouge2": 0.2218, |
|
"eval_rougeL": 0.3762, |
|
"eval_rougeLsum": 0.376, |
|
"eval_runtime": 313.1455, |
|
"eval_samples_per_second": 8.782, |
|
"eval_steps_per_second": 0.549, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.2802303262955855e-05, |
|
"loss": 1.222, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9018, |
|
"eval_gen_len": 19.942545454545453, |
|
"eval_loss": 1.458353042602539, |
|
"eval_precision": 0.914, |
|
"eval_recall": 0.8902, |
|
"eval_rouge1": 0.4407, |
|
"eval_rouge2": 0.223, |
|
"eval_rougeL": 0.3802, |
|
"eval_rougeLsum": 0.3798, |
|
"eval_runtime": 312.4439, |
|
"eval_samples_per_second": 8.802, |
|
"eval_steps_per_second": 0.55, |
|
"step": 3126 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.1602687140115163e-05, |
|
"loss": 1.1655, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9019, |
|
"eval_gen_len": 19.932727272727274, |
|
"eval_loss": 1.4708688259124756, |
|
"eval_precision": 0.9145, |
|
"eval_recall": 0.89, |
|
"eval_rouge1": 0.4404, |
|
"eval_rouge2": 0.2246, |
|
"eval_rougeL": 0.3806, |
|
"eval_rougeLsum": 0.3803, |
|
"eval_runtime": 313.9664, |
|
"eval_samples_per_second": 8.759, |
|
"eval_steps_per_second": 0.548, |
|
"step": 3647 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.0403071017274472e-05, |
|
"loss": 1.11, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9026, |
|
"eval_gen_len": 19.908363636363635, |
|
"eval_loss": 1.47238028049469, |
|
"eval_precision": 0.9153, |
|
"eval_recall": 0.8906, |
|
"eval_rouge1": 0.4435, |
|
"eval_rouge2": 0.2269, |
|
"eval_rougeL": 0.383, |
|
"eval_rougeLsum": 0.3828, |
|
"eval_runtime": 312.3634, |
|
"eval_samples_per_second": 8.804, |
|
"eval_steps_per_second": 0.551, |
|
"step": 4168 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.203454894433782e-06, |
|
"loss": 1.0629, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9028, |
|
"eval_gen_len": 19.928, |
|
"eval_loss": 1.485286831855774, |
|
"eval_precision": 0.9155, |
|
"eval_recall": 0.8908, |
|
"eval_rouge1": 0.4431, |
|
"eval_rouge2": 0.2273, |
|
"eval_rougeL": 0.3832, |
|
"eval_rougeLsum": 0.383, |
|
"eval_runtime": 312.2978, |
|
"eval_samples_per_second": 8.806, |
|
"eval_steps_per_second": 0.551, |
|
"step": 4689 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 8.003838771593091e-06, |
|
"loss": 1.023, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9021, |
|
"eval_gen_len": 19.944, |
|
"eval_loss": 1.503290057182312, |
|
"eval_precision": 0.9152, |
|
"eval_recall": 0.8897, |
|
"eval_rouge1": 0.4409, |
|
"eval_rouge2": 0.2247, |
|
"eval_rougeL": 0.3819, |
|
"eval_rougeLsum": 0.3818, |
|
"eval_runtime": 312.2524, |
|
"eval_samples_per_second": 8.807, |
|
"eval_steps_per_second": 0.551, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 6.8042226487524e-06, |
|
"loss": 0.9862, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 19.912363636363636, |
|
"eval_loss": 1.5074084997177124, |
|
"eval_precision": 0.9158, |
|
"eval_recall": 0.8916, |
|
"eval_rouge1": 0.4479, |
|
"eval_rouge2": 0.2278, |
|
"eval_rougeL": 0.3862, |
|
"eval_rougeLsum": 0.386, |
|
"eval_runtime": 313.5934, |
|
"eval_samples_per_second": 8.769, |
|
"eval_steps_per_second": 0.548, |
|
"step": 5731 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 5.6046065259117085e-06, |
|
"loss": 0.957, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.903, |
|
"eval_gen_len": 19.90327272727273, |
|
"eval_loss": 1.518417239189148, |
|
"eval_precision": 0.9159, |
|
"eval_recall": 0.8909, |
|
"eval_rouge1": 0.4461, |
|
"eval_rouge2": 0.2264, |
|
"eval_rougeL": 0.3846, |
|
"eval_rougeLsum": 0.3847, |
|
"eval_runtime": 314.9612, |
|
"eval_samples_per_second": 8.731, |
|
"eval_steps_per_second": 0.546, |
|
"step": 6252 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 4.404990403071018e-06, |
|
"loss": 0.9315, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9031, |
|
"eval_gen_len": 19.908363636363635, |
|
"eval_loss": 1.5269190073013306, |
|
"eval_precision": 0.9156, |
|
"eval_recall": 0.8912, |
|
"eval_rouge1": 0.4473, |
|
"eval_rouge2": 0.2284, |
|
"eval_rougeL": 0.386, |
|
"eval_rougeLsum": 0.3858, |
|
"eval_runtime": 311.2352, |
|
"eval_samples_per_second": 8.836, |
|
"eval_steps_per_second": 0.553, |
|
"step": 6773 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.2053742802303266e-06, |
|
"loss": 0.9093, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9029, |
|
"eval_gen_len": 19.913454545454545, |
|
"eval_loss": 1.5310986042022705, |
|
"eval_precision": 0.9155, |
|
"eval_recall": 0.8909, |
|
"eval_rouge1": 0.4453, |
|
"eval_rouge2": 0.2273, |
|
"eval_rougeL": 0.3846, |
|
"eval_rougeLsum": 0.3843, |
|
"eval_runtime": 313.2169, |
|
"eval_samples_per_second": 8.78, |
|
"eval_steps_per_second": 0.549, |
|
"step": 7294 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 2.0057581573896352e-06, |
|
"loss": 0.8927, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9029, |
|
"eval_gen_len": 19.906545454545455, |
|
"eval_loss": 1.5351076126098633, |
|
"eval_precision": 0.9156, |
|
"eval_recall": 0.8909, |
|
"eval_rouge1": 0.4457, |
|
"eval_rouge2": 0.2267, |
|
"eval_rougeL": 0.3842, |
|
"eval_rougeLsum": 0.384, |
|
"eval_runtime": 314.8443, |
|
"eval_samples_per_second": 8.734, |
|
"eval_steps_per_second": 0.546, |
|
"step": 7815 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 8.061420345489445e-07, |
|
"loss": 0.8773, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.9025, |
|
"eval_gen_len": 19.942545454545453, |
|
"eval_loss": 1.5439822673797607, |
|
"eval_precision": 0.9151, |
|
"eval_recall": 0.8905, |
|
"eval_rouge1": 0.4427, |
|
"eval_rouge2": 0.225, |
|
"eval_rougeL": 0.382, |
|
"eval_rougeLsum": 0.382, |
|
"eval_runtime": 314.8749, |
|
"eval_samples_per_second": 8.734, |
|
"eval_steps_per_second": 0.546, |
|
"step": 8336 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 6.404350607805503e-06, |
|
"loss": 0.8806, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_f1": 0.9036, |
|
"eval_gen_len": 19.88509090909091, |
|
"eval_loss": 1.5509530305862427, |
|
"eval_precision": 0.9159, |
|
"eval_recall": 0.8919, |
|
"eval_rouge1": 0.4495, |
|
"eval_rouge2": 0.2279, |
|
"eval_rougeL": 0.3868, |
|
"eval_rougeLsum": 0.3869, |
|
"eval_runtime": 312.7951, |
|
"eval_samples_per_second": 8.792, |
|
"eval_steps_per_second": 0.55, |
|
"step": 8857 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"learning_rate": 5.6046065259117085e-06, |
|
"loss": 0.8683, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_f1": 0.9038, |
|
"eval_gen_len": 19.88290909090909, |
|
"eval_loss": 1.56792151927948, |
|
"eval_precision": 0.9161, |
|
"eval_recall": 0.8921, |
|
"eval_rouge1": 0.4473, |
|
"eval_rouge2": 0.2282, |
|
"eval_rougeL": 0.3856, |
|
"eval_rougeLsum": 0.3857, |
|
"eval_runtime": 314.8371, |
|
"eval_samples_per_second": 8.735, |
|
"eval_steps_per_second": 0.546, |
|
"step": 9378 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 4.804862444017915e-06, |
|
"loss": 0.8413, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_f1": 0.9035, |
|
"eval_gen_len": 19.913454545454545, |
|
"eval_loss": 1.574545979499817, |
|
"eval_precision": 0.9159, |
|
"eval_recall": 0.8918, |
|
"eval_rouge1": 0.4492, |
|
"eval_rouge2": 0.2282, |
|
"eval_rougeL": 0.3861, |
|
"eval_rougeLsum": 0.3864, |
|
"eval_runtime": 311.5846, |
|
"eval_samples_per_second": 8.826, |
|
"eval_steps_per_second": 0.552, |
|
"step": 9899 |
|
}, |
|
{ |
|
"epoch": 19.19, |
|
"learning_rate": 4.005118362124121e-06, |
|
"loss": 0.8257, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_f1": 0.9031, |
|
"eval_gen_len": 19.899636363636365, |
|
"eval_loss": 1.583512544631958, |
|
"eval_precision": 0.9153, |
|
"eval_recall": 0.8915, |
|
"eval_rouge1": 0.4471, |
|
"eval_rouge2": 0.2266, |
|
"eval_rougeL": 0.3852, |
|
"eval_rougeLsum": 0.3853, |
|
"eval_runtime": 311.7771, |
|
"eval_samples_per_second": 8.82, |
|
"eval_steps_per_second": 0.552, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"learning_rate": 3.2053742802303266e-06, |
|
"loss": 0.8097, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 19.907272727272726, |
|
"eval_loss": 1.59569251537323, |
|
"eval_precision": 0.9156, |
|
"eval_recall": 0.8919, |
|
"eval_rouge1": 0.4472, |
|
"eval_rouge2": 0.2271, |
|
"eval_rougeL": 0.3856, |
|
"eval_rougeLsum": 0.3856, |
|
"eval_runtime": 309.5923, |
|
"eval_samples_per_second": 8.883, |
|
"eval_steps_per_second": 0.556, |
|
"step": 10941 |
|
}, |
|
{ |
|
"epoch": 21.11, |
|
"learning_rate": 2.4056301983365325e-06, |
|
"loss": 0.7926, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 19.892, |
|
"eval_loss": 1.595582127571106, |
|
"eval_precision": 0.9159, |
|
"eval_recall": 0.8916, |
|
"eval_rouge1": 0.4479, |
|
"eval_rouge2": 0.2282, |
|
"eval_rougeL": 0.3855, |
|
"eval_rougeLsum": 0.3857, |
|
"eval_runtime": 311.5772, |
|
"eval_samples_per_second": 8.826, |
|
"eval_steps_per_second": 0.552, |
|
"step": 11462 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 1.6058861164427384e-06, |
|
"loss": 0.7841, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_f1": 0.9028, |
|
"eval_gen_len": 19.912, |
|
"eval_loss": 1.5990447998046875, |
|
"eval_precision": 0.9155, |
|
"eval_recall": 0.8908, |
|
"eval_rouge1": 0.4444, |
|
"eval_rouge2": 0.2261, |
|
"eval_rougeL": 0.3833, |
|
"eval_rougeLsum": 0.3834, |
|
"eval_runtime": 311.6057, |
|
"eval_samples_per_second": 8.825, |
|
"eval_steps_per_second": 0.552, |
|
"step": 11983 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 8.061420345489445e-07, |
|
"loss": 0.7734, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 23.99, |
|
"learning_rate": 6.397952655150352e-09, |
|
"loss": 0.7669, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_f1": 0.9034, |
|
"eval_gen_len": 19.90290909090909, |
|
"eval_loss": 1.6053136587142944, |
|
"eval_precision": 0.9159, |
|
"eval_recall": 0.8916, |
|
"eval_rouge1": 0.4481, |
|
"eval_rouge2": 0.2283, |
|
"eval_rougeL": 0.3861, |
|
"eval_rougeLsum": 0.3863, |
|
"eval_runtime": 314.5795, |
|
"eval_samples_per_second": 8.742, |
|
"eval_steps_per_second": 0.547, |
|
"step": 12504 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"step": 12504, |
|
"total_flos": 2.569106349028344e+18, |
|
"train_loss": 0.22391605226564926, |
|
"train_runtime": 17118.5241, |
|
"train_samples_per_second": 70.1, |
|
"train_steps_per_second": 0.73 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12504, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 24, |
|
"save_steps": 500, |
|
"total_flos": 2.569106349028344e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|