|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 16.0, |
|
"eval_steps": 500, |
|
"global_step": 8336, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9360204734484968e-05, |
|
"loss": 2.0443, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.9049, |
|
"eval_gen_len": 28.363272727272726, |
|
"eval_loss": 1.7046316862106323, |
|
"eval_precision": 0.9041, |
|
"eval_recall": 0.9061, |
|
"eval_rouge1": 0.4488, |
|
"eval_rouge2": 0.203, |
|
"eval_rougeL": 0.3633, |
|
"eval_rougeLsum": 0.3633, |
|
"eval_runtime": 577.3748, |
|
"eval_samples_per_second": 4.763, |
|
"eval_steps_per_second": 0.298, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.872040946896993e-05, |
|
"loss": 1.7826, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.9072, |
|
"eval_gen_len": 28.19490909090909, |
|
"eval_loss": 1.6347475051879883, |
|
"eval_precision": 0.9062, |
|
"eval_recall": 0.9085, |
|
"eval_rouge1": 0.4616, |
|
"eval_rouge2": 0.2133, |
|
"eval_rougeL": 0.3761, |
|
"eval_rougeLsum": 0.3758, |
|
"eval_runtime": 524.6485, |
|
"eval_samples_per_second": 5.242, |
|
"eval_steps_per_second": 0.328, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.8080614203454897e-05, |
|
"loss": 1.7134, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.9084, |
|
"eval_gen_len": 28.521818181818183, |
|
"eval_loss": 1.5991039276123047, |
|
"eval_precision": 0.9072, |
|
"eval_recall": 0.91, |
|
"eval_rouge1": 0.4683, |
|
"eval_rouge2": 0.2186, |
|
"eval_rougeL": 0.3824, |
|
"eval_rougeLsum": 0.3822, |
|
"eval_runtime": 539.0316, |
|
"eval_samples_per_second": 5.102, |
|
"eval_steps_per_second": 0.319, |
|
"step": 1563 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.744081893793986e-05, |
|
"loss": 1.6664, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_f1": 0.9096, |
|
"eval_gen_len": 28.24981818181818, |
|
"eval_loss": 1.5767467021942139, |
|
"eval_precision": 0.9087, |
|
"eval_recall": 0.9109, |
|
"eval_rouge1": 0.4738, |
|
"eval_rouge2": 0.2233, |
|
"eval_rougeL": 0.3878, |
|
"eval_rougeLsum": 0.3876, |
|
"eval_runtime": 529.9968, |
|
"eval_samples_per_second": 5.189, |
|
"eval_steps_per_second": 0.325, |
|
"step": 2084 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.6801023672424827e-05, |
|
"loss": 1.6296, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.9103, |
|
"eval_gen_len": 28.239636363636365, |
|
"eval_loss": 1.5595422983169556, |
|
"eval_precision": 0.9093, |
|
"eval_recall": 0.9117, |
|
"eval_rouge1": 0.4775, |
|
"eval_rouge2": 0.2265, |
|
"eval_rougeL": 0.3911, |
|
"eval_rougeLsum": 0.391, |
|
"eval_runtime": 526.5193, |
|
"eval_samples_per_second": 5.223, |
|
"eval_steps_per_second": 0.327, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 1.616122840690979e-05, |
|
"loss": 1.5984, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_f1": 0.9109, |
|
"eval_gen_len": 28.28, |
|
"eval_loss": 1.5468252897262573, |
|
"eval_precision": 0.9098, |
|
"eval_recall": 0.9124, |
|
"eval_rouge1": 0.4805, |
|
"eval_rouge2": 0.2284, |
|
"eval_rougeL": 0.3941, |
|
"eval_rougeLsum": 0.3938, |
|
"eval_runtime": 512.3397, |
|
"eval_samples_per_second": 5.368, |
|
"eval_steps_per_second": 0.336, |
|
"step": 3126 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 1.5521433141394756e-05, |
|
"loss": 1.5738, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_f1": 0.9113, |
|
"eval_gen_len": 27.837818181818182, |
|
"eval_loss": 1.5370196104049683, |
|
"eval_precision": 0.9105, |
|
"eval_recall": 0.9124, |
|
"eval_rouge1": 0.4807, |
|
"eval_rouge2": 0.2296, |
|
"eval_rougeL": 0.3945, |
|
"eval_rougeLsum": 0.3946, |
|
"eval_runtime": 509.6023, |
|
"eval_samples_per_second": 5.396, |
|
"eval_steps_per_second": 0.338, |
|
"step": 3647 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.0403071017274472e-05, |
|
"loss": 1.5476, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_f1": 0.9114, |
|
"eval_gen_len": 27.736363636363638, |
|
"eval_loss": 1.530755639076233, |
|
"eval_precision": 0.9108, |
|
"eval_recall": 0.9125, |
|
"eval_rouge1": 0.4823, |
|
"eval_rouge2": 0.2315, |
|
"eval_rougeL": 0.3963, |
|
"eval_rougeLsum": 0.3965, |
|
"eval_runtime": 510.2185, |
|
"eval_samples_per_second": 5.39, |
|
"eval_steps_per_second": 0.337, |
|
"step": 4168 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 9.203454894433782e-06, |
|
"loss": 1.535, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_f1": 0.9116, |
|
"eval_gen_len": 27.653454545454544, |
|
"eval_loss": 1.5260871648788452, |
|
"eval_precision": 0.911, |
|
"eval_recall": 0.9125, |
|
"eval_rouge1": 0.4829, |
|
"eval_rouge2": 0.2309, |
|
"eval_rougeL": 0.3974, |
|
"eval_rougeLsum": 0.3974, |
|
"eval_runtime": 503.2649, |
|
"eval_samples_per_second": 5.464, |
|
"eval_steps_per_second": 0.342, |
|
"step": 4689 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 8.003838771593091e-06, |
|
"loss": 1.52, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_f1": 0.9117, |
|
"eval_gen_len": 27.816, |
|
"eval_loss": 1.52312433719635, |
|
"eval_precision": 0.911, |
|
"eval_recall": 0.9128, |
|
"eval_rouge1": 0.4847, |
|
"eval_rouge2": 0.2332, |
|
"eval_rougeL": 0.3992, |
|
"eval_rougeLsum": 0.3993, |
|
"eval_runtime": 522.9989, |
|
"eval_samples_per_second": 5.258, |
|
"eval_steps_per_second": 0.329, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 6.8042226487524e-06, |
|
"loss": 1.5145, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_f1": 0.9121, |
|
"eval_gen_len": 27.360363636363637, |
|
"eval_loss": 1.519996166229248, |
|
"eval_precision": 0.9119, |
|
"eval_recall": 0.9127, |
|
"eval_rouge1": 0.4851, |
|
"eval_rouge2": 0.2339, |
|
"eval_rougeL": 0.4004, |
|
"eval_rougeLsum": 0.4006, |
|
"eval_runtime": 501.564, |
|
"eval_samples_per_second": 5.483, |
|
"eval_steps_per_second": 0.343, |
|
"step": 5731 |
|
}, |
|
{ |
|
"epoch": 11.52, |
|
"learning_rate": 5.6046065259117085e-06, |
|
"loss": 1.5028, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_f1": 0.9122, |
|
"eval_gen_len": 27.462545454545456, |
|
"eval_loss": 1.5178437232971191, |
|
"eval_precision": 0.9118, |
|
"eval_recall": 0.9129, |
|
"eval_rouge1": 0.4858, |
|
"eval_rouge2": 0.2345, |
|
"eval_rougeL": 0.4001, |
|
"eval_rougeLsum": 0.4002, |
|
"eval_runtime": 501.8356, |
|
"eval_samples_per_second": 5.48, |
|
"eval_steps_per_second": 0.343, |
|
"step": 6252 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 4.404990403071018e-06, |
|
"loss": 1.4946, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_f1": 0.9121, |
|
"eval_gen_len": 27.67890909090909, |
|
"eval_loss": 1.5164216756820679, |
|
"eval_precision": 0.9115, |
|
"eval_recall": 0.9131, |
|
"eval_rouge1": 0.4859, |
|
"eval_rouge2": 0.2341, |
|
"eval_rougeL": 0.4004, |
|
"eval_rougeLsum": 0.4005, |
|
"eval_runtime": 506.9944, |
|
"eval_samples_per_second": 5.424, |
|
"eval_steps_per_second": 0.339, |
|
"step": 6773 |
|
}, |
|
{ |
|
"epoch": 13.44, |
|
"learning_rate": 3.2053742802303266e-06, |
|
"loss": 1.4877, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_f1": 0.9123, |
|
"eval_gen_len": 27.580363636363636, |
|
"eval_loss": 1.515085220336914, |
|
"eval_precision": 0.9119, |
|
"eval_recall": 0.9131, |
|
"eval_rouge1": 0.4868, |
|
"eval_rouge2": 0.235, |
|
"eval_rougeL": 0.4013, |
|
"eval_rougeLsum": 0.4013, |
|
"eval_runtime": 510.129, |
|
"eval_samples_per_second": 5.391, |
|
"eval_steps_per_second": 0.337, |
|
"step": 7294 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 2.0057581573896352e-06, |
|
"loss": 1.4855, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_f1": 0.9122, |
|
"eval_gen_len": 27.584363636363637, |
|
"eval_loss": 1.5146222114562988, |
|
"eval_precision": 0.9117, |
|
"eval_recall": 0.9131, |
|
"eval_rouge1": 0.4863, |
|
"eval_rouge2": 0.2349, |
|
"eval_rougeL": 0.4014, |
|
"eval_rougeLsum": 0.4016, |
|
"eval_runtime": 507.3504, |
|
"eval_samples_per_second": 5.42, |
|
"eval_steps_per_second": 0.339, |
|
"step": 7815 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 8.061420345489445e-07, |
|
"loss": 1.4782, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_f1": 0.9122, |
|
"eval_gen_len": 27.571636363636365, |
|
"eval_loss": 1.514625906944275, |
|
"eval_precision": 0.9118, |
|
"eval_recall": 0.9131, |
|
"eval_rouge1": 0.4863, |
|
"eval_rouge2": 0.2348, |
|
"eval_rougeL": 0.4011, |
|
"eval_rougeLsum": 0.4012, |
|
"eval_runtime": 505.7467, |
|
"eval_samples_per_second": 5.438, |
|
"eval_steps_per_second": 0.34, |
|
"step": 8336 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 8336, |
|
"total_flos": 1.1557816346520453e+18, |
|
"train_loss": 0.8733468595713434, |
|
"train_runtime": 22758.1802, |
|
"train_samples_per_second": 35.152, |
|
"train_steps_per_second": 0.366 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8336, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 16, |
|
"save_steps": 500, |
|
"total_flos": 1.1557816346520453e+18, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|