|
{ |
|
"best_metric": 37.6894, |
|
"best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-2700", |
|
"epoch": 25.209095661265028, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.160504201680673e-05, |
|
"loss": 0.0422, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_exact_match": 22.1591, |
|
"eval_loss": 0.09354351460933685, |
|
"eval_runtime": 890.466, |
|
"eval_samples_per_second": 0.602, |
|
"eval_steps_per_second": 0.038, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 8.321008403361344e-05, |
|
"loss": 0.0266, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_exact_match": 28.0303, |
|
"eval_loss": 0.08916985988616943, |
|
"eval_runtime": 876.6244, |
|
"eval_samples_per_second": 0.611, |
|
"eval_steps_per_second": 0.039, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 7.481512605042018e-05, |
|
"loss": 0.0216, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_exact_match": 30.1136, |
|
"eval_loss": 0.08875476568937302, |
|
"eval_runtime": 676.1858, |
|
"eval_samples_per_second": 0.793, |
|
"eval_steps_per_second": 0.05, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 6.642016806722689e-05, |
|
"loss": 0.0187, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"eval_exact_match": 35.0379, |
|
"eval_loss": 0.08860571682453156, |
|
"eval_runtime": 717.1877, |
|
"eval_samples_per_second": 0.747, |
|
"eval_steps_per_second": 0.047, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"learning_rate": 5.802521008403361e-05, |
|
"loss": 0.0165, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.6, |
|
"eval_exact_match": 33.7121, |
|
"eval_loss": 0.09030993282794952, |
|
"eval_runtime": 719.2199, |
|
"eval_samples_per_second": 0.745, |
|
"eval_steps_per_second": 0.047, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"learning_rate": 4.963025210084034e-05, |
|
"loss": 0.0149, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 15.13, |
|
"eval_exact_match": 36.553, |
|
"eval_loss": 0.09047655761241913, |
|
"eval_runtime": 694.4828, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.049, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"learning_rate": 4.123529411764706e-05, |
|
"loss": 0.0136, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 17.64, |
|
"eval_exact_match": 35.7955, |
|
"eval_loss": 0.09109079837799072, |
|
"eval_runtime": 751.6054, |
|
"eval_samples_per_second": 0.713, |
|
"eval_steps_per_second": 0.045, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"learning_rate": 3.284033613445378e-05, |
|
"loss": 0.0126, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 20.17, |
|
"eval_exact_match": 37.5, |
|
"eval_loss": 0.0928933173418045, |
|
"eval_runtime": 702.9176, |
|
"eval_samples_per_second": 0.763, |
|
"eval_steps_per_second": 0.048, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"learning_rate": 2.4445378151260506e-05, |
|
"loss": 0.0118, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"eval_exact_match": 37.6894, |
|
"eval_loss": 0.0941813662648201, |
|
"eval_runtime": 681.194, |
|
"eval_samples_per_second": 0.787, |
|
"eval_steps_per_second": 0.05, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 1.6050420168067226e-05, |
|
"loss": 0.0112, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"eval_exact_match": 37.6894, |
|
"eval_loss": 0.09540851414203644, |
|
"eval_runtime": 647.9067, |
|
"eval_samples_per_second": 0.827, |
|
"eval_steps_per_second": 0.052, |
|
"step": 3000 |
|
} |
|
], |
|
"max_steps": 3570, |
|
"num_train_epochs": 30, |
|
"total_flos": 3.758975266848768e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|