|
{
  "best_metric": 76.09384754933019,
  "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_base_squad_model/checkpoint-5022",
  "epoch": 10.0,
  "eval_steps": 500,
  "global_step": 5580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 558,
      "train_exact_match": 54.345654345654346,
      "train_f1": 72.79270519378026,
      "train_runtime": 11.7013,
      "train_samples_per_second": 90.247,
      "train_steps_per_second": 3.248
    },
    {
      "epoch": 1.0,
      "grad_norm": 80.72039031982422,
      "learning_rate": 5e-06,
      "loss": 1.7207,
      "step": 558
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 52.21875,
      "eval_f1": 71.55991959566774,
      "eval_runtime": 37.0088,
      "eval_samples_per_second": 90.762,
      "eval_steps_per_second": 3.242,
      "step": 558
    },
    {
      "epoch": 2.0,
      "step": 1116,
      "train_exact_match": 59.54045954045954,
      "train_f1": 78.95661212870544,
      "train_runtime": 11.6455,
      "train_samples_per_second": 89.39,
      "train_steps_per_second": 3.263
    },
    {
      "epoch": 2.0,
      "grad_norm": 73.76553344726562,
      "learning_rate": 1e-05,
      "loss": 1.352,
      "step": 1116
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 55.46875,
      "eval_f1": 74.01581668435861,
      "eval_runtime": 37.1575,
      "eval_samples_per_second": 90.399,
      "eval_steps_per_second": 3.229,
      "step": 1116
    },
    {
      "epoch": 3.0,
      "step": 1674,
      "train_exact_match": 66.13386613386614,
      "train_f1": 83.54611322142382,
      "train_runtime": 11.6678,
      "train_samples_per_second": 90.163,
      "train_steps_per_second": 3.257
    },
    {
      "epoch": 3.0,
      "grad_norm": 73.22183227539062,
      "learning_rate": 8.750000000000001e-06,
      "loss": 1.1278,
      "step": 1674
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 57.40625,
      "eval_f1": 75.65492929099143,
      "eval_runtime": 37.0792,
      "eval_samples_per_second": 90.59,
      "eval_steps_per_second": 3.236,
      "step": 1674
    },
    {
      "epoch": 4.0,
      "step": 2232,
      "train_exact_match": 67.23276723276723,
      "train_f1": 85.47766787697041,
      "train_runtime": 11.6568,
      "train_samples_per_second": 89.733,
      "train_steps_per_second": 3.26
    },
    {
      "epoch": 4.0,
      "grad_norm": 12.79330825805664,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.9505,
      "step": 2232
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 57.9375,
      "eval_f1": 75.99186608404226,
      "eval_runtime": 37.0588,
      "eval_samples_per_second": 90.64,
      "eval_steps_per_second": 3.238,
      "step": 2232
    },
    {
      "epoch": 5.0,
      "step": 2790,
      "train_exact_match": 73.62637362637362,
      "train_f1": 88.41775048247749,
      "train_runtime": 11.6282,
      "train_samples_per_second": 89.61,
      "train_steps_per_second": 3.268
    },
    {
      "epoch": 5.0,
      "grad_norm": 57.348087310791016,
      "learning_rate": 6.25e-06,
      "loss": 0.8175,
      "step": 2790
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 58.34375,
      "eval_f1": 76.03000703182897,
      "eval_runtime": 37.1357,
      "eval_samples_per_second": 90.452,
      "eval_steps_per_second": 3.231,
      "step": 2790
    },
    {
      "epoch": 6.0,
      "step": 3348,
      "train_exact_match": 78.62137862137862,
      "train_f1": 90.6016744099524,
      "train_runtime": 11.6628,
      "train_samples_per_second": 89.515,
      "train_steps_per_second": 3.258
    },
    {
      "epoch": 6.0,
      "grad_norm": 90.37775421142578,
      "learning_rate": 5e-06,
      "loss": 0.7139,
      "step": 3348
    },
    {
      "epoch": 6.0,
      "eval_exact_match": 58.8125,
      "eval_f1": 75.74834491209981,
      "eval_runtime": 37.1026,
      "eval_samples_per_second": 90.533,
      "eval_steps_per_second": 3.234,
      "step": 3348
    },
    {
      "epoch": 7.0,
      "step": 3906,
      "train_exact_match": 78.82117882117882,
      "train_f1": 92.66952734441121,
      "train_runtime": 11.5936,
      "train_samples_per_second": 89.705,
      "train_steps_per_second": 3.278
    },
    {
      "epoch": 7.0,
      "grad_norm": 47.05348587036133,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.6345,
      "step": 3906
    },
    {
      "epoch": 7.0,
      "eval_exact_match": 58.90625,
      "eval_f1": 76.05687183756119,
      "eval_runtime": 37.0467,
      "eval_samples_per_second": 90.669,
      "eval_steps_per_second": 3.239,
      "step": 3906
    },
    {
      "epoch": 8.0,
      "step": 4464,
      "train_exact_match": 80.21978021978022,
      "train_f1": 92.08409867123048,
      "train_runtime": 11.6489,
      "train_samples_per_second": 89.708,
      "train_steps_per_second": 3.262
    },
    {
      "epoch": 8.0,
      "grad_norm": 45.41628646850586,
      "learning_rate": 2.5e-06,
      "loss": 0.5799,
      "step": 4464
    },
    {
      "epoch": 8.0,
      "eval_exact_match": 58.9375,
      "eval_f1": 75.91207274041706,
      "eval_runtime": 37.0443,
      "eval_samples_per_second": 90.675,
      "eval_steps_per_second": 3.239,
      "step": 4464
    },
    {
      "epoch": 9.0,
      "step": 5022,
      "train_exact_match": 83.11688311688312,
      "train_f1": 93.78525979268544,
      "train_runtime": 11.634,
      "train_samples_per_second": 89.737,
      "train_steps_per_second": 3.266
    },
    {
      "epoch": 9.0,
      "grad_norm": 66.75804901123047,
      "learning_rate": 1.25e-06,
      "loss": 0.5267,
      "step": 5022
    },
    {
      "epoch": 9.0,
      "eval_exact_match": 58.96875,
      "eval_f1": 76.09384754933019,
      "eval_runtime": 37.1002,
      "eval_samples_per_second": 90.538,
      "eval_steps_per_second": 3.234,
      "step": 5022
    },
    {
      "epoch": 10.0,
      "step": 5580,
      "train_exact_match": 81.71828171828172,
      "train_f1": 93.01859252339878,
      "train_runtime": 11.6538,
      "train_samples_per_second": 89.842,
      "train_steps_per_second": 3.261
    },
    {
      "epoch": 10.0,
      "grad_norm": 77.05963897705078,
      "learning_rate": 0.0,
      "loss": 0.4995,
      "step": 5580
    },
    {
      "epoch": 10.0,
      "eval_exact_match": 58.71875,
      "eval_f1": 76.0502313865696,
      "eval_runtime": 37.0653,
      "eval_samples_per_second": 90.624,
      "eval_steps_per_second": 3.238,
      "step": 5580
    },
    {
      "epoch": 10.0,
      "step": 5580,
      "total_flos": 3.056976081243648e+16,
      "train_loss": 0.8922852929775006,
      "train_runtime": 3581.679,
      "train_samples_per_second": 43.552,
      "train_steps_per_second": 1.558
    }
  ],
  "logging_steps": 500,
  "max_steps": 5580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 3.056976081243648e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}
|
|