|
{ |
|
"best_metric": 0.87736, |
|
"best_model_checkpoint": "outputs/checkpoint-702", |
|
"epoch": 3.0, |
|
"eval_steps": 78, |
|
"global_step": 1173, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.6949152542372883e-07, |
|
"loss": 0.6918, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.3220338983050848e-05, |
|
"loss": 0.6487, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.81168, |
|
"eval_loss": 0.4401787221431732, |
|
"eval_runtime": 30.2854, |
|
"eval_samples_per_second": 825.48, |
|
"eval_steps_per_second": 12.911, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9279620853080568e-05, |
|
"loss": 0.402, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.8416, |
|
"eval_loss": 0.356289267539978, |
|
"eval_runtime": 30.4052, |
|
"eval_samples_per_second": 822.228, |
|
"eval_steps_per_second": 12.86, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.780094786729858e-05, |
|
"loss": 0.3528, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.8434, |
|
"eval_loss": 0.3521649241447449, |
|
"eval_runtime": 30.6429, |
|
"eval_samples_per_second": 815.849, |
|
"eval_steps_per_second": 12.76, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.632227488151659e-05, |
|
"loss": 0.3362, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.86516, |
|
"eval_loss": 0.30993545055389404, |
|
"eval_runtime": 30.6544, |
|
"eval_samples_per_second": 815.543, |
|
"eval_steps_per_second": 12.755, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.4843601895734598e-05, |
|
"loss": 0.3184, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.86996, |
|
"eval_loss": 0.30280688405036926, |
|
"eval_runtime": 30.6512, |
|
"eval_samples_per_second": 815.629, |
|
"eval_steps_per_second": 12.756, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.3364928909952607e-05, |
|
"loss": 0.265, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.8738, |
|
"eval_loss": 0.305215448141098, |
|
"eval_runtime": 30.5331, |
|
"eval_samples_per_second": 818.783, |
|
"eval_steps_per_second": 12.806, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.1886255924170618e-05, |
|
"loss": 0.2593, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.87348, |
|
"eval_loss": 0.29833072423934937, |
|
"eval_runtime": 30.6351, |
|
"eval_samples_per_second": 816.057, |
|
"eval_steps_per_second": 12.763, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.0407582938388628e-05, |
|
"loss": 0.2537, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.87536, |
|
"eval_loss": 0.2977478802204132, |
|
"eval_runtime": 30.5918, |
|
"eval_samples_per_second": 817.213, |
|
"eval_steps_per_second": 12.781, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.928909952606636e-06, |
|
"loss": 0.2558, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.87736, |
|
"eval_loss": 0.29114434123039246, |
|
"eval_runtime": 30.9298, |
|
"eval_samples_per_second": 808.283, |
|
"eval_steps_per_second": 12.642, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 7.450236966824646e-06, |
|
"loss": 0.2476, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.87508, |
|
"eval_loss": 0.2907171845436096, |
|
"eval_runtime": 30.4929, |
|
"eval_samples_per_second": 819.862, |
|
"eval_steps_per_second": 12.823, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5.971563981042654e-06, |
|
"loss": 0.1941, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.87736, |
|
"eval_loss": 0.3151108920574188, |
|
"eval_runtime": 30.5003, |
|
"eval_samples_per_second": 819.664, |
|
"eval_steps_per_second": 12.82, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.492890995260664e-06, |
|
"loss": 0.1873, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_accuracy": 0.87644, |
|
"eval_loss": 0.31038883328437805, |
|
"eval_runtime": 30.5889, |
|
"eval_samples_per_second": 817.29, |
|
"eval_steps_per_second": 12.782, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.0142180094786734e-06, |
|
"loss": 0.1869, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.87708, |
|
"eval_loss": 0.3180868625640869, |
|
"eval_runtime": 30.5304, |
|
"eval_samples_per_second": 818.855, |
|
"eval_steps_per_second": 12.807, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.5355450236966826e-06, |
|
"loss": 0.1807, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.87636, |
|
"eval_loss": 0.3148181140422821, |
|
"eval_runtime": 30.541, |
|
"eval_samples_per_second": 818.572, |
|
"eval_steps_per_second": 12.802, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 5.6872037914691944e-08, |
|
"loss": 0.1967, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8766, |
|
"eval_loss": 0.3140537142753601, |
|
"eval_runtime": 30.4059, |
|
"eval_samples_per_second": 822.21, |
|
"eval_steps_per_second": 12.859, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1173, |
|
"total_flos": 2483763724800000.0, |
|
"train_loss": 0.2856195556334929, |
|
"train_runtime": 750.4873, |
|
"train_samples_per_second": 99.935, |
|
"train_steps_per_second": 1.563 |
|
} |
|
], |
|
"logging_steps": 78, |
|
"max_steps": 1173, |
|
"num_train_epochs": 3, |
|
"save_steps": 78, |
|
"total_flos": 2483763724800000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|