|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.187604690117253, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.66499162479062e-05, |
|
"loss": 0.0795, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.9863571086644327, |
|
"eval_loss": 0.04342207312583923, |
|
"eval_runtime": 78.4916, |
|
"eval_samples_per_second": 106.457, |
|
"eval_steps_per_second": 2.535, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.32998324958124e-05, |
|
"loss": 0.0434, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9869554810914313, |
|
"eval_loss": 0.035848040133714676, |
|
"eval_runtime": 78.5171, |
|
"eval_samples_per_second": 106.423, |
|
"eval_steps_per_second": 2.534, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.9949748743718597e-05, |
|
"loss": 0.0375, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.9876735280038296, |
|
"eval_loss": 0.03416126221418381, |
|
"eval_runtime": 78.4728, |
|
"eval_samples_per_second": 106.483, |
|
"eval_steps_per_second": 2.536, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.6599664991624795e-05, |
|
"loss": 0.0112, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.9885112494016276, |
|
"eval_loss": 0.03930915519595146, |
|
"eval_runtime": 78.5108, |
|
"eval_samples_per_second": 106.431, |
|
"eval_steps_per_second": 2.535, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.324958123953099e-05, |
|
"loss": 0.0089, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.9877932024892293, |
|
"eval_loss": 0.04387575387954712, |
|
"eval_runtime": 78.5288, |
|
"eval_samples_per_second": 106.407, |
|
"eval_steps_per_second": 2.534, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.989949748743719e-05, |
|
"loss": 0.0117, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.989588319770225, |
|
"eval_loss": 0.04102291911840439, |
|
"eval_runtime": 78.5424, |
|
"eval_samples_per_second": 106.388, |
|
"eval_steps_per_second": 2.534, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.6549413735343385e-05, |
|
"loss": 0.0028, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_accuracy": 0.989588319770225, |
|
"eval_loss": 0.055005114525556564, |
|
"eval_runtime": 78.492, |
|
"eval_samples_per_second": 106.457, |
|
"eval_steps_per_second": 2.535, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.3199329983249583e-05, |
|
"loss": 0.0014, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.9889899473432264, |
|
"eval_loss": 0.054427579045295715, |
|
"eval_runtime": 78.5005, |
|
"eval_samples_per_second": 106.445, |
|
"eval_steps_per_second": 2.535, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.984924623115578e-05, |
|
"loss": 0.0011, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"eval_accuracy": 0.989588319770225, |
|
"eval_loss": 0.05314817279577255, |
|
"eval_runtime": 78.5501, |
|
"eval_samples_per_second": 106.378, |
|
"eval_steps_per_second": 2.533, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.6499162479061976e-05, |
|
"loss": 0.0002, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.9894686452848253, |
|
"eval_loss": 0.058279525488615036, |
|
"eval_runtime": 78.4971, |
|
"eval_samples_per_second": 106.45, |
|
"eval_steps_per_second": 2.535, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 1.3149078726968176e-05, |
|
"loss": 0.0002, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.9897079942556247, |
|
"eval_loss": 0.05424511060118675, |
|
"eval_runtime": 78.4985, |
|
"eval_samples_per_second": 106.448, |
|
"eval_steps_per_second": 2.535, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.798994974874372e-06, |
|
"loss": 0.0002, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_accuracy": 0.989588319770225, |
|
"eval_loss": 0.05800911784172058, |
|
"eval_runtime": 78.5022, |
|
"eval_samples_per_second": 106.443, |
|
"eval_steps_per_second": 2.535, |
|
"step": 2400 |
|
} |
|
], |
|
"max_steps": 2985, |
|
"num_train_epochs": 5, |
|
"total_flos": 0.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|