|
{ |
|
"best_metric": 0.7441860465116279, |
|
"best_model_checkpoint": "vit-base-patch16-224/checkpoint-6", |
|
"epoch": 9.6, |
|
"eval_steps": 500, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5859283208847046, |
|
"eval_runtime": 0.2566, |
|
"eval_samples_per_second": 167.599, |
|
"eval_steps_per_second": 42.874, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 3.7175180912017822, |
|
"learning_rate": 4.62962962962963e-06, |
|
"loss": 0.605, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5841977000236511, |
|
"eval_runtime": 0.2709, |
|
"eval_samples_per_second": 158.721, |
|
"eval_steps_per_second": 40.603, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5918598771095276, |
|
"eval_runtime": 0.2627, |
|
"eval_samples_per_second": 163.663, |
|
"eval_steps_per_second": 41.867, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 2.415996551513672, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.5428, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5884882807731628, |
|
"eval_runtime": 0.9618, |
|
"eval_samples_per_second": 44.706, |
|
"eval_steps_per_second": 11.436, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 2.7047529220581055, |
|
"learning_rate": 2.7777777777777783e-06, |
|
"loss": 0.5584, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.588610827922821, |
|
"eval_runtime": 0.2732, |
|
"eval_samples_per_second": 157.373, |
|
"eval_steps_per_second": 40.258, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5914657711982727, |
|
"eval_runtime": 0.2503, |
|
"eval_samples_per_second": 171.764, |
|
"eval_steps_per_second": 43.94, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 3.9282095432281494, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.5593, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5934743881225586, |
|
"eval_runtime": 0.2654, |
|
"eval_samples_per_second": 162.014, |
|
"eval_steps_per_second": 41.446, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.82995343208313, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.5097, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5947032570838928, |
|
"eval_runtime": 0.2582, |
|
"eval_samples_per_second": 166.523, |
|
"eval_steps_per_second": 42.599, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.594927966594696, |
|
"eval_runtime": 0.2847, |
|
"eval_samples_per_second": 151.012, |
|
"eval_steps_per_second": 38.631, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"grad_norm": 3.3591854572296143, |
|
"learning_rate": 0.0, |
|
"loss": 0.5205, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_accuracy": 0.7441860465116279, |
|
"eval_loss": 0.5949187874794006, |
|
"eval_runtime": 0.2565, |
|
"eval_samples_per_second": 167.627, |
|
"eval_steps_per_second": 42.881, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"step": 60, |
|
"total_flos": 7.3694882123477e+16, |
|
"train_loss": 0.5492916504542033, |
|
"train_runtime": 34.9653, |
|
"train_samples_per_second": 28.314, |
|
"train_steps_per_second": 1.716 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 60, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.3694882123477e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|