|
{ |
|
"best_metric": 0.9104332327842712, |
|
"best_model_checkpoint": "cat_breed_image_detection/checkpoint-12705", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 12705, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19677292404565133, |
|
"grad_norm": 10.679129600524902, |
|
"learning_rate": 7.715527459502173e-07, |
|
"loss": 0.8716, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39354584809130266, |
|
"grad_norm": 12.624375343322754, |
|
"learning_rate": 7.399446858949032e-07, |
|
"loss": 0.8608, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5903187721369539, |
|
"grad_norm": 12.887909889221191, |
|
"learning_rate": 7.08336625839589e-07, |
|
"loss": 0.8653, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7870916961826053, |
|
"grad_norm": 14.339140892028809, |
|
"learning_rate": 6.76728565784275e-07, |
|
"loss": 0.8493, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9838646202282566, |
|
"grad_norm": 10.709071159362793, |
|
"learning_rate": 6.451205057289608e-07, |
|
"loss": 0.8589, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7400815618541482, |
|
"eval_loss": 0.9309104084968567, |
|
"eval_model_preparation_time": 0.006, |
|
"eval_runtime": 1193.8367, |
|
"eval_samples_per_second": 90.786, |
|
"eval_steps_per_second": 2.837, |
|
"step": 2541 |
|
}, |
|
{ |
|
"epoch": 1.1806375442739079, |
|
"grad_norm": 8.929093360900879, |
|
"learning_rate": 6.135124456736468e-07, |
|
"loss": 0.8467, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.3774104683195592, |
|
"grad_norm": 11.412522315979004, |
|
"learning_rate": 5.819043856183327e-07, |
|
"loss": 0.8531, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.5741833923652107, |
|
"grad_norm": 12.86628532409668, |
|
"learning_rate": 5.502963255630185e-07, |
|
"loss": 0.8443, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.770956316410862, |
|
"grad_norm": 10.795392990112305, |
|
"learning_rate": 5.186882655077045e-07, |
|
"loss": 0.8317, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.9677292404565132, |
|
"grad_norm": 12.041897773742676, |
|
"learning_rate": 4.870802054523904e-07, |
|
"loss": 0.8554, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7436983318571007, |
|
"eval_loss": 0.9218501448631287, |
|
"eval_model_preparation_time": 0.006, |
|
"eval_runtime": 1215.8937, |
|
"eval_samples_per_second": 89.139, |
|
"eval_steps_per_second": 2.786, |
|
"step": 5082 |
|
}, |
|
{ |
|
"epoch": 2.1645021645021645, |
|
"grad_norm": 8.663507461547852, |
|
"learning_rate": 4.5547214539707617e-07, |
|
"loss": 0.8319, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.3612750885478158, |
|
"grad_norm": 13.493363380432129, |
|
"learning_rate": 4.238640853417621e-07, |
|
"loss": 0.8396, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.558048012593467, |
|
"grad_norm": 18.810260772705078, |
|
"learning_rate": 3.92256025286448e-07, |
|
"loss": 0.8415, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.7548209366391183, |
|
"grad_norm": 11.748995780944824, |
|
"learning_rate": 3.606479652311339e-07, |
|
"loss": 0.8427, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.9515938606847696, |
|
"grad_norm": 9.686206817626953, |
|
"learning_rate": 3.290399051758198e-07, |
|
"loss": 0.8367, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7437629170357248, |
|
"eval_loss": 0.9151368737220764, |
|
"eval_model_preparation_time": 0.006, |
|
"eval_runtime": 1207.1063, |
|
"eval_samples_per_second": 89.788, |
|
"eval_steps_per_second": 2.806, |
|
"step": 7623 |
|
}, |
|
{ |
|
"epoch": 3.1483667847304213, |
|
"grad_norm": 10.070638656616211, |
|
"learning_rate": 2.974318451205057e-07, |
|
"loss": 0.8266, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.3451397087760726, |
|
"grad_norm": 8.807821273803711, |
|
"learning_rate": 2.658237850651916e-07, |
|
"loss": 0.8258, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.541912632821724, |
|
"grad_norm": 6.973104953765869, |
|
"learning_rate": 2.342157250098775e-07, |
|
"loss": 0.8382, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.738685556867375, |
|
"grad_norm": 9.397773742675781, |
|
"learning_rate": 2.026076649545634e-07, |
|
"loss": 0.8194, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.9354584809130264, |
|
"grad_norm": 9.227031707763672, |
|
"learning_rate": 1.709996048992493e-07, |
|
"loss": 0.8333, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7457742840271627, |
|
"eval_loss": 0.9115355610847473, |
|
"eval_model_preparation_time": 0.006, |
|
"eval_runtime": 1209.7892, |
|
"eval_samples_per_second": 89.589, |
|
"eval_steps_per_second": 2.8, |
|
"step": 10164 |
|
}, |
|
{ |
|
"epoch": 4.132231404958677, |
|
"grad_norm": 10.666913032531738, |
|
"learning_rate": 1.393915448439352e-07, |
|
"loss": 0.8311, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.329004329004329, |
|
"grad_norm": 8.299125671386719, |
|
"learning_rate": 1.077834847886211e-07, |
|
"loss": 0.8312, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.525777253049981, |
|
"grad_norm": 10.474702835083008, |
|
"learning_rate": 7.617542473330699e-08, |
|
"loss": 0.8224, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.7225501770956315, |
|
"grad_norm": 9.30624008178711, |
|
"learning_rate": 4.4567364677992886e-08, |
|
"loss": 0.8169, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.919323101141283, |
|
"grad_norm": 11.784387588500977, |
|
"learning_rate": 1.2959304622678783e-08, |
|
"loss": 0.8285, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7457742840271627, |
|
"eval_loss": 0.9104332327842712, |
|
"eval_model_preparation_time": 0.006, |
|
"eval_runtime": 1215.0134, |
|
"eval_samples_per_second": 89.204, |
|
"eval_steps_per_second": 2.788, |
|
"step": 12705 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12705, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.301765894857818e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|