|
{ |
|
"best_metric": 0.344835102558136, |
|
"best_model_checkpoint": "./vit-base-pets/checkpoint-235", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 235, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 216203.125, |
|
"learning_rate": 0.0002872340425531915, |
|
"loss": 3.3311, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 198156.9375, |
|
"learning_rate": 0.000274468085106383, |
|
"loss": 2.5921, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 176661.6875, |
|
"learning_rate": 0.0002617021276595745, |
|
"loss": 1.9823, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 142275.75, |
|
"learning_rate": 0.0002489361702127659, |
|
"loss": 1.5136, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8430311231393776, |
|
"eval_loss": 1.1030857563018799, |
|
"eval_runtime": 8.7542, |
|
"eval_samples_per_second": 84.417, |
|
"eval_steps_per_second": 5.369, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 124899.421875, |
|
"learning_rate": 0.00023617021276595742, |
|
"loss": 1.1858, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 98946.875, |
|
"learning_rate": 0.0002234042553191489, |
|
"loss": 0.9313, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 92924.6484375, |
|
"learning_rate": 0.0002106382978723404, |
|
"loss": 0.7466, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 90032.1484375, |
|
"learning_rate": 0.00019787234042553187, |
|
"loss": 0.6475, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"grad_norm": 68696.1875, |
|
"learning_rate": 0.0001851063829787234, |
|
"loss": 0.5547, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9269282814614344, |
|
"eval_loss": 0.5232290625572205, |
|
"eval_runtime": 8.9185, |
|
"eval_samples_per_second": 82.861, |
|
"eval_steps_per_second": 5.27, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 67699.609375, |
|
"learning_rate": 0.0001723404255319149, |
|
"loss": 0.5311, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 72000.0234375, |
|
"learning_rate": 0.00015957446808510637, |
|
"loss": 0.4636, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 54618.05078125, |
|
"learning_rate": 0.00014680851063829785, |
|
"loss": 0.4171, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"grad_norm": 57285.890625, |
|
"learning_rate": 0.00013404255319148935, |
|
"loss": 0.3946, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"grad_norm": 73116.6171875, |
|
"learning_rate": 0.00012127659574468084, |
|
"loss": 0.4111, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9309878213802436, |
|
"eval_loss": 0.39878538250923157, |
|
"eval_runtime": 9.1048, |
|
"eval_samples_per_second": 81.166, |
|
"eval_steps_per_second": 5.162, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 64126.3828125, |
|
"learning_rate": 0.00010851063829787234, |
|
"loss": 0.3607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 69913.5390625, |
|
"learning_rate": 9.574468085106382e-05, |
|
"loss": 0.3387, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"grad_norm": 54300.03125, |
|
"learning_rate": 8.297872340425531e-05, |
|
"loss": 0.3568, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"grad_norm": 60929.75390625, |
|
"learning_rate": 7.02127659574468e-05, |
|
"loss": 0.3438, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9336941813261164, |
|
"eval_loss": 0.35527506470680237, |
|
"eval_runtime": 9.4286, |
|
"eval_samples_per_second": 78.378, |
|
"eval_steps_per_second": 4.985, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"grad_norm": 69279.328125, |
|
"learning_rate": 5.7446808510638294e-05, |
|
"loss": 0.3087, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"grad_norm": 49415.69140625, |
|
"learning_rate": 4.468085106382978e-05, |
|
"loss": 0.328, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"grad_norm": 62788.3359375, |
|
"learning_rate": 3.1914893617021275e-05, |
|
"loss": 0.3199, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"grad_norm": 61852.7421875, |
|
"learning_rate": 1.9148936170212762e-05, |
|
"loss": 0.3244, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"grad_norm": 64687.453125, |
|
"learning_rate": 6.382978723404255e-06, |
|
"loss": 0.298, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9296346414073072, |
|
"eval_loss": 0.344835102558136, |
|
"eval_runtime": 9.1923, |
|
"eval_samples_per_second": 80.393, |
|
"eval_steps_per_second": 5.113, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 235, |
|
"total_flos": 2.2913817801515827e+18, |
|
"train_loss": 0.8009341437765892, |
|
"train_runtime": 407.3253, |
|
"train_samples_per_second": 72.571, |
|
"train_steps_per_second": 0.577 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 235, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 2.2913817801515827e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|