|
{ |
|
"best_metric": 0.84, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-student_two_classes/checkpoint-104", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 260, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 7.759917736053467, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.3845, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6474852561950684, |
|
"eval_runtime": 2.509, |
|
"eval_samples_per_second": 39.856, |
|
"eval_steps_per_second": 1.594, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 22.67352294921875, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.3466, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.6201274991035461, |
|
"eval_runtime": 2.4483, |
|
"eval_samples_per_second": 40.845, |
|
"eval_steps_per_second": 1.634, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 68.59841918945312, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.3832, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.82, |
|
"eval_loss": 0.8067967295646667, |
|
"eval_runtime": 2.4397, |
|
"eval_samples_per_second": 40.989, |
|
"eval_steps_per_second": 1.64, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 16.920194625854492, |
|
"learning_rate": 4.700854700854701e-05, |
|
"loss": 0.4694, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 59.7691764831543, |
|
"learning_rate": 4.4871794871794874e-05, |
|
"loss": 0.5344, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.81, |
|
"eval_loss": 0.6339858770370483, |
|
"eval_runtime": 2.4546, |
|
"eval_samples_per_second": 40.74, |
|
"eval_steps_per_second": 1.63, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 21.54722785949707, |
|
"learning_rate": 4.2735042735042735e-05, |
|
"loss": 0.4912, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8, |
|
"eval_loss": 0.6879615187644958, |
|
"eval_runtime": 2.8473, |
|
"eval_samples_per_second": 35.121, |
|
"eval_steps_per_second": 1.405, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 5.384615384615385, |
|
"grad_norm": 11.457103729248047, |
|
"learning_rate": 4.05982905982906e-05, |
|
"loss": 0.5093, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.699913501739502, |
|
"eval_runtime": 2.4666, |
|
"eval_samples_per_second": 40.541, |
|
"eval_steps_per_second": 1.622, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 15.323335647583008, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 0.5284, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"grad_norm": 7.947178840637207, |
|
"learning_rate": 3.6324786324786323e-05, |
|
"loss": 0.4109, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.83, |
|
"eval_loss": 0.7294943928718567, |
|
"eval_runtime": 2.4607, |
|
"eval_samples_per_second": 40.638, |
|
"eval_steps_per_second": 1.626, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 9.474274635314941, |
|
"learning_rate": 3.418803418803419e-05, |
|
"loss": 0.4383, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.84, |
|
"eval_loss": 0.7047725915908813, |
|
"eval_runtime": 2.8894, |
|
"eval_samples_per_second": 34.609, |
|
"eval_steps_per_second": 1.384, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 8.461538461538462, |
|
"grad_norm": 7.170910835266113, |
|
"learning_rate": 3.205128205128206e-05, |
|
"loss": 0.4534, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.82, |
|
"eval_loss": 0.609440803527832, |
|
"eval_runtime": 2.4268, |
|
"eval_samples_per_second": 41.206, |
|
"eval_steps_per_second": 1.648, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 13.206290245056152, |
|
"learning_rate": 2.9914529914529915e-05, |
|
"loss": 0.4504, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 15.387145042419434, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.4684, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5788578987121582, |
|
"eval_runtime": 2.5101, |
|
"eval_samples_per_second": 39.839, |
|
"eval_steps_per_second": 1.594, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"grad_norm": 8.540888786315918, |
|
"learning_rate": 2.564102564102564e-05, |
|
"loss": 0.3442, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.82, |
|
"eval_loss": 0.7296608090400696, |
|
"eval_runtime": 3.5919, |
|
"eval_samples_per_second": 27.84, |
|
"eval_steps_per_second": 1.114, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 11.538461538461538, |
|
"grad_norm": 7.705536842346191, |
|
"learning_rate": 2.3504273504273504e-05, |
|
"loss": 0.3236, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.79, |
|
"eval_loss": 0.7688478231430054, |
|
"eval_runtime": 2.457, |
|
"eval_samples_per_second": 40.7, |
|
"eval_steps_per_second": 1.628, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 4.703495502471924, |
|
"learning_rate": 2.1367521367521368e-05, |
|
"loss": 0.4645, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.6686670780181885, |
|
"eval_runtime": 2.6242, |
|
"eval_samples_per_second": 38.107, |
|
"eval_steps_per_second": 1.524, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 13.076923076923077, |
|
"grad_norm": 12.792634010314941, |
|
"learning_rate": 1.923076923076923e-05, |
|
"loss": 0.3457, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"grad_norm": 8.466885566711426, |
|
"learning_rate": 1.7094017094017095e-05, |
|
"loss": 0.3532, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.84, |
|
"eval_loss": 0.787961483001709, |
|
"eval_runtime": 2.4969, |
|
"eval_samples_per_second": 40.05, |
|
"eval_steps_per_second": 1.602, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 14.615384615384615, |
|
"grad_norm": 13.883042335510254, |
|
"learning_rate": 1.4957264957264958e-05, |
|
"loss": 0.3394, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.79, |
|
"eval_loss": 0.7216033935546875, |
|
"eval_runtime": 2.5404, |
|
"eval_samples_per_second": 39.364, |
|
"eval_steps_per_second": 1.575, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 8.006115913391113, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.3311, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.79, |
|
"eval_loss": 0.7209141254425049, |
|
"eval_runtime": 4.7912, |
|
"eval_samples_per_second": 20.872, |
|
"eval_steps_per_second": 0.835, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 16.153846153846153, |
|
"grad_norm": 5.578493118286133, |
|
"learning_rate": 1.0683760683760684e-05, |
|
"loss": 0.3509, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"grad_norm": 6.166889190673828, |
|
"learning_rate": 8.547008547008548e-06, |
|
"loss": 0.3367, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6826711297035217, |
|
"eval_runtime": 2.5244, |
|
"eval_samples_per_second": 39.614, |
|
"eval_steps_per_second": 1.585, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 17.692307692307693, |
|
"grad_norm": 10.269214630126953, |
|
"learning_rate": 6.41025641025641e-06, |
|
"loss": 0.3673, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.7472490072250366, |
|
"eval_runtime": 3.2931, |
|
"eval_samples_per_second": 30.367, |
|
"eval_steps_per_second": 1.215, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 7.079315662384033, |
|
"learning_rate": 4.273504273504274e-06, |
|
"loss": 0.3024, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.79, |
|
"eval_loss": 0.7760630249977112, |
|
"eval_runtime": 2.5705, |
|
"eval_samples_per_second": 38.903, |
|
"eval_steps_per_second": 1.556, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 19.23076923076923, |
|
"grad_norm": 7.1634039878845215, |
|
"learning_rate": 2.136752136752137e-06, |
|
"loss": 0.3652, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 28.70159912109375, |
|
"learning_rate": 0.0, |
|
"loss": 0.3624, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.7436763048171997, |
|
"eval_runtime": 2.484, |
|
"eval_samples_per_second": 40.257, |
|
"eval_steps_per_second": 1.61, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 260, |
|
"total_flos": 1.98847911886848e+17, |
|
"train_loss": 0.40210363498100865, |
|
"train_runtime": 345.8785, |
|
"train_samples_per_second": 23.13, |
|
"train_steps_per_second": 0.752 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 260, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 1.98847911886848e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|