system-admin's picture
Training in progress, epoch 1
717c124 verified
{
"best_metric": 0.84,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-student_two_classes/checkpoint-104",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 260,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7692307692307693,
"grad_norm": 7.759917736053467,
"learning_rate": 1.923076923076923e-05,
"loss": 0.3845,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.7,
"eval_loss": 0.6474852561950684,
"eval_runtime": 2.509,
"eval_samples_per_second": 39.856,
"eval_steps_per_second": 1.594,
"step": 13
},
{
"epoch": 1.5384615384615383,
"grad_norm": 22.67352294921875,
"learning_rate": 3.846153846153846e-05,
"loss": 0.3466,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.74,
"eval_loss": 0.6201274991035461,
"eval_runtime": 2.4483,
"eval_samples_per_second": 40.845,
"eval_steps_per_second": 1.634,
"step": 26
},
{
"epoch": 2.3076923076923075,
"grad_norm": 68.59841918945312,
"learning_rate": 4.9145299145299147e-05,
"loss": 0.3832,
"step": 30
},
{
"epoch": 3.0,
"eval_accuracy": 0.82,
"eval_loss": 0.8067967295646667,
"eval_runtime": 2.4397,
"eval_samples_per_second": 40.989,
"eval_steps_per_second": 1.64,
"step": 39
},
{
"epoch": 3.076923076923077,
"grad_norm": 16.920194625854492,
"learning_rate": 4.700854700854701e-05,
"loss": 0.4694,
"step": 40
},
{
"epoch": 3.8461538461538463,
"grad_norm": 59.7691764831543,
"learning_rate": 4.4871794871794874e-05,
"loss": 0.5344,
"step": 50
},
{
"epoch": 4.0,
"eval_accuracy": 0.81,
"eval_loss": 0.6339858770370483,
"eval_runtime": 2.4546,
"eval_samples_per_second": 40.74,
"eval_steps_per_second": 1.63,
"step": 52
},
{
"epoch": 4.615384615384615,
"grad_norm": 21.54722785949707,
"learning_rate": 4.2735042735042735e-05,
"loss": 0.4912,
"step": 60
},
{
"epoch": 5.0,
"eval_accuracy": 0.8,
"eval_loss": 0.6879615187644958,
"eval_runtime": 2.8473,
"eval_samples_per_second": 35.121,
"eval_steps_per_second": 1.405,
"step": 65
},
{
"epoch": 5.384615384615385,
"grad_norm": 11.457103729248047,
"learning_rate": 4.05982905982906e-05,
"loss": 0.5093,
"step": 70
},
{
"epoch": 6.0,
"eval_accuracy": 0.73,
"eval_loss": 0.699913501739502,
"eval_runtime": 2.4666,
"eval_samples_per_second": 40.541,
"eval_steps_per_second": 1.622,
"step": 78
},
{
"epoch": 6.153846153846154,
"grad_norm": 15.323335647583008,
"learning_rate": 3.846153846153846e-05,
"loss": 0.5284,
"step": 80
},
{
"epoch": 6.923076923076923,
"grad_norm": 7.947178840637207,
"learning_rate": 3.6324786324786323e-05,
"loss": 0.4109,
"step": 90
},
{
"epoch": 7.0,
"eval_accuracy": 0.83,
"eval_loss": 0.7294943928718567,
"eval_runtime": 2.4607,
"eval_samples_per_second": 40.638,
"eval_steps_per_second": 1.626,
"step": 91
},
{
"epoch": 7.6923076923076925,
"grad_norm": 9.474274635314941,
"learning_rate": 3.418803418803419e-05,
"loss": 0.4383,
"step": 100
},
{
"epoch": 8.0,
"eval_accuracy": 0.84,
"eval_loss": 0.7047725915908813,
"eval_runtime": 2.8894,
"eval_samples_per_second": 34.609,
"eval_steps_per_second": 1.384,
"step": 104
},
{
"epoch": 8.461538461538462,
"grad_norm": 7.170910835266113,
"learning_rate": 3.205128205128206e-05,
"loss": 0.4534,
"step": 110
},
{
"epoch": 9.0,
"eval_accuracy": 0.82,
"eval_loss": 0.609440803527832,
"eval_runtime": 2.4268,
"eval_samples_per_second": 41.206,
"eval_steps_per_second": 1.648,
"step": 117
},
{
"epoch": 9.23076923076923,
"grad_norm": 13.206290245056152,
"learning_rate": 2.9914529914529915e-05,
"loss": 0.4504,
"step": 120
},
{
"epoch": 10.0,
"grad_norm": 15.387145042419434,
"learning_rate": 2.777777777777778e-05,
"loss": 0.4684,
"step": 130
},
{
"epoch": 10.0,
"eval_accuracy": 0.74,
"eval_loss": 0.5788578987121582,
"eval_runtime": 2.5101,
"eval_samples_per_second": 39.839,
"eval_steps_per_second": 1.594,
"step": 130
},
{
"epoch": 10.76923076923077,
"grad_norm": 8.540888786315918,
"learning_rate": 2.564102564102564e-05,
"loss": 0.3442,
"step": 140
},
{
"epoch": 11.0,
"eval_accuracy": 0.82,
"eval_loss": 0.7296608090400696,
"eval_runtime": 3.5919,
"eval_samples_per_second": 27.84,
"eval_steps_per_second": 1.114,
"step": 143
},
{
"epoch": 11.538461538461538,
"grad_norm": 7.705536842346191,
"learning_rate": 2.3504273504273504e-05,
"loss": 0.3236,
"step": 150
},
{
"epoch": 12.0,
"eval_accuracy": 0.79,
"eval_loss": 0.7688478231430054,
"eval_runtime": 2.457,
"eval_samples_per_second": 40.7,
"eval_steps_per_second": 1.628,
"step": 156
},
{
"epoch": 12.307692307692308,
"grad_norm": 4.703495502471924,
"learning_rate": 2.1367521367521368e-05,
"loss": 0.4645,
"step": 160
},
{
"epoch": 13.0,
"eval_accuracy": 0.76,
"eval_loss": 0.6686670780181885,
"eval_runtime": 2.6242,
"eval_samples_per_second": 38.107,
"eval_steps_per_second": 1.524,
"step": 169
},
{
"epoch": 13.076923076923077,
"grad_norm": 12.792634010314941,
"learning_rate": 1.923076923076923e-05,
"loss": 0.3457,
"step": 170
},
{
"epoch": 13.846153846153847,
"grad_norm": 8.466885566711426,
"learning_rate": 1.7094017094017095e-05,
"loss": 0.3532,
"step": 180
},
{
"epoch": 14.0,
"eval_accuracy": 0.84,
"eval_loss": 0.787961483001709,
"eval_runtime": 2.4969,
"eval_samples_per_second": 40.05,
"eval_steps_per_second": 1.602,
"step": 182
},
{
"epoch": 14.615384615384615,
"grad_norm": 13.883042335510254,
"learning_rate": 1.4957264957264958e-05,
"loss": 0.3394,
"step": 190
},
{
"epoch": 15.0,
"eval_accuracy": 0.79,
"eval_loss": 0.7216033935546875,
"eval_runtime": 2.5404,
"eval_samples_per_second": 39.364,
"eval_steps_per_second": 1.575,
"step": 195
},
{
"epoch": 15.384615384615385,
"grad_norm": 8.006115913391113,
"learning_rate": 1.282051282051282e-05,
"loss": 0.3311,
"step": 200
},
{
"epoch": 16.0,
"eval_accuracy": 0.79,
"eval_loss": 0.7209141254425049,
"eval_runtime": 4.7912,
"eval_samples_per_second": 20.872,
"eval_steps_per_second": 0.835,
"step": 208
},
{
"epoch": 16.153846153846153,
"grad_norm": 5.578493118286133,
"learning_rate": 1.0683760683760684e-05,
"loss": 0.3509,
"step": 210
},
{
"epoch": 16.923076923076923,
"grad_norm": 6.166889190673828,
"learning_rate": 8.547008547008548e-06,
"loss": 0.3367,
"step": 220
},
{
"epoch": 17.0,
"eval_accuracy": 0.71,
"eval_loss": 0.6826711297035217,
"eval_runtime": 2.5244,
"eval_samples_per_second": 39.614,
"eval_steps_per_second": 1.585,
"step": 221
},
{
"epoch": 17.692307692307693,
"grad_norm": 10.269214630126953,
"learning_rate": 6.41025641025641e-06,
"loss": 0.3673,
"step": 230
},
{
"epoch": 18.0,
"eval_accuracy": 0.76,
"eval_loss": 0.7472490072250366,
"eval_runtime": 3.2931,
"eval_samples_per_second": 30.367,
"eval_steps_per_second": 1.215,
"step": 234
},
{
"epoch": 18.46153846153846,
"grad_norm": 7.079315662384033,
"learning_rate": 4.273504273504274e-06,
"loss": 0.3024,
"step": 240
},
{
"epoch": 19.0,
"eval_accuracy": 0.79,
"eval_loss": 0.7760630249977112,
"eval_runtime": 2.5705,
"eval_samples_per_second": 38.903,
"eval_steps_per_second": 1.556,
"step": 247
},
{
"epoch": 19.23076923076923,
"grad_norm": 7.1634039878845215,
"learning_rate": 2.136752136752137e-06,
"loss": 0.3652,
"step": 250
},
{
"epoch": 20.0,
"grad_norm": 28.70159912109375,
"learning_rate": 0.0,
"loss": 0.3624,
"step": 260
},
{
"epoch": 20.0,
"eval_accuracy": 0.76,
"eval_loss": 0.7436763048171997,
"eval_runtime": 2.484,
"eval_samples_per_second": 40.257,
"eval_steps_per_second": 1.61,
"step": 260
},
{
"epoch": 20.0,
"step": 260,
"total_flos": 1.98847911886848e+17,
"train_loss": 0.40210363498100865,
"train_runtime": 345.8785,
"train_samples_per_second": 23.13,
"train_steps_per_second": 0.752
}
],
"logging_steps": 10,
"max_steps": 260,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 1.98847911886848e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}