{ "best_metric": 0.84, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-student_two_classes/checkpoint-104", "epoch": 20.0, "eval_steps": 500, "global_step": 260, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.7692307692307693, "grad_norm": 7.759917736053467, "learning_rate": 1.923076923076923e-05, "loss": 0.3845, "step": 10 }, { "epoch": 1.0, "eval_accuracy": 0.7, "eval_loss": 0.6474852561950684, "eval_runtime": 2.509, "eval_samples_per_second": 39.856, "eval_steps_per_second": 1.594, "step": 13 }, { "epoch": 1.5384615384615383, "grad_norm": 22.67352294921875, "learning_rate": 3.846153846153846e-05, "loss": 0.3466, "step": 20 }, { "epoch": 2.0, "eval_accuracy": 0.74, "eval_loss": 0.6201274991035461, "eval_runtime": 2.4483, "eval_samples_per_second": 40.845, "eval_steps_per_second": 1.634, "step": 26 }, { "epoch": 2.3076923076923075, "grad_norm": 68.59841918945312, "learning_rate": 4.9145299145299147e-05, "loss": 0.3832, "step": 30 }, { "epoch": 3.0, "eval_accuracy": 0.82, "eval_loss": 0.8067967295646667, "eval_runtime": 2.4397, "eval_samples_per_second": 40.989, "eval_steps_per_second": 1.64, "step": 39 }, { "epoch": 3.076923076923077, "grad_norm": 16.920194625854492, "learning_rate": 4.700854700854701e-05, "loss": 0.4694, "step": 40 }, { "epoch": 3.8461538461538463, "grad_norm": 59.7691764831543, "learning_rate": 4.4871794871794874e-05, "loss": 0.5344, "step": 50 }, { "epoch": 4.0, "eval_accuracy": 0.81, "eval_loss": 0.6339858770370483, "eval_runtime": 2.4546, "eval_samples_per_second": 40.74, "eval_steps_per_second": 1.63, "step": 52 }, { "epoch": 4.615384615384615, "grad_norm": 21.54722785949707, "learning_rate": 4.2735042735042735e-05, "loss": 0.4912, "step": 60 }, { "epoch": 5.0, "eval_accuracy": 0.8, "eval_loss": 0.6879615187644958, "eval_runtime": 2.8473, "eval_samples_per_second": 35.121, "eval_steps_per_second": 1.405, "step": 65 }, { "epoch": 5.384615384615385, "grad_norm": 11.457103729248047, "learning_rate": 4.05982905982906e-05, "loss": 0.5093, "step": 70 }, { "epoch": 6.0, "eval_accuracy": 0.73, "eval_loss": 0.699913501739502, "eval_runtime": 2.4666, "eval_samples_per_second": 40.541, "eval_steps_per_second": 1.622, "step": 78 }, { "epoch": 6.153846153846154, "grad_norm": 15.323335647583008, "learning_rate": 3.846153846153846e-05, "loss": 0.5284, "step": 80 }, { "epoch": 6.923076923076923, "grad_norm": 7.947178840637207, "learning_rate": 3.6324786324786323e-05, "loss": 0.4109, "step": 90 }, { "epoch": 7.0, "eval_accuracy": 0.83, "eval_loss": 0.7294943928718567, "eval_runtime": 2.4607, "eval_samples_per_second": 40.638, "eval_steps_per_second": 1.626, "step": 91 }, { "epoch": 7.6923076923076925, "grad_norm": 9.474274635314941, "learning_rate": 3.418803418803419e-05, "loss": 0.4383, "step": 100 }, { "epoch": 8.0, "eval_accuracy": 0.84, "eval_loss": 0.7047725915908813, "eval_runtime": 2.8894, "eval_samples_per_second": 34.609, "eval_steps_per_second": 1.384, "step": 104 }, { "epoch": 8.461538461538462, "grad_norm": 7.170910835266113, "learning_rate": 3.205128205128206e-05, "loss": 0.4534, "step": 110 }, { "epoch": 9.0, "eval_accuracy": 0.82, "eval_loss": 0.609440803527832, "eval_runtime": 2.4268, "eval_samples_per_second": 41.206, "eval_steps_per_second": 1.648, "step": 117 }, { "epoch": 9.23076923076923, "grad_norm": 13.206290245056152, "learning_rate": 2.9914529914529915e-05, "loss": 0.4504, "step": 120 }, { "epoch": 10.0, "grad_norm": 15.387145042419434, "learning_rate": 2.777777777777778e-05, "loss": 0.4684, "step": 130 }, { "epoch": 10.0, "eval_accuracy": 0.74, "eval_loss": 0.5788578987121582, "eval_runtime": 2.5101, "eval_samples_per_second": 39.839, "eval_steps_per_second": 1.594, "step": 130 }, { "epoch": 10.76923076923077, "grad_norm": 8.540888786315918, "learning_rate": 2.564102564102564e-05, "loss": 0.3442, "step": 140 }, { "epoch": 11.0, "eval_accuracy": 0.82, "eval_loss": 0.7296608090400696, "eval_runtime": 3.5919, "eval_samples_per_second": 27.84, "eval_steps_per_second": 1.114, "step": 143 }, { "epoch": 11.538461538461538, "grad_norm": 7.705536842346191, "learning_rate": 2.3504273504273504e-05, "loss": 0.3236, "step": 150 }, { "epoch": 12.0, "eval_accuracy": 0.79, "eval_loss": 0.7688478231430054, "eval_runtime": 2.457, "eval_samples_per_second": 40.7, "eval_steps_per_second": 1.628, "step": 156 }, { "epoch": 12.307692307692308, "grad_norm": 4.703495502471924, "learning_rate": 2.1367521367521368e-05, "loss": 0.4645, "step": 160 }, { "epoch": 13.0, "eval_accuracy": 0.76, "eval_loss": 0.6686670780181885, "eval_runtime": 2.6242, "eval_samples_per_second": 38.107, "eval_steps_per_second": 1.524, "step": 169 }, { "epoch": 13.076923076923077, "grad_norm": 12.792634010314941, "learning_rate": 1.923076923076923e-05, "loss": 0.3457, "step": 170 }, { "epoch": 13.846153846153847, "grad_norm": 8.466885566711426, "learning_rate": 1.7094017094017095e-05, "loss": 0.3532, "step": 180 }, { "epoch": 14.0, "eval_accuracy": 0.84, "eval_loss": 0.787961483001709, "eval_runtime": 2.4969, "eval_samples_per_second": 40.05, "eval_steps_per_second": 1.602, "step": 182 }, { "epoch": 14.615384615384615, "grad_norm": 13.883042335510254, "learning_rate": 1.4957264957264958e-05, "loss": 0.3394, "step": 190 }, { "epoch": 15.0, "eval_accuracy": 0.79, "eval_loss": 0.7216033935546875, "eval_runtime": 2.5404, "eval_samples_per_second": 39.364, "eval_steps_per_second": 1.575, "step": 195 }, { "epoch": 15.384615384615385, "grad_norm": 8.006115913391113, "learning_rate": 1.282051282051282e-05, "loss": 0.3311, "step": 200 }, { "epoch": 16.0, "eval_accuracy": 0.79, "eval_loss": 0.7209141254425049, "eval_runtime": 4.7912, "eval_samples_per_second": 20.872, "eval_steps_per_second": 0.835, "step": 208 }, { "epoch": 16.153846153846153, "grad_norm": 5.578493118286133, "learning_rate": 1.0683760683760684e-05, "loss": 0.3509, "step": 210 }, { "epoch": 16.923076923076923, "grad_norm": 6.166889190673828, "learning_rate": 8.547008547008548e-06, "loss": 0.3367, "step": 220 }, { "epoch": 17.0, "eval_accuracy": 0.71, "eval_loss": 0.6826711297035217, "eval_runtime": 2.5244, "eval_samples_per_second": 39.614, "eval_steps_per_second": 1.585, "step": 221 }, { "epoch": 17.692307692307693, "grad_norm": 10.269214630126953, "learning_rate": 6.41025641025641e-06, "loss": 0.3673, "step": 230 }, { "epoch": 18.0, "eval_accuracy": 0.76, "eval_loss": 0.7472490072250366, "eval_runtime": 3.2931, "eval_samples_per_second": 30.367, "eval_steps_per_second": 1.215, "step": 234 }, { "epoch": 18.46153846153846, "grad_norm": 7.079315662384033, "learning_rate": 4.273504273504274e-06, "loss": 0.3024, "step": 240 }, { "epoch": 19.0, "eval_accuracy": 0.79, "eval_loss": 0.7760630249977112, "eval_runtime": 2.5705, "eval_samples_per_second": 38.903, "eval_steps_per_second": 1.556, "step": 247 }, { "epoch": 19.23076923076923, "grad_norm": 7.1634039878845215, "learning_rate": 2.136752136752137e-06, "loss": 0.3652, "step": 250 }, { "epoch": 20.0, "grad_norm": 28.70159912109375, "learning_rate": 0.0, "loss": 0.3624, "step": 260 }, { "epoch": 20.0, "eval_accuracy": 0.76, "eval_loss": 0.7436763048171997, "eval_runtime": 2.484, "eval_samples_per_second": 40.257, "eval_steps_per_second": 1.61, "step": 260 }, { "epoch": 20.0, "step": 260, "total_flos": 1.98847911886848e+17, "train_loss": 0.40210363498100865, "train_runtime": 345.8785, "train_samples_per_second": 23.13, "train_steps_per_second": 0.752 } ], "logging_steps": 10, "max_steps": 260, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.98847911886848e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }