{ "best_metric": 0.9338235294117647, "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-papsmear/checkpoint-419", "epoch": 46.15384615384615, "eval_steps": 500, "global_step": 450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.2647058823529412, "eval_loss": 1.7345659732818604, "eval_runtime": 30.9498, "eval_samples_per_second": 4.394, "eval_steps_per_second": 0.162, "step": 9 }, { "epoch": 1.0256410256410255, "grad_norm": 1.5590156316757202, "learning_rate": 1.1111111111111112e-05, "loss": 1.7645, "step": 10 }, { "epoch": 1.9487179487179487, "eval_accuracy": 0.3088235294117647, "eval_loss": 1.6151821613311768, "eval_runtime": 30.7387, "eval_samples_per_second": 4.424, "eval_steps_per_second": 0.163, "step": 19 }, { "epoch": 2.051282051282051, "grad_norm": 1.000002384185791, "learning_rate": 2.2222222222222223e-05, "loss": 1.661, "step": 20 }, { "epoch": 2.9743589743589745, "eval_accuracy": 0.4117647058823529, "eval_loss": 1.4663141965866089, "eval_runtime": 30.7848, "eval_samples_per_second": 4.418, "eval_steps_per_second": 0.162, "step": 29 }, { "epoch": 3.076923076923077, "grad_norm": 1.144618272781372, "learning_rate": 3.3333333333333335e-05, "loss": 1.496, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.4852941176470588, "eval_loss": 1.2988938093185425, "eval_runtime": 29.3106, "eval_samples_per_second": 4.64, "eval_steps_per_second": 0.171, "step": 39 }, { "epoch": 4.102564102564102, "grad_norm": 0.9698243141174316, "learning_rate": 4.4444444444444447e-05, "loss": 1.3097, "step": 40 }, { "epoch": 4.923076923076923, "eval_accuracy": 0.5588235294117647, "eval_loss": 1.1490617990493774, "eval_runtime": 29.781, "eval_samples_per_second": 4.567, "eval_steps_per_second": 0.168, "step": 48 }, { "epoch": 5.128205128205128, "grad_norm": 1.00479257106781, "learning_rate": 4.938271604938271e-05, "loss": 1.091, "step": 50 }, { "epoch": 5.948717948717949, "eval_accuracy": 0.7205882352941176, "eval_loss": 0.9932733178138733, "eval_runtime": 30.4494, "eval_samples_per_second": 4.466, "eval_steps_per_second": 0.164, "step": 58 }, { "epoch": 6.153846153846154, "grad_norm": 1.4680265188217163, "learning_rate": 4.814814814814815e-05, "loss": 0.9088, "step": 60 }, { "epoch": 6.9743589743589745, "eval_accuracy": 0.6985294117647058, "eval_loss": 0.9170573353767395, "eval_runtime": 29.5689, "eval_samples_per_second": 4.599, "eval_steps_per_second": 0.169, "step": 68 }, { "epoch": 7.17948717948718, "grad_norm": 1.2828285694122314, "learning_rate": 4.691358024691358e-05, "loss": 0.7858, "step": 70 }, { "epoch": 8.0, "eval_accuracy": 0.7720588235294118, "eval_loss": 0.8300582766532898, "eval_runtime": 30.9278, "eval_samples_per_second": 4.397, "eval_steps_per_second": 0.162, "step": 78 }, { "epoch": 8.205128205128204, "grad_norm": 1.7692698240280151, "learning_rate": 4.567901234567901e-05, "loss": 0.7016, "step": 80 }, { "epoch": 8.923076923076923, "eval_accuracy": 0.7352941176470589, "eval_loss": 0.7925190925598145, "eval_runtime": 32.3375, "eval_samples_per_second": 4.206, "eval_steps_per_second": 0.155, "step": 87 }, { "epoch": 9.23076923076923, "grad_norm": 1.7534127235412598, "learning_rate": 4.4444444444444447e-05, "loss": 0.6136, "step": 90 }, { "epoch": 9.948717948717949, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.6992273330688477, "eval_runtime": 30.502, "eval_samples_per_second": 4.459, "eval_steps_per_second": 0.164, "step": 97 }, { "epoch": 10.256410256410255, "grad_norm": 1.2740432024002075, "learning_rate": 4.3209876543209875e-05, "loss": 0.532, "step": 100 }, { "epoch": 10.974358974358974, "eval_accuracy": 0.8308823529411765, "eval_loss": 0.6400743126869202, "eval_runtime": 31.9094, "eval_samples_per_second": 4.262, "eval_steps_per_second": 0.157, "step": 107 }, { "epoch": 11.282051282051283, "grad_norm": 1.563349723815918, "learning_rate": 4.197530864197531e-05, "loss": 0.5018, "step": 110 }, { "epoch": 12.0, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.5786880254745483, "eval_runtime": 29.8171, "eval_samples_per_second": 4.561, "eval_steps_per_second": 0.168, "step": 117 }, { "epoch": 12.307692307692308, "grad_norm": 2.5129995346069336, "learning_rate": 4.074074074074074e-05, "loss": 0.4279, "step": 120 }, { "epoch": 12.923076923076923, "eval_accuracy": 0.8088235294117647, "eval_loss": 0.6129594445228577, "eval_runtime": 29.826, "eval_samples_per_second": 4.56, "eval_steps_per_second": 0.168, "step": 126 }, { "epoch": 13.333333333333334, "grad_norm": 1.6143475770950317, "learning_rate": 3.950617283950617e-05, "loss": 0.4116, "step": 130 }, { "epoch": 13.948717948717949, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.5089898109436035, "eval_runtime": 29.9983, "eval_samples_per_second": 4.534, "eval_steps_per_second": 0.167, "step": 136 }, { "epoch": 14.35897435897436, "grad_norm": 1.6241114139556885, "learning_rate": 3.82716049382716e-05, "loss": 0.3848, "step": 140 }, { "epoch": 14.974358974358974, "eval_accuracy": 0.8676470588235294, "eval_loss": 0.5165212154388428, "eval_runtime": 30.2024, "eval_samples_per_second": 4.503, "eval_steps_per_second": 0.166, "step": 146 }, { "epoch": 15.384615384615385, "grad_norm": 2.7321174144744873, "learning_rate": 3.7037037037037037e-05, "loss": 0.3449, "step": 150 }, { "epoch": 16.0, "eval_accuracy": 0.8382352941176471, "eval_loss": 0.4842991530895233, "eval_runtime": 29.5578, "eval_samples_per_second": 4.601, "eval_steps_per_second": 0.169, "step": 156 }, { "epoch": 16.41025641025641, "grad_norm": 1.7041429281234741, "learning_rate": 3.580246913580247e-05, "loss": 0.3008, "step": 160 }, { "epoch": 16.923076923076923, "eval_accuracy": 0.8455882352941176, "eval_loss": 0.5460208058357239, "eval_runtime": 29.7055, "eval_samples_per_second": 4.578, "eval_steps_per_second": 0.168, "step": 165 }, { "epoch": 17.435897435897434, "grad_norm": 2.1159939765930176, "learning_rate": 3.45679012345679e-05, "loss": 0.2797, "step": 170 }, { "epoch": 17.94871794871795, "eval_accuracy": 0.8308823529411765, "eval_loss": 0.4984971880912781, "eval_runtime": 29.7303, "eval_samples_per_second": 4.574, "eval_steps_per_second": 0.168, "step": 175 }, { "epoch": 18.46153846153846, "grad_norm": 1.050848126411438, "learning_rate": 3.3333333333333335e-05, "loss": 0.2696, "step": 180 }, { "epoch": 18.974358974358974, "eval_accuracy": 0.8455882352941176, "eval_loss": 0.5585992336273193, "eval_runtime": 29.5612, "eval_samples_per_second": 4.601, "eval_steps_per_second": 0.169, "step": 185 }, { "epoch": 19.487179487179485, "grad_norm": 2.385378360748291, "learning_rate": 3.209876543209876e-05, "loss": 0.2633, "step": 190 }, { "epoch": 20.0, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.43493831157684326, "eval_runtime": 29.6728, "eval_samples_per_second": 4.583, "eval_steps_per_second": 0.169, "step": 195 }, { "epoch": 20.51282051282051, "grad_norm": 1.972583293914795, "learning_rate": 3.08641975308642e-05, "loss": 0.2569, "step": 200 }, { "epoch": 20.923076923076923, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.4017449617385864, "eval_runtime": 31.5372, "eval_samples_per_second": 4.312, "eval_steps_per_second": 0.159, "step": 204 }, { "epoch": 21.53846153846154, "grad_norm": 2.712113380432129, "learning_rate": 2.962962962962963e-05, "loss": 0.27, "step": 210 }, { "epoch": 21.94871794871795, "eval_accuracy": 0.8602941176470589, "eval_loss": 0.4758412837982178, "eval_runtime": 30.0407, "eval_samples_per_second": 4.527, "eval_steps_per_second": 0.166, "step": 214 }, { "epoch": 22.564102564102566, "grad_norm": 1.7184677124023438, "learning_rate": 2.839506172839506e-05, "loss": 0.2706, "step": 220 }, { "epoch": 22.974358974358974, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.41326794028282166, "eval_runtime": 29.7459, "eval_samples_per_second": 4.572, "eval_steps_per_second": 0.168, "step": 224 }, { "epoch": 23.58974358974359, "grad_norm": 1.8792766332626343, "learning_rate": 2.7160493827160493e-05, "loss": 0.2211, "step": 230 }, { "epoch": 24.0, "eval_accuracy": 0.9117647058823529, "eval_loss": 0.3844151794910431, "eval_runtime": 29.8072, "eval_samples_per_second": 4.563, "eval_steps_per_second": 0.168, "step": 234 }, { "epoch": 24.615384615384617, "grad_norm": 2.2763214111328125, "learning_rate": 2.5925925925925925e-05, "loss": 0.1977, "step": 240 }, { "epoch": 24.923076923076923, "eval_accuracy": 0.9264705882352942, "eval_loss": 0.34974199533462524, "eval_runtime": 29.6847, "eval_samples_per_second": 4.581, "eval_steps_per_second": 0.168, "step": 243 }, { "epoch": 25.641025641025642, "grad_norm": 4.190616130828857, "learning_rate": 2.4691358024691357e-05, "loss": 0.1969, "step": 250 }, { "epoch": 25.94871794871795, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.37360796332359314, "eval_runtime": 31.1254, "eval_samples_per_second": 4.369, "eval_steps_per_second": 0.161, "step": 253 }, { "epoch": 26.666666666666668, "grad_norm": 0.6893692016601562, "learning_rate": 2.345679012345679e-05, "loss": 0.1776, "step": 260 }, { "epoch": 26.974358974358974, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.3796656131744385, "eval_runtime": 29.8455, "eval_samples_per_second": 4.557, "eval_steps_per_second": 0.168, "step": 263 }, { "epoch": 27.692307692307693, "grad_norm": 2.18731427192688, "learning_rate": 2.2222222222222223e-05, "loss": 0.1787, "step": 270 }, { "epoch": 28.0, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.39490777254104614, "eval_runtime": 29.2795, "eval_samples_per_second": 4.645, "eval_steps_per_second": 0.171, "step": 273 }, { "epoch": 28.71794871794872, "grad_norm": 1.656586766242981, "learning_rate": 2.0987654320987655e-05, "loss": 0.18, "step": 280 }, { "epoch": 28.923076923076923, "eval_accuracy": 0.9264705882352942, "eval_loss": 0.32775887846946716, "eval_runtime": 29.3934, "eval_samples_per_second": 4.627, "eval_steps_per_second": 0.17, "step": 282 }, { "epoch": 29.743589743589745, "grad_norm": 1.3221951723098755, "learning_rate": 1.9753086419753087e-05, "loss": 0.1797, "step": 290 }, { "epoch": 29.94871794871795, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.36148715019226074, "eval_runtime": 29.46, "eval_samples_per_second": 4.616, "eval_steps_per_second": 0.17, "step": 292 }, { "epoch": 30.76923076923077, "grad_norm": 1.227844476699829, "learning_rate": 1.8518518518518518e-05, "loss": 0.1665, "step": 300 }, { "epoch": 30.974358974358974, "eval_accuracy": 0.8602941176470589, "eval_loss": 0.4174344539642334, "eval_runtime": 29.6798, "eval_samples_per_second": 4.582, "eval_steps_per_second": 0.168, "step": 302 }, { "epoch": 31.794871794871796, "grad_norm": 2.3732173442840576, "learning_rate": 1.728395061728395e-05, "loss": 0.163, "step": 310 }, { "epoch": 32.0, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.3574081063270569, "eval_runtime": 29.8114, "eval_samples_per_second": 4.562, "eval_steps_per_second": 0.168, "step": 312 }, { "epoch": 32.82051282051282, "grad_norm": 2.783026933670044, "learning_rate": 1.604938271604938e-05, "loss": 0.1498, "step": 320 }, { "epoch": 32.92307692307692, "eval_accuracy": 0.9044117647058824, "eval_loss": 0.35905760526657104, "eval_runtime": 30.5584, "eval_samples_per_second": 4.45, "eval_steps_per_second": 0.164, "step": 321 }, { "epoch": 33.84615384615385, "grad_norm": 2.496835231781006, "learning_rate": 1.4814814814814815e-05, "loss": 0.1405, "step": 330 }, { "epoch": 33.94871794871795, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.30173459649086, "eval_runtime": 30.2786, "eval_samples_per_second": 4.492, "eval_steps_per_second": 0.165, "step": 331 }, { "epoch": 34.87179487179487, "grad_norm": 2.3252382278442383, "learning_rate": 1.3580246913580247e-05, "loss": 0.155, "step": 340 }, { "epoch": 34.97435897435897, "eval_accuracy": 0.9264705882352942, "eval_loss": 0.3303259611129761, "eval_runtime": 29.5808, "eval_samples_per_second": 4.598, "eval_steps_per_second": 0.169, "step": 341 }, { "epoch": 35.8974358974359, "grad_norm": 0.8514438271522522, "learning_rate": 1.2345679012345678e-05, "loss": 0.1519, "step": 350 }, { "epoch": 36.0, "eval_accuracy": 0.8970588235294118, "eval_loss": 0.3559113144874573, "eval_runtime": 30.264, "eval_samples_per_second": 4.494, "eval_steps_per_second": 0.165, "step": 351 }, { "epoch": 36.92307692307692, "grad_norm": 0.9986769556999207, "learning_rate": 1.1111111111111112e-05, "loss": 0.1415, "step": 360 }, { "epoch": 36.92307692307692, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.2890462279319763, "eval_runtime": 30.7844, "eval_samples_per_second": 4.418, "eval_steps_per_second": 0.162, "step": 360 }, { "epoch": 37.94871794871795, "grad_norm": 0.9639208912849426, "learning_rate": 9.876543209876543e-06, "loss": 0.1256, "step": 370 }, { "epoch": 37.94871794871795, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.3445207476615906, "eval_runtime": 29.9807, "eval_samples_per_second": 4.536, "eval_steps_per_second": 0.167, "step": 370 }, { "epoch": 38.97435897435897, "grad_norm": 0.9389523267745972, "learning_rate": 8.641975308641975e-06, "loss": 0.1217, "step": 380 }, { "epoch": 38.97435897435897, "eval_accuracy": 0.9117647058823529, "eval_loss": 0.3434993028640747, "eval_runtime": 30.2833, "eval_samples_per_second": 4.491, "eval_steps_per_second": 0.165, "step": 380 }, { "epoch": 40.0, "grad_norm": 7.135587215423584, "learning_rate": 7.4074074074074075e-06, "loss": 0.1285, "step": 390 }, { "epoch": 40.0, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.30249181389808655, "eval_runtime": 29.8301, "eval_samples_per_second": 4.559, "eval_steps_per_second": 0.168, "step": 390 }, { "epoch": 40.92307692307692, "eval_accuracy": 0.8823529411764706, "eval_loss": 0.36019423604011536, "eval_runtime": 29.9271, "eval_samples_per_second": 4.544, "eval_steps_per_second": 0.167, "step": 399 }, { "epoch": 41.02564102564103, "grad_norm": 0.5793786644935608, "learning_rate": 6.172839506172839e-06, "loss": 0.1301, "step": 400 }, { "epoch": 41.94871794871795, "eval_accuracy": 0.8897058823529411, "eval_loss": 0.3336350917816162, "eval_runtime": 32.0768, "eval_samples_per_second": 4.24, "eval_steps_per_second": 0.156, "step": 409 }, { "epoch": 42.05128205128205, "grad_norm": 2.5887136459350586, "learning_rate": 4.938271604938272e-06, "loss": 0.1243, "step": 410 }, { "epoch": 42.97435897435897, "eval_accuracy": 0.9338235294117647, "eval_loss": 0.28249993920326233, "eval_runtime": 29.95, "eval_samples_per_second": 4.541, "eval_steps_per_second": 0.167, "step": 419 }, { "epoch": 43.07692307692308, "grad_norm": 1.4639347791671753, "learning_rate": 3.7037037037037037e-06, "loss": 0.1191, "step": 420 }, { "epoch": 44.0, "eval_accuracy": 0.9264705882352942, "eval_loss": 0.28346800804138184, "eval_runtime": 32.4599, "eval_samples_per_second": 4.19, "eval_steps_per_second": 0.154, "step": 429 }, { "epoch": 44.1025641025641, "grad_norm": 1.6088590621948242, "learning_rate": 2.469135802469136e-06, "loss": 0.1221, "step": 430 }, { "epoch": 44.92307692307692, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.2723533809185028, "eval_runtime": 30.5746, "eval_samples_per_second": 4.448, "eval_steps_per_second": 0.164, "step": 438 }, { "epoch": 45.12820512820513, "grad_norm": 0.848240077495575, "learning_rate": 1.234567901234568e-06, "loss": 0.1151, "step": 440 }, { "epoch": 45.94871794871795, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.27075088024139404, "eval_runtime": 30.0665, "eval_samples_per_second": 4.523, "eval_steps_per_second": 0.166, "step": 448 }, { "epoch": 46.15384615384615, "grad_norm": 3.371528148651123, "learning_rate": 0.0, "loss": 0.1195, "step": 450 }, { "epoch": 46.15384615384615, "eval_accuracy": 0.9191176470588235, "eval_loss": 0.2707464396953583, "eval_runtime": 33.0815, "eval_samples_per_second": 4.111, "eval_steps_per_second": 0.151, "step": 450 }, { "epoch": 46.15384615384615, "step": 450, "total_flos": 4.3781443993328026e+18, "train_loss": 0.4078153912226359, "train_runtime": 14896.2203, "train_samples_per_second": 4.108, "train_steps_per_second": 0.03 } ], "logging_steps": 10, "max_steps": 450, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.3781443993328026e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }