{ "best_metric": 0.75, "best_model_checkpoint": "swin-tiny-patch4-window7-224-dmae-va-U5-42\\checkpoint-108", "epoch": 37.935483870967744, "eval_steps": 500, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9, "eval_accuracy": 0.45, "eval_loss": 1.383138656616211, "eval_runtime": 0.8504, "eval_samples_per_second": 70.553, "eval_steps_per_second": 2.352, "step": 7 }, { "epoch": 1.29, "learning_rate": 1.6666666666666667e-05, "loss": 1.3852, "step": 10 }, { "epoch": 1.94, "eval_accuracy": 0.45, "eval_loss": 1.3624320030212402, "eval_runtime": 0.8336, "eval_samples_per_second": 71.981, "eval_steps_per_second": 2.399, "step": 15 }, { "epoch": 2.58, "learning_rate": 3.3333333333333335e-05, "loss": 1.3728, "step": 20 }, { "epoch": 2.97, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.2926621437072754, "eval_runtime": 0.8819, "eval_samples_per_second": 68.033, "eval_steps_per_second": 2.268, "step": 23 }, { "epoch": 3.87, "learning_rate": 5e-05, "loss": 1.2791, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.1181586980819702, "eval_runtime": 0.9023, "eval_samples_per_second": 66.499, "eval_steps_per_second": 2.217, "step": 31 }, { "epoch": 4.9, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.0064685344696045, "eval_runtime": 1.1345, "eval_samples_per_second": 52.886, "eval_steps_per_second": 1.763, "step": 38 }, { "epoch": 5.16, "learning_rate": 4.810606060606061e-05, "loss": 1.094, "step": 40 }, { "epoch": 5.94, "eval_accuracy": 0.6, "eval_loss": 0.8931151032447815, "eval_runtime": 0.9014, "eval_samples_per_second": 66.566, "eval_steps_per_second": 2.219, "step": 46 }, { "epoch": 6.45, "learning_rate": 4.621212121212121e-05, "loss": 0.9601, "step": 50 }, { "epoch": 6.97, "eval_accuracy": 0.48333333333333334, "eval_loss": 0.9209610819816589, "eval_runtime": 1.4464, "eval_samples_per_second": 41.482, "eval_steps_per_second": 1.383, "step": 54 }, { "epoch": 7.74, "learning_rate": 4.431818181818182e-05, "loss": 0.8598, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.5166666666666667, "eval_loss": 0.947771430015564, "eval_runtime": 1.4436, "eval_samples_per_second": 41.563, "eval_steps_per_second": 1.385, "step": 62 }, { "epoch": 8.9, "eval_accuracy": 0.5833333333333334, "eval_loss": 0.8557999134063721, "eval_runtime": 1.0161, "eval_samples_per_second": 59.05, "eval_steps_per_second": 1.968, "step": 69 }, { "epoch": 9.03, "learning_rate": 4.242424242424243e-05, "loss": 0.7558, "step": 70 }, { "epoch": 9.94, "eval_accuracy": 0.65, "eval_loss": 0.9258978366851807, "eval_runtime": 0.9985, "eval_samples_per_second": 60.091, "eval_steps_per_second": 2.003, "step": 77 }, { "epoch": 10.32, "learning_rate": 4.053030303030303e-05, "loss": 0.6696, "step": 80 }, { "epoch": 10.97, "eval_accuracy": 0.6166666666666667, "eval_loss": 0.7952563762664795, "eval_runtime": 1.3622, "eval_samples_per_second": 44.048, "eval_steps_per_second": 1.468, "step": 85 }, { "epoch": 11.61, "learning_rate": 3.8636363636363636e-05, "loss": 0.6079, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.7, "eval_loss": 0.7281058430671692, "eval_runtime": 1.1664, "eval_samples_per_second": 51.442, "eval_steps_per_second": 1.715, "step": 93 }, { "epoch": 12.9, "learning_rate": 3.6742424242424246e-05, "loss": 0.516, "step": 100 }, { "epoch": 12.9, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8551703691482544, "eval_runtime": 2.0248, "eval_samples_per_second": 29.633, "eval_steps_per_second": 0.988, "step": 100 }, { "epoch": 13.94, "eval_accuracy": 0.75, "eval_loss": 0.6653277277946472, "eval_runtime": 3.2967, "eval_samples_per_second": 18.2, "eval_steps_per_second": 0.607, "step": 108 }, { "epoch": 14.19, "learning_rate": 3.484848484848485e-05, "loss": 0.4475, "step": 110 }, { "epoch": 14.97, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.7548192739486694, "eval_runtime": 1.3318, "eval_samples_per_second": 45.053, "eval_steps_per_second": 1.502, "step": 116 }, { "epoch": 15.48, "learning_rate": 3.295454545454545e-05, "loss": 0.4152, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7556443810462952, "eval_runtime": 2.1003, "eval_samples_per_second": 28.568, "eval_steps_per_second": 0.952, "step": 124 }, { "epoch": 16.77, "learning_rate": 3.106060606060606e-05, "loss": 0.3759, "step": 130 }, { "epoch": 16.9, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.7037982940673828, "eval_runtime": 1.1281, "eval_samples_per_second": 53.188, "eval_steps_per_second": 1.773, "step": 131 }, { "epoch": 17.94, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7355785369873047, "eval_runtime": 1.7841, "eval_samples_per_second": 33.631, "eval_steps_per_second": 1.121, "step": 139 }, { "epoch": 18.06, "learning_rate": 2.916666666666667e-05, "loss": 0.3366, "step": 140 }, { "epoch": 18.97, "eval_accuracy": 0.75, "eval_loss": 0.6649565100669861, "eval_runtime": 1.014, "eval_samples_per_second": 59.17, "eval_steps_per_second": 1.972, "step": 147 }, { "epoch": 19.35, "learning_rate": 2.7272727272727273e-05, "loss": 0.3212, "step": 150 }, { "epoch": 20.0, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7667668461799622, "eval_runtime": 1.2664, "eval_samples_per_second": 47.379, "eval_steps_per_second": 1.579, "step": 155 }, { "epoch": 20.65, "learning_rate": 2.537878787878788e-05, "loss": 0.2903, "step": 160 }, { "epoch": 20.9, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.7996882796287537, "eval_runtime": 0.8672, "eval_samples_per_second": 69.189, "eval_steps_per_second": 2.306, "step": 162 }, { "epoch": 21.94, "learning_rate": 2.3484848484848487e-05, "loss": 0.312, "step": 170 }, { "epoch": 21.94, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7473248243331909, "eval_runtime": 0.9023, "eval_samples_per_second": 66.497, "eval_steps_per_second": 2.217, "step": 170 }, { "epoch": 22.97, "eval_accuracy": 0.7, "eval_loss": 0.8479262590408325, "eval_runtime": 0.7981, "eval_samples_per_second": 75.176, "eval_steps_per_second": 2.506, "step": 178 }, { "epoch": 23.23, "learning_rate": 2.1590909090909093e-05, "loss": 0.2488, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.7, "eval_loss": 0.8069732785224915, "eval_runtime": 1.0488, "eval_samples_per_second": 57.211, "eval_steps_per_second": 1.907, "step": 186 }, { "epoch": 24.52, "learning_rate": 1.9696969696969697e-05, "loss": 0.283, "step": 190 }, { "epoch": 24.9, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.8079617619514465, "eval_runtime": 1.2667, "eval_samples_per_second": 47.366, "eval_steps_per_second": 1.579, "step": 193 }, { "epoch": 25.81, "learning_rate": 1.7803030303030303e-05, "loss": 0.2109, "step": 200 }, { "epoch": 25.94, "eval_accuracy": 0.7, "eval_loss": 0.8220053315162659, "eval_runtime": 1.2171, "eval_samples_per_second": 49.296, "eval_steps_per_second": 1.643, "step": 201 }, { "epoch": 26.97, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.8354397416114807, "eval_runtime": 0.8636, "eval_samples_per_second": 69.476, "eval_steps_per_second": 2.316, "step": 209 }, { "epoch": 27.1, "learning_rate": 1.590909090909091e-05, "loss": 0.2215, "step": 210 }, { "epoch": 28.0, "eval_accuracy": 0.7, "eval_loss": 0.8667593598365784, "eval_runtime": 1.1633, "eval_samples_per_second": 51.577, "eval_steps_per_second": 1.719, "step": 217 }, { "epoch": 28.39, "learning_rate": 1.4015151515151515e-05, "loss": 0.2067, "step": 220 }, { "epoch": 28.9, "eval_accuracy": 0.75, "eval_loss": 0.8478964567184448, "eval_runtime": 1.6328, "eval_samples_per_second": 36.746, "eval_steps_per_second": 1.225, "step": 224 }, { "epoch": 29.68, "learning_rate": 1.2121212121212122e-05, "loss": 0.1967, "step": 230 }, { "epoch": 29.94, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.8867913484573364, "eval_runtime": 1.015, "eval_samples_per_second": 59.114, "eval_steps_per_second": 1.97, "step": 232 }, { "epoch": 30.97, "learning_rate": 1.0227272727272729e-05, "loss": 0.1948, "step": 240 }, { "epoch": 30.97, "eval_accuracy": 0.7, "eval_loss": 0.8882503509521484, "eval_runtime": 1.8662, "eval_samples_per_second": 32.152, "eval_steps_per_second": 1.072, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.8612008690834045, "eval_runtime": 1.182, "eval_samples_per_second": 50.764, "eval_steps_per_second": 1.692, "step": 248 }, { "epoch": 32.26, "learning_rate": 8.333333333333334e-06, "loss": 0.186, "step": 250 }, { "epoch": 32.9, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.8859888315200806, "eval_runtime": 1.7167, "eval_samples_per_second": 34.951, "eval_steps_per_second": 1.165, "step": 255 }, { "epoch": 33.55, "learning_rate": 6.43939393939394e-06, "loss": 0.1662, "step": 260 }, { "epoch": 33.94, "eval_accuracy": 0.75, "eval_loss": 0.9057492613792419, "eval_runtime": 0.9173, "eval_samples_per_second": 65.409, "eval_steps_per_second": 2.18, "step": 263 }, { "epoch": 34.84, "learning_rate": 4.5454545454545455e-06, "loss": 0.1773, "step": 270 }, { "epoch": 34.97, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.9140269160270691, "eval_runtime": 1.1633, "eval_samples_per_second": 51.579, "eval_steps_per_second": 1.719, "step": 271 }, { "epoch": 36.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.9013189673423767, "eval_runtime": 0.8518, "eval_samples_per_second": 70.443, "eval_steps_per_second": 2.348, "step": 279 }, { "epoch": 36.13, "learning_rate": 2.651515151515152e-06, "loss": 0.1519, "step": 280 }, { "epoch": 36.9, "eval_accuracy": 0.75, "eval_loss": 0.8868784308433533, "eval_runtime": 0.8675, "eval_samples_per_second": 69.164, "eval_steps_per_second": 2.305, "step": 286 }, { "epoch": 37.42, "learning_rate": 7.575757575757576e-07, "loss": 0.1775, "step": 290 }, { "epoch": 37.94, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.8840016722679138, "eval_runtime": 1.1992, "eval_samples_per_second": 50.034, "eval_steps_per_second": 1.668, "step": 294 }, { "epoch": 37.94, "step": 294, "total_flos": 9.188778373008998e+17, "train_loss": 0.4929502831429851, "train_runtime": 549.9582, "train_samples_per_second": 74.384, "train_steps_per_second": 0.535 } ], "logging_steps": 10, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "total_flos": 9.188778373008998e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }