|
{ |
|
"best_metric": 0.75, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-dmae-va-U5-42\\checkpoint-108", |
|
"epoch": 37.935483870967744, |
|
"eval_steps": 500, |
|
"global_step": 294, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.383138656616211, |
|
"eval_runtime": 0.8504, |
|
"eval_samples_per_second": 70.553, |
|
"eval_steps_per_second": 2.352, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.3852, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.45, |
|
"eval_loss": 1.3624320030212402, |
|
"eval_runtime": 0.8336, |
|
"eval_samples_per_second": 71.981, |
|
"eval_steps_per_second": 2.399, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.3728, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.2926621437072754, |
|
"eval_runtime": 0.8819, |
|
"eval_samples_per_second": 68.033, |
|
"eval_steps_per_second": 2.268, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2791, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 1.1181586980819702, |
|
"eval_runtime": 0.9023, |
|
"eval_samples_per_second": 66.499, |
|
"eval_steps_per_second": 2.217, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.4666666666666667, |
|
"eval_loss": 1.0064685344696045, |
|
"eval_runtime": 1.1345, |
|
"eval_samples_per_second": 52.886, |
|
"eval_steps_per_second": 1.763, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.810606060606061e-05, |
|
"loss": 1.094, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.8931151032447815, |
|
"eval_runtime": 0.9014, |
|
"eval_samples_per_second": 66.566, |
|
"eval_steps_per_second": 2.219, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 4.621212121212121e-05, |
|
"loss": 0.9601, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_accuracy": 0.48333333333333334, |
|
"eval_loss": 0.9209610819816589, |
|
"eval_runtime": 1.4464, |
|
"eval_samples_per_second": 41.482, |
|
"eval_steps_per_second": 1.383, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 4.431818181818182e-05, |
|
"loss": 0.8598, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5166666666666667, |
|
"eval_loss": 0.947771430015564, |
|
"eval_runtime": 1.4436, |
|
"eval_samples_per_second": 41.563, |
|
"eval_steps_per_second": 1.385, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_accuracy": 0.5833333333333334, |
|
"eval_loss": 0.8557999134063721, |
|
"eval_runtime": 1.0161, |
|
"eval_samples_per_second": 59.05, |
|
"eval_steps_per_second": 1.968, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 0.7558, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.9258978366851807, |
|
"eval_runtime": 0.9985, |
|
"eval_samples_per_second": 60.091, |
|
"eval_steps_per_second": 2.003, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 4.053030303030303e-05, |
|
"loss": 0.6696, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.97, |
|
"eval_accuracy": 0.6166666666666667, |
|
"eval_loss": 0.7952563762664795, |
|
"eval_runtime": 1.3622, |
|
"eval_samples_per_second": 44.048, |
|
"eval_steps_per_second": 1.468, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 3.8636363636363636e-05, |
|
"loss": 0.6079, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.7281058430671692, |
|
"eval_runtime": 1.1664, |
|
"eval_samples_per_second": 51.442, |
|
"eval_steps_per_second": 1.715, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 3.6742424242424246e-05, |
|
"loss": 0.516, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_accuracy": 0.6666666666666666, |
|
"eval_loss": 0.8551703691482544, |
|
"eval_runtime": 2.0248, |
|
"eval_samples_per_second": 29.633, |
|
"eval_steps_per_second": 0.988, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6653277277946472, |
|
"eval_runtime": 3.2967, |
|
"eval_samples_per_second": 18.2, |
|
"eval_steps_per_second": 0.607, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 3.484848484848485e-05, |
|
"loss": 0.4475, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.7548192739486694, |
|
"eval_runtime": 1.3318, |
|
"eval_samples_per_second": 45.053, |
|
"eval_steps_per_second": 1.502, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 3.295454545454545e-05, |
|
"loss": 0.4152, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7556443810462952, |
|
"eval_runtime": 2.1003, |
|
"eval_samples_per_second": 28.568, |
|
"eval_steps_per_second": 0.952, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 3.106060606060606e-05, |
|
"loss": 0.3759, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 16.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.7037982940673828, |
|
"eval_runtime": 1.1281, |
|
"eval_samples_per_second": 53.188, |
|
"eval_steps_per_second": 1.773, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7355785369873047, |
|
"eval_runtime": 1.7841, |
|
"eval_samples_per_second": 33.631, |
|
"eval_steps_per_second": 1.121, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 0.3366, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 18.97, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6649565100669861, |
|
"eval_runtime": 1.014, |
|
"eval_samples_per_second": 59.17, |
|
"eval_steps_per_second": 1.972, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.3212, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7667668461799622, |
|
"eval_runtime": 1.2664, |
|
"eval_samples_per_second": 47.379, |
|
"eval_steps_per_second": 1.579, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 2.537878787878788e-05, |
|
"loss": 0.2903, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.7996882796287537, |
|
"eval_runtime": 0.8672, |
|
"eval_samples_per_second": 69.189, |
|
"eval_steps_per_second": 2.306, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 2.3484848484848487e-05, |
|
"loss": 0.312, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.7473248243331909, |
|
"eval_runtime": 0.9023, |
|
"eval_samples_per_second": 66.497, |
|
"eval_steps_per_second": 2.217, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8479262590408325, |
|
"eval_runtime": 0.7981, |
|
"eval_samples_per_second": 75.176, |
|
"eval_steps_per_second": 2.506, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 2.1590909090909093e-05, |
|
"loss": 0.2488, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8069732785224915, |
|
"eval_runtime": 1.0488, |
|
"eval_samples_per_second": 57.211, |
|
"eval_steps_per_second": 1.907, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 0.283, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 24.9, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_loss": 0.8079617619514465, |
|
"eval_runtime": 1.2667, |
|
"eval_samples_per_second": 47.366, |
|
"eval_steps_per_second": 1.579, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 1.7803030303030303e-05, |
|
"loss": 0.2109, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 25.94, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8220053315162659, |
|
"eval_runtime": 1.2171, |
|
"eval_samples_per_second": 49.296, |
|
"eval_steps_per_second": 1.643, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 26.97, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.8354397416114807, |
|
"eval_runtime": 0.8636, |
|
"eval_samples_per_second": 69.476, |
|
"eval_steps_per_second": 2.316, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 0.2215, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8667593598365784, |
|
"eval_runtime": 1.1633, |
|
"eval_samples_per_second": 51.577, |
|
"eval_steps_per_second": 1.719, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.4015151515151515e-05, |
|
"loss": 0.2067, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 28.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8478964567184448, |
|
"eval_runtime": 1.6328, |
|
"eval_samples_per_second": 36.746, |
|
"eval_steps_per_second": 1.225, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 0.1967, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 29.94, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.8867913484573364, |
|
"eval_runtime": 1.015, |
|
"eval_samples_per_second": 59.114, |
|
"eval_steps_per_second": 1.97, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"learning_rate": 1.0227272727272729e-05, |
|
"loss": 0.1948, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 30.97, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8882503509521484, |
|
"eval_runtime": 1.8662, |
|
"eval_samples_per_second": 32.152, |
|
"eval_steps_per_second": 1.072, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8612008690834045, |
|
"eval_runtime": 1.182, |
|
"eval_samples_per_second": 50.764, |
|
"eval_steps_per_second": 1.692, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 32.26, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.186, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 32.9, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8859888315200806, |
|
"eval_runtime": 1.7167, |
|
"eval_samples_per_second": 34.951, |
|
"eval_steps_per_second": 1.165, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 33.55, |
|
"learning_rate": 6.43939393939394e-06, |
|
"loss": 0.1662, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 33.94, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.9057492613792419, |
|
"eval_runtime": 0.9173, |
|
"eval_samples_per_second": 65.409, |
|
"eval_steps_per_second": 2.18, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.1773, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"eval_accuracy": 0.7166666666666667, |
|
"eval_loss": 0.9140269160270691, |
|
"eval_runtime": 1.1633, |
|
"eval_samples_per_second": 51.579, |
|
"eval_steps_per_second": 1.719, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.9013189673423767, |
|
"eval_runtime": 0.8518, |
|
"eval_samples_per_second": 70.443, |
|
"eval_steps_per_second": 2.348, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 36.13, |
|
"learning_rate": 2.651515151515152e-06, |
|
"loss": 0.1519, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 36.9, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.8868784308433533, |
|
"eval_runtime": 0.8675, |
|
"eval_samples_per_second": 69.164, |
|
"eval_steps_per_second": 2.305, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 37.42, |
|
"learning_rate": 7.575757575757576e-07, |
|
"loss": 0.1775, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"eval_accuracy": 0.7333333333333333, |
|
"eval_loss": 0.8840016722679138, |
|
"eval_runtime": 1.1992, |
|
"eval_samples_per_second": 50.034, |
|
"eval_steps_per_second": 1.668, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 37.94, |
|
"step": 294, |
|
"total_flos": 9.188778373008998e+17, |
|
"train_loss": 0.4929502831429851, |
|
"train_runtime": 549.9582, |
|
"train_samples_per_second": 74.384, |
|
"train_steps_per_second": 0.535 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 294, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 42, |
|
"save_steps": 500, |
|
"total_flos": 9.188778373008998e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|