|
{ |
|
"best_metric": 0.8391304347826087, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-teeth_dataset/checkpoint-120", |
|
"epoch": 40.0, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.015217391304347827, |
|
"eval_loss": 4.579592704772949, |
|
"eval_runtime": 349.3053, |
|
"eval_samples_per_second": 1.317, |
|
"eval_steps_per_second": 0.043, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.02608695652173913, |
|
"eval_loss": 4.519979000091553, |
|
"eval_runtime": 4.4643, |
|
"eval_samples_per_second": 103.04, |
|
"eval_steps_per_second": 3.36, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"grad_norm": 4.626802921295166, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 4.5616, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_accuracy": 0.03260869565217391, |
|
"eval_loss": 4.470459461212158, |
|
"eval_runtime": 4.8338, |
|
"eval_samples_per_second": 95.164, |
|
"eval_steps_per_second": 3.103, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.06739130434782609, |
|
"eval_loss": 4.412718296051025, |
|
"eval_runtime": 5.0133, |
|
"eval_samples_per_second": 91.756, |
|
"eval_steps_per_second": 2.992, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.08043478260869565, |
|
"eval_loss": 4.349262714385986, |
|
"eval_runtime": 3.9868, |
|
"eval_samples_per_second": 115.382, |
|
"eval_steps_per_second": 3.762, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"grad_norm": 4.411898136138916, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 4.44, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_accuracy": 0.11304347826086956, |
|
"eval_loss": 4.242533206939697, |
|
"eval_runtime": 4.0721, |
|
"eval_samples_per_second": 112.963, |
|
"eval_steps_per_second": 3.684, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_accuracy": 0.13695652173913042, |
|
"eval_loss": 4.110694885253906, |
|
"eval_runtime": 4.2677, |
|
"eval_samples_per_second": 107.788, |
|
"eval_steps_per_second": 3.515, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 10.778473854064941, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 4.1823, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.1608695652173913, |
|
"eval_loss": 3.9340145587921143, |
|
"eval_runtime": 4.4318, |
|
"eval_samples_per_second": 103.796, |
|
"eval_steps_per_second": 3.385, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_accuracy": 0.1934782608695652, |
|
"eval_loss": 3.7821249961853027, |
|
"eval_runtime": 4.4828, |
|
"eval_samples_per_second": 102.615, |
|
"eval_steps_per_second": 3.346, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_accuracy": 0.2782608695652174, |
|
"eval_loss": 3.5313777923583984, |
|
"eval_runtime": 4.1068, |
|
"eval_samples_per_second": 112.01, |
|
"eval_steps_per_second": 3.653, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"grad_norm": 14.638664245605469, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 3.6357, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"eval_accuracy": 0.30434782608695654, |
|
"eval_loss": 3.285728693008423, |
|
"eval_runtime": 4.0688, |
|
"eval_samples_per_second": 113.055, |
|
"eval_steps_per_second": 3.687, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.3695652173913043, |
|
"eval_loss": 3.106356143951416, |
|
"eval_runtime": 4.0947, |
|
"eval_samples_per_second": 112.339, |
|
"eval_steps_per_second": 3.663, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"eval_accuracy": 0.3826086956521739, |
|
"eval_loss": 2.9712679386138916, |
|
"eval_runtime": 4.065, |
|
"eval_samples_per_second": 113.162, |
|
"eval_steps_per_second": 3.69, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"grad_norm": 24.2481746673584, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 3.0041, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"eval_accuracy": 0.48695652173913045, |
|
"eval_loss": 2.7171707153320312, |
|
"eval_runtime": 4.147, |
|
"eval_samples_per_second": 110.923, |
|
"eval_steps_per_second": 3.617, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_accuracy": 0.5434782608695652, |
|
"eval_loss": 2.511073589324951, |
|
"eval_runtime": 4.329, |
|
"eval_samples_per_second": 106.261, |
|
"eval_steps_per_second": 3.465, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 17.775148391723633, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.4604, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5695652173913044, |
|
"eval_loss": 2.356055736541748, |
|
"eval_runtime": 4.5447, |
|
"eval_samples_per_second": 101.216, |
|
"eval_steps_per_second": 3.301, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 16.8, |
|
"eval_accuracy": 0.5717391304347826, |
|
"eval_loss": 2.2684459686279297, |
|
"eval_runtime": 4.3589, |
|
"eval_samples_per_second": 105.531, |
|
"eval_steps_per_second": 3.441, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"eval_accuracy": 0.6347826086956522, |
|
"eval_loss": 2.0961129665374756, |
|
"eval_runtime": 4.0684, |
|
"eval_samples_per_second": 113.066, |
|
"eval_steps_per_second": 3.687, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"grad_norm": 28.70151710510254, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 1.971, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"eval_accuracy": 0.6782608695652174, |
|
"eval_loss": 1.9555323123931885, |
|
"eval_runtime": 4.1018, |
|
"eval_samples_per_second": 112.147, |
|
"eval_steps_per_second": 3.657, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6891304347826087, |
|
"eval_loss": 1.8400135040283203, |
|
"eval_runtime": 4.114, |
|
"eval_samples_per_second": 111.814, |
|
"eval_steps_per_second": 3.646, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_accuracy": 0.7239130434782609, |
|
"eval_loss": 1.78555166721344, |
|
"eval_runtime": 4.1163, |
|
"eval_samples_per_second": 111.752, |
|
"eval_steps_per_second": 3.644, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"grad_norm": 25.09538459777832, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 1.651, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"eval_accuracy": 0.7369565217391304, |
|
"eval_loss": 1.6797059774398804, |
|
"eval_runtime": 4.243, |
|
"eval_samples_per_second": 108.413, |
|
"eval_steps_per_second": 3.535, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"eval_accuracy": 0.7717391304347826, |
|
"eval_loss": 1.600671410560608, |
|
"eval_runtime": 4.4052, |
|
"eval_samples_per_second": 104.422, |
|
"eval_steps_per_second": 3.405, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 30.617403030395508, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 1.3665, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7739130434782608, |
|
"eval_loss": 1.5255870819091797, |
|
"eval_runtime": 4.5557, |
|
"eval_samples_per_second": 100.972, |
|
"eval_steps_per_second": 3.293, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 24.8, |
|
"eval_accuracy": 0.7652173913043478, |
|
"eval_loss": 1.4875919818878174, |
|
"eval_runtime": 4.9234, |
|
"eval_samples_per_second": 93.432, |
|
"eval_steps_per_second": 3.047, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 25.87, |
|
"eval_accuracy": 0.7782608695652173, |
|
"eval_loss": 1.4394937753677368, |
|
"eval_runtime": 4.1294, |
|
"eval_samples_per_second": 111.395, |
|
"eval_steps_per_second": 3.632, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"grad_norm": 17.45021629333496, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 1.1954, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 26.93, |
|
"eval_accuracy": 0.7869565217391304, |
|
"eval_loss": 1.3679003715515137, |
|
"eval_runtime": 4.1015, |
|
"eval_samples_per_second": 112.155, |
|
"eval_steps_per_second": 3.657, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8021739130434783, |
|
"eval_loss": 1.304268479347229, |
|
"eval_runtime": 4.1469, |
|
"eval_samples_per_second": 110.928, |
|
"eval_steps_per_second": 3.617, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"eval_accuracy": 0.8021739130434783, |
|
"eval_loss": 1.2906242609024048, |
|
"eval_runtime": 4.1542, |
|
"eval_samples_per_second": 110.731, |
|
"eval_steps_per_second": 3.611, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"grad_norm": 16.89618492126465, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.9886, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"eval_accuracy": 0.8108695652173913, |
|
"eval_loss": 1.2312711477279663, |
|
"eval_runtime": 4.2901, |
|
"eval_samples_per_second": 107.224, |
|
"eval_steps_per_second": 3.496, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"eval_accuracy": 0.8347826086956521, |
|
"eval_loss": 1.182868480682373, |
|
"eval_runtime": 4.4407, |
|
"eval_samples_per_second": 103.588, |
|
"eval_steps_per_second": 3.378, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 14.743515968322754, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.8803, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8391304347826087, |
|
"eval_loss": 1.1564103364944458, |
|
"eval_runtime": 4.4277, |
|
"eval_samples_per_second": 103.892, |
|
"eval_steps_per_second": 3.388, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 32.8, |
|
"eval_accuracy": 0.8304347826086956, |
|
"eval_loss": 1.1421394348144531, |
|
"eval_runtime": 4.2096, |
|
"eval_samples_per_second": 109.275, |
|
"eval_steps_per_second": 3.563, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 33.87, |
|
"eval_accuracy": 0.8326086956521739, |
|
"eval_loss": 1.114410400390625, |
|
"eval_runtime": 4.0812, |
|
"eval_samples_per_second": 112.713, |
|
"eval_steps_per_second": 3.675, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"grad_norm": 15.186381340026855, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.815, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 34.93, |
|
"eval_accuracy": 0.8304347826086956, |
|
"eval_loss": 1.107426404953003, |
|
"eval_runtime": 4.1243, |
|
"eval_samples_per_second": 111.534, |
|
"eval_steps_per_second": 3.637, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8282608695652174, |
|
"eval_loss": 1.0919322967529297, |
|
"eval_runtime": 4.1615, |
|
"eval_samples_per_second": 110.537, |
|
"eval_steps_per_second": 3.604, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"eval_accuracy": 0.8326086956521739, |
|
"eval_loss": 1.082062840461731, |
|
"eval_runtime": 4.184, |
|
"eval_samples_per_second": 109.942, |
|
"eval_steps_per_second": 3.585, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"grad_norm": 12.77287483215332, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.7619, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"eval_accuracy": 0.8347826086956521, |
|
"eval_loss": 1.070085883140564, |
|
"eval_runtime": 4.3491, |
|
"eval_samples_per_second": 105.769, |
|
"eval_steps_per_second": 3.449, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"eval_accuracy": 0.8347826086956521, |
|
"eval_loss": 1.0642296075820923, |
|
"eval_runtime": 4.4959, |
|
"eval_samples_per_second": 102.315, |
|
"eval_steps_per_second": 3.336, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 20.172346115112305, |
|
"learning_rate": 0.0, |
|
"loss": 0.6991, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8391304347826087, |
|
"eval_loss": 1.063112497329712, |
|
"eval_runtime": 4.3636, |
|
"eval_samples_per_second": 105.417, |
|
"eval_steps_per_second": 3.438, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"step": 150, |
|
"total_flos": 4.585003492737024e+17, |
|
"train_loss": 2.1741886361440024, |
|
"train_runtime": 1273.2626, |
|
"train_samples_per_second": 18.064, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 4.585003492737024e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|