|
{ |
|
"best_metric": 0.9157343919162757, |
|
"best_model_checkpoint": "violation-classification-bantai-vit-v100ep/checkpoint-808", |
|
"epoch": 10.99753086419753, |
|
"global_step": 1111, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.950495049504951e-07, |
|
"loss": 0.2829, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.900990099009902e-07, |
|
"loss": 0.2759, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4851485148514852e-06, |
|
"loss": 0.2657, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.9801980198019803e-06, |
|
"loss": 0.2642, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.4752475247524753e-06, |
|
"loss": 0.2586, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.9702970297029703e-06, |
|
"loss": 0.2344, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.4653465346534657e-06, |
|
"loss": 0.2868, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.960396039603961e-06, |
|
"loss": 0.2831, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.455445544554456e-06, |
|
"loss": 0.2574, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.950495049504951e-06, |
|
"loss": 0.2811, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9027426921688921, |
|
"eval_loss": 0.28554767370224, |
|
"eval_runtime": 59.413, |
|
"eval_samples_per_second": 93.279, |
|
"eval_steps_per_second": 2.929, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.445544554455446e-06, |
|
"loss": 0.2769, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5.940594059405941e-06, |
|
"loss": 0.2356, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 6.4356435643564364e-06, |
|
"loss": 0.2493, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 6.9306930693069314e-06, |
|
"loss": 0.2455, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 7.4257425742574256e-06, |
|
"loss": 0.2305, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.920792079207921e-06, |
|
"loss": 0.2439, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.415841584158417e-06, |
|
"loss": 0.2493, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 8.910891089108911e-06, |
|
"loss": 0.2357, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 9.405940594059407e-06, |
|
"loss": 0.2709, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 9.900990099009901e-06, |
|
"loss": 0.2382, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.908516780945507, |
|
"eval_loss": 0.27634507417678833, |
|
"eval_runtime": 58.9068, |
|
"eval_samples_per_second": 94.081, |
|
"eval_steps_per_second": 2.954, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.0396039603960395e-05, |
|
"loss": 0.2447, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.0891089108910891e-05, |
|
"loss": 0.2369, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1386138613861387e-05, |
|
"loss": 0.2213, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1881188118811881e-05, |
|
"loss": 0.2433, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.2376237623762377e-05, |
|
"loss": 0.2184, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.2871287128712873e-05, |
|
"loss": 0.22, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.3366336633663367e-05, |
|
"loss": 0.2111, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.3861386138613863e-05, |
|
"loss": 0.2134, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.4356435643564355e-05, |
|
"loss": 0.222, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4851485148514851e-05, |
|
"loss": 0.2361, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9108625045110068, |
|
"eval_loss": 0.2605169415473938, |
|
"eval_runtime": 59.2161, |
|
"eval_samples_per_second": 93.589, |
|
"eval_steps_per_second": 2.938, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.534653465346535e-05, |
|
"loss": 0.2091, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.5841584158415843e-05, |
|
"loss": 0.2363, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 1.6336633663366337e-05, |
|
"loss": 0.2161, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.6831683168316834e-05, |
|
"loss": 0.2232, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.7326732673267325e-05, |
|
"loss": 0.2071, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.7821782178217823e-05, |
|
"loss": 0.1942, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.8316831683168317e-05, |
|
"loss": 0.2114, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.8811881188118814e-05, |
|
"loss": 0.2158, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.930693069306931e-05, |
|
"loss": 0.1908, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.9801980198019803e-05, |
|
"loss": 0.196, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.911042944785276, |
|
"eval_loss": 0.2652387320995331, |
|
"eval_runtime": 59.3905, |
|
"eval_samples_per_second": 93.315, |
|
"eval_steps_per_second": 2.93, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 2.02970297029703e-05, |
|
"loss": 0.2102, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.079207920792079e-05, |
|
"loss": 0.1816, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 2.128712871287129e-05, |
|
"loss": 0.195, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 2.1782178217821783e-05, |
|
"loss": 0.1772, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.227722772277228e-05, |
|
"loss": 0.2059, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 2.2772277227722774e-05, |
|
"loss": 0.1992, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.326732673267327e-05, |
|
"loss": 0.2042, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.3762376237623762e-05, |
|
"loss": 0.1945, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.4257425742574257e-05, |
|
"loss": 0.1775, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.4752475247524754e-05, |
|
"loss": 0.1395, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9133886683507759, |
|
"eval_loss": 0.2648361623287201, |
|
"eval_runtime": 59.3489, |
|
"eval_samples_per_second": 93.38, |
|
"eval_steps_per_second": 2.932, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 2.5247524752475248e-05, |
|
"loss": 0.1543, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 2.5742574257425746e-05, |
|
"loss": 0.1534, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 2.623762376237624e-05, |
|
"loss": 0.1504, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 2.6732673267326734e-05, |
|
"loss": 0.1909, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.722772277227723e-05, |
|
"loss": 0.1579, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 2.7722772277227726e-05, |
|
"loss": 0.1695, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 2.8217821782178216e-05, |
|
"loss": 0.1638, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.871287128712871e-05, |
|
"loss": 0.1574, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 2.9207920792079208e-05, |
|
"loss": 0.1471, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.9702970297029702e-05, |
|
"loss": 0.155, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.915193071093468, |
|
"eval_loss": 0.26563403010368347, |
|
"eval_runtime": 59.8577, |
|
"eval_samples_per_second": 92.586, |
|
"eval_steps_per_second": 2.907, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 3.01980198019802e-05, |
|
"loss": 0.1669, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.06930693069307e-05, |
|
"loss": 0.137, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.118811881188119e-05, |
|
"loss": 0.1461, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 3.1683168316831686e-05, |
|
"loss": 0.1362, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.217821782178218e-05, |
|
"loss": 0.1111, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 3.2673267326732674e-05, |
|
"loss": 0.1515, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 3.3168316831683175e-05, |
|
"loss": 0.1637, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 3.366336633663367e-05, |
|
"loss": 0.1567, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 3.415841584158416e-05, |
|
"loss": 0.1492, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 3.465346534653465e-05, |
|
"loss": 0.1422, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9141104294478528, |
|
"eval_loss": 0.26065394282341003, |
|
"eval_runtime": 59.5203, |
|
"eval_samples_per_second": 93.111, |
|
"eval_steps_per_second": 2.923, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 3.514851485148515e-05, |
|
"loss": 0.1479, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 3.5643564356435645e-05, |
|
"loss": 0.1464, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 3.613861386138614e-05, |
|
"loss": 0.1469, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 3.6633663366336634e-05, |
|
"loss": 0.1403, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 3.712871287128713e-05, |
|
"loss": 0.1166, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 3.762376237623763e-05, |
|
"loss": 0.1276, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 3.811881188118812e-05, |
|
"loss": 0.1356, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 3.861386138613862e-05, |
|
"loss": 0.1563, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 3.910891089108911e-05, |
|
"loss": 0.1256, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 3.9603960396039605e-05, |
|
"loss": 0.1511, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9157343919162757, |
|
"eval_loss": 0.25571852922439575, |
|
"eval_runtime": 59.7308, |
|
"eval_samples_per_second": 92.783, |
|
"eval_steps_per_second": 2.913, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.0099009900990106e-05, |
|
"loss": 0.165, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 4.05940594059406e-05, |
|
"loss": 0.2171, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 4.108910891089109e-05, |
|
"loss": 0.2175, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 4.158415841584158e-05, |
|
"loss": 0.1698, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 4.207920792079208e-05, |
|
"loss": 0.183, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 4.257425742574258e-05, |
|
"loss": 0.1738, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 4.306930693069307e-05, |
|
"loss": 0.1784, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 4.3564356435643565e-05, |
|
"loss": 0.1965, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 4.405940594059406e-05, |
|
"loss": 0.2112, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 4.455445544554456e-05, |
|
"loss": 0.1938, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9049079754601227, |
|
"eval_loss": 0.2679148316383362, |
|
"eval_runtime": 59.5803, |
|
"eval_samples_per_second": 93.017, |
|
"eval_steps_per_second": 2.92, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 4.5049504950495054e-05, |
|
"loss": 0.2518, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 4.554455445544555e-05, |
|
"loss": 0.1973, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 4.603960396039604e-05, |
|
"loss": 0.1798, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 4.653465346534654e-05, |
|
"loss": 0.1841, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 4.702970297029703e-05, |
|
"loss": 0.1806, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 4.7524752475247525e-05, |
|
"loss": 0.1602, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 4.801980198019802e-05, |
|
"loss": 0.1913, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 4.851485148514851e-05, |
|
"loss": 0.2061, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 4.9009900990099014e-05, |
|
"loss": 0.1838, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 4.950495049504951e-05, |
|
"loss": 0.1629, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2094, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9137495488993144, |
|
"eval_loss": 0.23915652930736542, |
|
"eval_runtime": 59.6978, |
|
"eval_samples_per_second": 92.834, |
|
"eval_steps_per_second": 2.915, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 4.994499449944995e-05, |
|
"loss": 0.2019, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 4.9889988998899896e-05, |
|
"loss": 0.1756, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 4.9834983498349835e-05, |
|
"loss": 0.1626, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 4.977997799779978e-05, |
|
"loss": 0.1608, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 4.972497249724973e-05, |
|
"loss": 0.1912, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 4.9669966996699675e-05, |
|
"loss": 0.1716, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 4.961496149614962e-05, |
|
"loss": 0.1683, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 4.955995599559956e-05, |
|
"loss": 0.1497, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 10.89, |
|
"learning_rate": 4.950495049504951e-05, |
|
"loss": 0.1827, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 4.9449944994499455e-05, |
|
"loss": 0.1835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9155539516420065, |
|
"eval_loss": 0.23998872935771942, |
|
"eval_runtime": 59.7253, |
|
"eval_samples_per_second": 92.792, |
|
"eval_steps_per_second": 2.913, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"step": 1111, |
|
"total_flos": 1.1021883250648596e+19, |
|
"train_loss": 0.1948466427711406, |
|
"train_runtime": 3819.4144, |
|
"train_samples_per_second": 338.534, |
|
"train_steps_per_second": 2.644 |
|
} |
|
], |
|
"max_steps": 10100, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.1021883250648596e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|