{
  "best_metric": 0.10026070475578308,
  "best_model_checkpoint": "./vit-base-ecg/checkpoint-300",
  "epoch": 20.0,
  "eval_steps": 100,
  "global_step": 820,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 3.298177480697632,
      "learning_rate": 0.0001975609756097561,
      "loss": 1.3469,
      "step": 10
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 1.9206117391586304,
      "learning_rate": 0.0001951219512195122,
      "loss": 1.0603,
      "step": 20
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 3.621140956878662,
      "learning_rate": 0.0001926829268292683,
      "loss": 0.9322,
      "step": 30
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 7.4158735275268555,
      "learning_rate": 0.0001902439024390244,
      "loss": 0.7773,
      "step": 40
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 4.021040916442871,
      "learning_rate": 0.0001878048780487805,
      "loss": 0.5297,
      "step": 50
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 2.9927070140838623,
      "learning_rate": 0.0001853658536585366,
      "loss": 0.4686,
      "step": 60
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 1.2623863220214844,
      "learning_rate": 0.0001829268292682927,
      "loss": 0.4439,
      "step": 70
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 10.744318962097168,
      "learning_rate": 0.0001804878048780488,
      "loss": 0.3923,
      "step": 80
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 8.121638298034668,
      "learning_rate": 0.00017804878048780488,
      "loss": 0.2701,
      "step": 90
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 6.202785015106201,
      "learning_rate": 0.000175609756097561,
      "loss": 0.596,
      "step": 100
    },
    {
      "epoch": 2.4390243902439024,
      "eval_accuracy": 0.8214285714285714,
      "eval_loss": 0.5431132912635803,
      "eval_runtime": 8.502,
      "eval_samples_per_second": 16.467,
      "eval_steps_per_second": 2.117,
      "step": 100
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 1.8862333297729492,
      "learning_rate": 0.00017317073170731708,
      "loss": 0.3225,
      "step": 110
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 2.015392780303955,
      "learning_rate": 0.0001707317073170732,
      "loss": 0.3347,
      "step": 120
    },
    {
      "epoch": 3.1707317073170733,
      "grad_norm": 1.7934260368347168,
      "learning_rate": 0.00016829268292682927,
      "loss": 0.202,
      "step": 130
    },
    {
      "epoch": 3.4146341463414633,
      "grad_norm": 2.0693511962890625,
      "learning_rate": 0.00016585365853658536,
      "loss": 0.2906,
      "step": 140
    },
    {
      "epoch": 3.658536585365854,
      "grad_norm": 3.4595236778259277,
      "learning_rate": 0.00016341463414634147,
      "loss": 0.2004,
      "step": 150
    },
    {
      "epoch": 3.902439024390244,
      "grad_norm": 3.176142692565918,
      "learning_rate": 0.00016097560975609758,
      "loss": 0.1719,
      "step": 160
    },
    {
      "epoch": 4.146341463414634,
      "grad_norm": 1.0586614608764648,
      "learning_rate": 0.00015853658536585366,
      "loss": 0.0913,
      "step": 170
    },
    {
      "epoch": 4.390243902439025,
      "grad_norm": 0.48713743686676025,
      "learning_rate": 0.00015609756097560978,
      "loss": 0.0923,
      "step": 180
    },
    {
      "epoch": 4.634146341463414,
      "grad_norm": 0.5843382477760315,
      "learning_rate": 0.00015365853658536586,
      "loss": 0.1999,
      "step": 190
    },
    {
      "epoch": 4.878048780487805,
      "grad_norm": 0.11404013633728027,
      "learning_rate": 0.00015121951219512197,
      "loss": 0.0656,
      "step": 200
    },
    {
      "epoch": 4.878048780487805,
      "eval_accuracy": 0.95,
      "eval_loss": 0.16283166408538818,
      "eval_runtime": 8.0607,
      "eval_samples_per_second": 17.368,
      "eval_steps_per_second": 2.233,
      "step": 200
    },
    {
      "epoch": 5.121951219512195,
      "grad_norm": 0.10728397220373154,
      "learning_rate": 0.00014878048780487806,
      "loss": 0.0267,
      "step": 210
    },
    {
      "epoch": 5.365853658536586,
      "grad_norm": 3.2944324016571045,
      "learning_rate": 0.00014634146341463414,
      "loss": 0.1612,
      "step": 220
    },
    {
      "epoch": 5.609756097560975,
      "grad_norm": 0.12557579576969147,
      "learning_rate": 0.00014390243902439025,
      "loss": 0.1216,
      "step": 230
    },
    {
      "epoch": 5.853658536585366,
      "grad_norm": 6.2914252281188965,
      "learning_rate": 0.00014146341463414634,
      "loss": 0.1067,
      "step": 240
    },
    {
      "epoch": 6.097560975609756,
      "grad_norm": 3.4353785514831543,
      "learning_rate": 0.00013902439024390245,
      "loss": 0.0795,
      "step": 250
    },
    {
      "epoch": 6.341463414634147,
      "grad_norm": 0.08993230760097504,
      "learning_rate": 0.00013658536585365856,
      "loss": 0.0919,
      "step": 260
    },
    {
      "epoch": 6.585365853658536,
      "grad_norm": 7.937359809875488,
      "learning_rate": 0.00013414634146341464,
      "loss": 0.0621,
      "step": 270
    },
    {
      "epoch": 6.829268292682927,
      "grad_norm": 0.10531166195869446,
      "learning_rate": 0.00013170731707317076,
      "loss": 0.0341,
      "step": 280
    },
    {
      "epoch": 7.073170731707317,
      "grad_norm": 0.33791643381118774,
      "learning_rate": 0.00012926829268292684,
      "loss": 0.047,
      "step": 290
    },
    {
      "epoch": 7.317073170731708,
      "grad_norm": 0.06028667464852333,
      "learning_rate": 0.00012682926829268293,
      "loss": 0.0192,
      "step": 300
    },
    {
      "epoch": 7.317073170731708,
      "eval_accuracy": 0.9642857142857143,
      "eval_loss": 0.10026070475578308,
      "eval_runtime": 7.8254,
      "eval_samples_per_second": 17.89,
      "eval_steps_per_second": 2.3,
      "step": 300
    },
    {
      "epoch": 7.560975609756097,
      "grad_norm": 0.11959153413772583,
      "learning_rate": 0.00012439024390243904,
      "loss": 0.0145,
      "step": 310
    },
    {
      "epoch": 7.804878048780488,
      "grad_norm": 0.05471642687916756,
      "learning_rate": 0.00012195121951219512,
      "loss": 0.0987,
      "step": 320
    },
    {
      "epoch": 8.048780487804878,
      "grad_norm": 0.0556318461894989,
      "learning_rate": 0.00011951219512195122,
      "loss": 0.0447,
      "step": 330
    },
    {
      "epoch": 8.292682926829269,
      "grad_norm": 0.04877757653594017,
      "learning_rate": 0.00011707317073170732,
      "loss": 0.0226,
      "step": 340
    },
    {
      "epoch": 8.536585365853659,
      "grad_norm": 0.6035917401313782,
      "learning_rate": 0.00011463414634146342,
      "loss": 0.0465,
      "step": 350
    },
    {
      "epoch": 8.78048780487805,
      "grad_norm": 0.04456908255815506,
      "learning_rate": 0.00011219512195121953,
      "loss": 0.0116,
      "step": 360
    },
    {
      "epoch": 9.024390243902438,
      "grad_norm": 0.04949244484305382,
      "learning_rate": 0.00010975609756097563,
      "loss": 0.0349,
      "step": 370
    },
    {
      "epoch": 9.268292682926829,
      "grad_norm": 0.047532856464385986,
      "learning_rate": 0.00010731707317073172,
      "loss": 0.0097,
      "step": 380
    },
    {
      "epoch": 9.512195121951219,
      "grad_norm": 7.109127521514893,
      "learning_rate": 0.00010487804878048781,
      "loss": 0.0565,
      "step": 390
    },
    {
      "epoch": 9.75609756097561,
      "grad_norm": 0.04661267623305321,
      "learning_rate": 0.0001024390243902439,
      "loss": 0.0926,
      "step": 400
    },
    {
      "epoch": 9.75609756097561,
      "eval_accuracy": 0.95,
      "eval_loss": 0.1261894255876541,
      "eval_runtime": 7.971,
      "eval_samples_per_second": 17.564,
      "eval_steps_per_second": 2.258,
      "step": 400
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.12421295046806335,
      "learning_rate": 0.0001,
      "loss": 0.0375,
      "step": 410
    },
    {
      "epoch": 10.24390243902439,
      "grad_norm": 0.04042995348572731,
      "learning_rate": 9.75609756097561e-05,
      "loss": 0.0199,
      "step": 420
    },
    {
      "epoch": 10.487804878048781,
      "grad_norm": 0.05262177810072899,
      "learning_rate": 9.51219512195122e-05,
      "loss": 0.0095,
      "step": 430
    },
    {
      "epoch": 10.731707317073171,
      "grad_norm": 0.03628499433398247,
      "learning_rate": 9.26829268292683e-05,
      "loss": 0.0078,
      "step": 440
    },
    {
      "epoch": 10.975609756097562,
      "grad_norm": 0.04206101596355438,
      "learning_rate": 9.02439024390244e-05,
      "loss": 0.0369,
      "step": 450
    },
    {
      "epoch": 11.21951219512195,
      "grad_norm": 0.03796745836734772,
      "learning_rate": 8.78048780487805e-05,
      "loss": 0.0081,
      "step": 460
    },
    {
      "epoch": 11.463414634146341,
      "grad_norm": 0.03188019618391991,
      "learning_rate": 8.53658536585366e-05,
      "loss": 0.0075,
      "step": 470
    },
    {
      "epoch": 11.707317073170731,
      "grad_norm": 0.04463886842131615,
      "learning_rate": 8.292682926829268e-05,
      "loss": 0.0071,
      "step": 480
    },
    {
      "epoch": 11.951219512195122,
      "grad_norm": 0.024716157466173172,
      "learning_rate": 8.048780487804879e-05,
      "loss": 0.0066,
      "step": 490
    },
    {
      "epoch": 12.195121951219512,
      "grad_norm": 0.030284544453024864,
      "learning_rate": 7.804878048780489e-05,
      "loss": 0.0064,
      "step": 500
    },
    {
      "epoch": 12.195121951219512,
      "eval_accuracy": 0.9642857142857143,
      "eval_loss": 0.1611199826002121,
      "eval_runtime": 8.7409,
      "eval_samples_per_second": 16.017,
      "eval_steps_per_second": 2.059,
      "step": 500
    },
    {
      "epoch": 12.439024390243903,
      "grad_norm": 0.027342507615685463,
      "learning_rate": 7.560975609756099e-05,
      "loss": 0.0061,
      "step": 510
    },
    {
      "epoch": 12.682926829268293,
      "grad_norm": 0.03514489158987999,
      "learning_rate": 7.317073170731707e-05,
      "loss": 0.006,
      "step": 520
    },
    {
      "epoch": 12.926829268292684,
      "grad_norm": 0.02839997597038746,
      "learning_rate": 7.073170731707317e-05,
      "loss": 0.0056,
      "step": 530
    },
    {
      "epoch": 13.170731707317072,
      "grad_norm": 0.025196045637130737,
      "learning_rate": 6.829268292682928e-05,
      "loss": 0.0057,
      "step": 540
    },
    {
      "epoch": 13.414634146341463,
      "grad_norm": 0.022951927036046982,
      "learning_rate": 6.585365853658538e-05,
      "loss": 0.0053,
      "step": 550
    },
    {
      "epoch": 13.658536585365853,
      "grad_norm": 0.02220899611711502,
      "learning_rate": 6.341463414634146e-05,
      "loss": 0.0056,
      "step": 560
    },
    {
      "epoch": 13.902439024390244,
      "grad_norm": 0.026446521282196045,
      "learning_rate": 6.097560975609756e-05,
      "loss": 0.0051,
      "step": 570
    },
    {
      "epoch": 14.146341463414634,
      "grad_norm": 0.021074431017041206,
      "learning_rate": 5.853658536585366e-05,
      "loss": 0.0051,
      "step": 580
    },
    {
      "epoch": 14.390243902439025,
      "grad_norm": 0.026110034435987473,
      "learning_rate": 5.6097560975609764e-05,
      "loss": 0.0051,
      "step": 590
    },
    {
      "epoch": 14.634146341463415,
      "grad_norm": 0.024351950734853745,
      "learning_rate": 5.365853658536586e-05,
      "loss": 0.0049,
      "step": 600
    },
    {
      "epoch": 14.634146341463415,
      "eval_accuracy": 0.9642857142857143,
      "eval_loss": 0.15386557579040527,
      "eval_runtime": 24.339,
      "eval_samples_per_second": 5.752,
      "eval_steps_per_second": 0.74,
      "step": 600
    },
    {
      "epoch": 14.878048780487806,
      "grad_norm": 0.020671434700489044,
      "learning_rate": 5.121951219512195e-05,
      "loss": 0.0048,
      "step": 610
    },
    {
      "epoch": 15.121951219512194,
      "grad_norm": 0.02226124331355095,
      "learning_rate": 4.878048780487805e-05,
      "loss": 0.0046,
      "step": 620
    },
    {
      "epoch": 15.365853658536585,
      "grad_norm": 0.02063142880797386,
      "learning_rate": 4.634146341463415e-05,
      "loss": 0.0046,
      "step": 630
    },
    {
      "epoch": 15.609756097560975,
      "grad_norm": 0.02496950887143612,
      "learning_rate": 4.390243902439025e-05,
      "loss": 0.0048,
      "step": 640
    },
    {
      "epoch": 15.853658536585366,
      "grad_norm": 0.023448029533028603,
      "learning_rate": 4.146341463414634e-05,
      "loss": 0.0046,
      "step": 650
    },
    {
      "epoch": 16.097560975609756,
      "grad_norm": 0.02106618881225586,
      "learning_rate": 3.9024390243902444e-05,
      "loss": 0.0045,
      "step": 660
    },
    {
      "epoch": 16.341463414634145,
      "grad_norm": 0.02371513471007347,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 0.0043,
      "step": 670
    },
    {
      "epoch": 16.585365853658537,
      "grad_norm": 0.02032136172056198,
      "learning_rate": 3.414634146341464e-05,
      "loss": 0.0044,
      "step": 680
    },
    {
      "epoch": 16.829268292682926,
      "grad_norm": 0.020455192774534225,
      "learning_rate": 3.170731707317073e-05,
      "loss": 0.0043,
      "step": 690
    },
    {
      "epoch": 17.073170731707318,
      "grad_norm": 0.023487282916903496,
      "learning_rate": 2.926829268292683e-05,
      "loss": 0.0044,
      "step": 700
    },
    {
      "epoch": 17.073170731707318,
      "eval_accuracy": 0.9642857142857143,
      "eval_loss": 0.15085048973560333,
      "eval_runtime": 11.3536,
      "eval_samples_per_second": 12.331,
      "eval_steps_per_second": 1.585,
      "step": 700
    },
    {
      "epoch": 17.317073170731707,
      "grad_norm": 0.018334366381168365,
      "learning_rate": 2.682926829268293e-05,
      "loss": 0.0044,
      "step": 710
    },
    {
      "epoch": 17.5609756097561,
      "grad_norm": 0.01927388273179531,
      "learning_rate": 2.4390243902439026e-05,
      "loss": 0.0042,
      "step": 720
    },
    {
      "epoch": 17.804878048780488,
      "grad_norm": 0.021918119862675667,
      "learning_rate": 2.1951219512195124e-05,
      "loss": 0.0042,
      "step": 730
    },
    {
      "epoch": 18.048780487804876,
      "grad_norm": 0.019147571176290512,
      "learning_rate": 1.9512195121951222e-05,
      "loss": 0.0042,
      "step": 740
    },
    {
      "epoch": 18.29268292682927,
      "grad_norm": 0.020710714161396027,
      "learning_rate": 1.707317073170732e-05,
      "loss": 0.0041,
      "step": 750
    },
    {
      "epoch": 18.536585365853657,
      "grad_norm": 0.01830894872546196,
      "learning_rate": 1.4634146341463415e-05,
      "loss": 0.004,
      "step": 760
    },
    {
      "epoch": 18.78048780487805,
      "grad_norm": 0.019078999757766724,
      "learning_rate": 1.2195121951219513e-05,
      "loss": 0.0042,
      "step": 770
    },
    {
      "epoch": 19.024390243902438,
      "grad_norm": 0.020716849714517593,
      "learning_rate": 9.756097560975611e-06,
      "loss": 0.0042,
      "step": 780
    },
    {
      "epoch": 19.26829268292683,
      "grad_norm": 0.018130987882614136,
      "learning_rate": 7.317073170731707e-06,
      "loss": 0.0041,
      "step": 790
    },
    {
      "epoch": 19.51219512195122,
      "grad_norm": 0.018373191356658936,
      "learning_rate": 4.8780487804878055e-06,
      "loss": 0.0041,
      "step": 800
    },
    {
      "epoch": 19.51219512195122,
      "eval_accuracy": 0.9642857142857143,
      "eval_loss": 0.14993587136268616,
      "eval_runtime": 14.7019,
      "eval_samples_per_second": 9.523,
      "eval_steps_per_second": 1.224,
      "step": 800
    },
    {
      "epoch": 19.75609756097561,
      "grad_norm": 0.01835489086806774,
      "learning_rate": 2.4390243902439027e-06,
      "loss": 0.0039,
      "step": 810
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.02105889283120632,
      "learning_rate": 0.0,
      "loss": 0.0041,
      "step": 820
    },
    {
      "epoch": 20.0,
      "step": 820,
      "total_flos": 1.004314187783209e+18,
      "train_loss": 0.1252097422029914,
      "train_runtime": 1111.906,
      "train_samples_per_second": 11.656,
      "train_steps_per_second": 0.737
    }
  ],
  "logging_steps": 10,
  "max_steps": 820,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.004314187783209e+18,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}