{ "best_metric": 0.10026070475578308, "best_model_checkpoint": "./vit-base-ecg/checkpoint-300", "epoch": 20.0, "eval_steps": 100, "global_step": 820, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24390243902439024, "grad_norm": 3.298177480697632, "learning_rate": 0.0001975609756097561, "loss": 1.3469, "step": 10 }, { "epoch": 0.4878048780487805, "grad_norm": 1.9206117391586304, "learning_rate": 0.0001951219512195122, "loss": 1.0603, "step": 20 }, { "epoch": 0.7317073170731707, "grad_norm": 3.621140956878662, "learning_rate": 0.0001926829268292683, "loss": 0.9322, "step": 30 }, { "epoch": 0.975609756097561, "grad_norm": 7.4158735275268555, "learning_rate": 0.0001902439024390244, "loss": 0.7773, "step": 40 }, { "epoch": 1.2195121951219512, "grad_norm": 4.021040916442871, "learning_rate": 0.0001878048780487805, "loss": 0.5297, "step": 50 }, { "epoch": 1.4634146341463414, "grad_norm": 2.9927070140838623, "learning_rate": 0.0001853658536585366, "loss": 0.4686, "step": 60 }, { "epoch": 1.7073170731707317, "grad_norm": 1.2623863220214844, "learning_rate": 0.0001829268292682927, "loss": 0.4439, "step": 70 }, { "epoch": 1.951219512195122, "grad_norm": 10.744318962097168, "learning_rate": 0.0001804878048780488, "loss": 0.3923, "step": 80 }, { "epoch": 2.1951219512195124, "grad_norm": 8.121638298034668, "learning_rate": 0.00017804878048780488, "loss": 0.2701, "step": 90 }, { "epoch": 2.4390243902439024, "grad_norm": 6.202785015106201, "learning_rate": 0.000175609756097561, "loss": 0.596, "step": 100 }, { "epoch": 2.4390243902439024, "eval_accuracy": 0.8214285714285714, "eval_loss": 0.5431132912635803, "eval_runtime": 8.502, "eval_samples_per_second": 16.467, "eval_steps_per_second": 2.117, "step": 100 }, { "epoch": 2.682926829268293, "grad_norm": 1.8862333297729492, "learning_rate": 0.00017317073170731708, "loss": 0.3225, "step": 110 }, { "epoch": 2.926829268292683, "grad_norm": 2.015392780303955, "learning_rate": 0.0001707317073170732, "loss": 0.3347, "step": 120 }, { "epoch": 3.1707317073170733, "grad_norm": 1.7934260368347168, "learning_rate": 0.00016829268292682927, "loss": 0.202, "step": 130 }, { "epoch": 3.4146341463414633, "grad_norm": 2.0693511962890625, "learning_rate": 0.00016585365853658536, "loss": 0.2906, "step": 140 }, { "epoch": 3.658536585365854, "grad_norm": 3.4595236778259277, "learning_rate": 0.00016341463414634147, "loss": 0.2004, "step": 150 }, { "epoch": 3.902439024390244, "grad_norm": 3.176142692565918, "learning_rate": 0.00016097560975609758, "loss": 0.1719, "step": 160 }, { "epoch": 4.146341463414634, "grad_norm": 1.0586614608764648, "learning_rate": 0.00015853658536585366, "loss": 0.0913, "step": 170 }, { "epoch": 4.390243902439025, "grad_norm": 0.48713743686676025, "learning_rate": 0.00015609756097560978, "loss": 0.0923, "step": 180 }, { "epoch": 4.634146341463414, "grad_norm": 0.5843382477760315, "learning_rate": 0.00015365853658536586, "loss": 0.1999, "step": 190 }, { "epoch": 4.878048780487805, "grad_norm": 0.11404013633728027, "learning_rate": 0.00015121951219512197, "loss": 0.0656, "step": 200 }, { "epoch": 4.878048780487805, "eval_accuracy": 0.95, "eval_loss": 0.16283166408538818, "eval_runtime": 8.0607, "eval_samples_per_second": 17.368, "eval_steps_per_second": 2.233, "step": 200 }, { "epoch": 5.121951219512195, "grad_norm": 0.10728397220373154, "learning_rate": 0.00014878048780487806, "loss": 0.0267, "step": 210 }, { "epoch": 5.365853658536586, "grad_norm": 3.2944324016571045, 
"learning_rate": 0.00014634146341463414, "loss": 0.1612, "step": 220 }, { "epoch": 5.609756097560975, "grad_norm": 0.12557579576969147, "learning_rate": 0.00014390243902439025, "loss": 0.1216, "step": 230 }, { "epoch": 5.853658536585366, "grad_norm": 6.2914252281188965, "learning_rate": 0.00014146341463414634, "loss": 0.1067, "step": 240 }, { "epoch": 6.097560975609756, "grad_norm": 3.4353785514831543, "learning_rate": 0.00013902439024390245, "loss": 0.0795, "step": 250 }, { "epoch": 6.341463414634147, "grad_norm": 0.08993230760097504, "learning_rate": 0.00013658536585365856, "loss": 0.0919, "step": 260 }, { "epoch": 6.585365853658536, "grad_norm": 7.937359809875488, "learning_rate": 0.00013414634146341464, "loss": 0.0621, "step": 270 }, { "epoch": 6.829268292682927, "grad_norm": 0.10531166195869446, "learning_rate": 0.00013170731707317076, "loss": 0.0341, "step": 280 }, { "epoch": 7.073170731707317, "grad_norm": 0.33791643381118774, "learning_rate": 0.00012926829268292684, "loss": 0.047, "step": 290 }, { "epoch": 7.317073170731708, "grad_norm": 0.06028667464852333, "learning_rate": 0.00012682926829268293, "loss": 0.0192, "step": 300 }, { "epoch": 7.317073170731708, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.10026070475578308, "eval_runtime": 7.8254, "eval_samples_per_second": 17.89, "eval_steps_per_second": 2.3, "step": 300 }, { "epoch": 7.560975609756097, "grad_norm": 0.11959153413772583, "learning_rate": 0.00012439024390243904, "loss": 0.0145, "step": 310 }, { "epoch": 7.804878048780488, "grad_norm": 0.05471642687916756, "learning_rate": 0.00012195121951219512, "loss": 0.0987, "step": 320 }, { "epoch": 8.048780487804878, "grad_norm": 0.0556318461894989, "learning_rate": 0.00011951219512195122, "loss": 0.0447, "step": 330 }, { "epoch": 8.292682926829269, "grad_norm": 0.04877757653594017, "learning_rate": 0.00011707317073170732, "loss": 0.0226, "step": 340 }, { "epoch": 8.536585365853659, "grad_norm": 0.6035917401313782, "learning_rate": 0.00011463414634146342, "loss": 0.0465, "step": 350 }, { "epoch": 8.78048780487805, "grad_norm": 0.04456908255815506, "learning_rate": 0.00011219512195121953, "loss": 0.0116, "step": 360 }, { "epoch": 9.024390243902438, "grad_norm": 0.04949244484305382, "learning_rate": 0.00010975609756097563, "loss": 0.0349, "step": 370 }, { "epoch": 9.268292682926829, "grad_norm": 0.047532856464385986, "learning_rate": 0.00010731707317073172, "loss": 0.0097, "step": 380 }, { "epoch": 9.512195121951219, "grad_norm": 7.109127521514893, "learning_rate": 0.00010487804878048781, "loss": 0.0565, "step": 390 }, { "epoch": 9.75609756097561, "grad_norm": 0.04661267623305321, "learning_rate": 0.0001024390243902439, "loss": 0.0926, "step": 400 }, { "epoch": 9.75609756097561, "eval_accuracy": 0.95, "eval_loss": 0.1261894255876541, "eval_runtime": 7.971, "eval_samples_per_second": 17.564, "eval_steps_per_second": 2.258, "step": 400 }, { "epoch": 10.0, "grad_norm": 0.12421295046806335, "learning_rate": 0.0001, "loss": 0.0375, "step": 410 }, { "epoch": 10.24390243902439, "grad_norm": 0.04042995348572731, "learning_rate": 9.75609756097561e-05, "loss": 0.0199, "step": 420 }, { "epoch": 10.487804878048781, "grad_norm": 0.05262177810072899, "learning_rate": 9.51219512195122e-05, "loss": 0.0095, "step": 430 }, { "epoch": 10.731707317073171, "grad_norm": 0.03628499433398247, "learning_rate": 9.26829268292683e-05, "loss": 0.0078, "step": 440 }, { "epoch": 10.975609756097562, "grad_norm": 0.04206101596355438, "learning_rate": 9.02439024390244e-05, "loss": 0.0369, "step": 450 }, { 
"epoch": 11.21951219512195, "grad_norm": 0.03796745836734772, "learning_rate": 8.78048780487805e-05, "loss": 0.0081, "step": 460 }, { "epoch": 11.463414634146341, "grad_norm": 0.03188019618391991, "learning_rate": 8.53658536585366e-05, "loss": 0.0075, "step": 470 }, { "epoch": 11.707317073170731, "grad_norm": 0.04463886842131615, "learning_rate": 8.292682926829268e-05, "loss": 0.0071, "step": 480 }, { "epoch": 11.951219512195122, "grad_norm": 0.024716157466173172, "learning_rate": 8.048780487804879e-05, "loss": 0.0066, "step": 490 }, { "epoch": 12.195121951219512, "grad_norm": 0.030284544453024864, "learning_rate": 7.804878048780489e-05, "loss": 0.0064, "step": 500 }, { "epoch": 12.195121951219512, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.1611199826002121, "eval_runtime": 8.7409, "eval_samples_per_second": 16.017, "eval_steps_per_second": 2.059, "step": 500 }, { "epoch": 12.439024390243903, "grad_norm": 0.027342507615685463, "learning_rate": 7.560975609756099e-05, "loss": 0.0061, "step": 510 }, { "epoch": 12.682926829268293, "grad_norm": 0.03514489158987999, "learning_rate": 7.317073170731707e-05, "loss": 0.006, "step": 520 }, { "epoch": 12.926829268292684, "grad_norm": 0.02839997597038746, "learning_rate": 7.073170731707317e-05, "loss": 0.0056, "step": 530 }, { "epoch": 13.170731707317072, "grad_norm": 0.025196045637130737, "learning_rate": 6.829268292682928e-05, "loss": 0.0057, "step": 540 }, { "epoch": 13.414634146341463, "grad_norm": 0.022951927036046982, "learning_rate": 6.585365853658538e-05, "loss": 0.0053, "step": 550 }, { "epoch": 13.658536585365853, "grad_norm": 0.02220899611711502, "learning_rate": 6.341463414634146e-05, "loss": 0.0056, "step": 560 }, { "epoch": 13.902439024390244, "grad_norm": 0.026446521282196045, "learning_rate": 6.097560975609756e-05, "loss": 0.0051, "step": 570 }, { "epoch": 14.146341463414634, "grad_norm": 0.021074431017041206, "learning_rate": 5.853658536585366e-05, "loss": 0.0051, "step": 580 }, { "epoch": 14.390243902439025, "grad_norm": 0.026110034435987473, "learning_rate": 5.6097560975609764e-05, "loss": 0.0051, "step": 590 }, { "epoch": 14.634146341463415, "grad_norm": 0.024351950734853745, "learning_rate": 5.365853658536586e-05, "loss": 0.0049, "step": 600 }, { "epoch": 14.634146341463415, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15386557579040527, "eval_runtime": 24.339, "eval_samples_per_second": 5.752, "eval_steps_per_second": 0.74, "step": 600 }, { "epoch": 14.878048780487806, "grad_norm": 0.020671434700489044, "learning_rate": 5.121951219512195e-05, "loss": 0.0048, "step": 610 }, { "epoch": 15.121951219512194, "grad_norm": 0.02226124331355095, "learning_rate": 4.878048780487805e-05, "loss": 0.0046, "step": 620 }, { "epoch": 15.365853658536585, "grad_norm": 0.02063142880797386, "learning_rate": 4.634146341463415e-05, "loss": 0.0046, "step": 630 }, { "epoch": 15.609756097560975, "grad_norm": 0.02496950887143612, "learning_rate": 4.390243902439025e-05, "loss": 0.0048, "step": 640 }, { "epoch": 15.853658536585366, "grad_norm": 0.023448029533028603, "learning_rate": 4.146341463414634e-05, "loss": 0.0046, "step": 650 }, { "epoch": 16.097560975609756, "grad_norm": 0.02106618881225586, "learning_rate": 3.9024390243902444e-05, "loss": 0.0045, "step": 660 }, { "epoch": 16.341463414634145, "grad_norm": 0.02371513471007347, "learning_rate": 3.6585365853658535e-05, "loss": 0.0043, "step": 670 }, { "epoch": 16.585365853658537, "grad_norm": 0.02032136172056198, "learning_rate": 3.414634146341464e-05, "loss": 0.0044, "step": 680 }, { 
"epoch": 16.829268292682926, "grad_norm": 0.020455192774534225, "learning_rate": 3.170731707317073e-05, "loss": 0.0043, "step": 690 }, { "epoch": 17.073170731707318, "grad_norm": 0.023487282916903496, "learning_rate": 2.926829268292683e-05, "loss": 0.0044, "step": 700 }, { "epoch": 17.073170731707318, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.15085048973560333, "eval_runtime": 11.3536, "eval_samples_per_second": 12.331, "eval_steps_per_second": 1.585, "step": 700 }, { "epoch": 17.317073170731707, "grad_norm": 0.018334366381168365, "learning_rate": 2.682926829268293e-05, "loss": 0.0044, "step": 710 }, { "epoch": 17.5609756097561, "grad_norm": 0.01927388273179531, "learning_rate": 2.4390243902439026e-05, "loss": 0.0042, "step": 720 }, { "epoch": 17.804878048780488, "grad_norm": 0.021918119862675667, "learning_rate": 2.1951219512195124e-05, "loss": 0.0042, "step": 730 }, { "epoch": 18.048780487804876, "grad_norm": 0.019147571176290512, "learning_rate": 1.9512195121951222e-05, "loss": 0.0042, "step": 740 }, { "epoch": 18.29268292682927, "grad_norm": 0.020710714161396027, "learning_rate": 1.707317073170732e-05, "loss": 0.0041, "step": 750 }, { "epoch": 18.536585365853657, "grad_norm": 0.01830894872546196, "learning_rate": 1.4634146341463415e-05, "loss": 0.004, "step": 760 }, { "epoch": 18.78048780487805, "grad_norm": 0.019078999757766724, "learning_rate": 1.2195121951219513e-05, "loss": 0.0042, "step": 770 }, { "epoch": 19.024390243902438, "grad_norm": 0.020716849714517593, "learning_rate": 9.756097560975611e-06, "loss": 0.0042, "step": 780 }, { "epoch": 19.26829268292683, "grad_norm": 0.018130987882614136, "learning_rate": 7.317073170731707e-06, "loss": 0.0041, "step": 790 }, { "epoch": 19.51219512195122, "grad_norm": 0.018373191356658936, "learning_rate": 4.8780487804878055e-06, "loss": 0.0041, "step": 800 }, { "epoch": 19.51219512195122, "eval_accuracy": 0.9642857142857143, "eval_loss": 0.14993587136268616, "eval_runtime": 14.7019, "eval_samples_per_second": 9.523, "eval_steps_per_second": 1.224, "step": 800 }, { "epoch": 19.75609756097561, "grad_norm": 0.01835489086806774, "learning_rate": 2.4390243902439027e-06, "loss": 0.0039, "step": 810 }, { "epoch": 20.0, "grad_norm": 0.02105889283120632, "learning_rate": 0.0, "loss": 0.0041, "step": 820 }, { "epoch": 20.0, "step": 820, "total_flos": 1.004314187783209e+18, "train_loss": 0.1252097422029914, "train_runtime": 1111.906, "train_samples_per_second": 11.656, "train_steps_per_second": 0.737 } ], "logging_steps": 10, "max_steps": 820, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.004314187783209e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }