Augusto777's picture
End of training
9d26d70 verified
{
"best_metric": 0.8548387096774194,
"best_model_checkpoint": "swinv2-tiny-patch4-window8-256-Ocular-Toxoplasmosis-DA/checkpoint-256",
"epoch": 38.51851851851852,
"eval_steps": 500,
"global_step": 520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.7407407407407407,
"grad_norm": 4.370074272155762,
"learning_rate": 9.615384615384616e-06,
"loss": 1.3402,
"step": 10
},
{
"epoch": 0.9629629629629629,
"eval_accuracy": 0.5483870967741935,
"eval_loss": 1.1682088375091553,
"eval_runtime": 2.3908,
"eval_samples_per_second": 25.932,
"eval_steps_per_second": 0.837,
"step": 13
},
{
"epoch": 1.4814814814814814,
"grad_norm": 7.86944580078125,
"learning_rate": 1.923076923076923e-05,
"loss": 1.1725,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.6290322580645161,
"eval_loss": 1.0024793148040771,
"eval_runtime": 3.324,
"eval_samples_per_second": 18.652,
"eval_steps_per_second": 0.602,
"step": 27
},
{
"epoch": 2.2222222222222223,
"grad_norm": 4.94896936416626,
"learning_rate": 2.8846153846153845e-05,
"loss": 1.0671,
"step": 30
},
{
"epoch": 2.962962962962963,
"grad_norm": 4.811951160430908,
"learning_rate": 3.846153846153846e-05,
"loss": 0.8824,
"step": 40
},
{
"epoch": 2.962962962962963,
"eval_accuracy": 0.6612903225806451,
"eval_loss": 0.7644360065460205,
"eval_runtime": 3.388,
"eval_samples_per_second": 18.3,
"eval_steps_per_second": 0.59,
"step": 40
},
{
"epoch": 3.7037037037037037,
"grad_norm": 7.974093914031982,
"learning_rate": 4.8076923076923084e-05,
"loss": 0.7342,
"step": 50
},
{
"epoch": 4.0,
"eval_accuracy": 0.7258064516129032,
"eval_loss": 0.5839676260948181,
"eval_runtime": 3.0543,
"eval_samples_per_second": 20.299,
"eval_steps_per_second": 0.655,
"step": 54
},
{
"epoch": 4.444444444444445,
"grad_norm": 8.472794532775879,
"learning_rate": 4.9145299145299147e-05,
"loss": 0.6734,
"step": 60
},
{
"epoch": 4.962962962962963,
"eval_accuracy": 0.6451612903225806,
"eval_loss": 0.6753666400909424,
"eval_runtime": 2.3642,
"eval_samples_per_second": 26.225,
"eval_steps_per_second": 0.846,
"step": 67
},
{
"epoch": 5.185185185185185,
"grad_norm": 9.15774917602539,
"learning_rate": 4.8076923076923084e-05,
"loss": 0.6373,
"step": 70
},
{
"epoch": 5.925925925925926,
"grad_norm": 12.02450942993164,
"learning_rate": 4.700854700854701e-05,
"loss": 0.5167,
"step": 80
},
{
"epoch": 6.0,
"eval_accuracy": 0.6935483870967742,
"eval_loss": 0.5904402136802673,
"eval_runtime": 2.3866,
"eval_samples_per_second": 25.979,
"eval_steps_per_second": 0.838,
"step": 81
},
{
"epoch": 6.666666666666667,
"grad_norm": 7.348090648651123,
"learning_rate": 4.594017094017094e-05,
"loss": 0.5009,
"step": 90
},
{
"epoch": 6.962962962962963,
"eval_accuracy": 0.6935483870967742,
"eval_loss": 0.5549384355545044,
"eval_runtime": 2.9982,
"eval_samples_per_second": 20.679,
"eval_steps_per_second": 0.667,
"step": 94
},
{
"epoch": 7.407407407407407,
"grad_norm": 5.642479419708252,
"learning_rate": 4.4871794871794874e-05,
"loss": 0.4988,
"step": 100
},
{
"epoch": 8.0,
"eval_accuracy": 0.6774193548387096,
"eval_loss": 0.620449423789978,
"eval_runtime": 2.4283,
"eval_samples_per_second": 25.532,
"eval_steps_per_second": 0.824,
"step": 108
},
{
"epoch": 8.148148148148149,
"grad_norm": 6.128896713256836,
"learning_rate": 4.3803418803418805e-05,
"loss": 0.4619,
"step": 110
},
{
"epoch": 8.88888888888889,
"grad_norm": 7.555347919464111,
"learning_rate": 4.2735042735042735e-05,
"loss": 0.3856,
"step": 120
},
{
"epoch": 8.962962962962964,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.44631102681159973,
"eval_runtime": 2.3506,
"eval_samples_per_second": 26.376,
"eval_steps_per_second": 0.851,
"step": 121
},
{
"epoch": 9.62962962962963,
"grad_norm": 9.627432823181152,
"learning_rate": 4.166666666666667e-05,
"loss": 0.4057,
"step": 130
},
{
"epoch": 10.0,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.5231879353523254,
"eval_runtime": 3.1544,
"eval_samples_per_second": 19.655,
"eval_steps_per_second": 0.634,
"step": 135
},
{
"epoch": 10.37037037037037,
"grad_norm": 8.669109344482422,
"learning_rate": 4.05982905982906e-05,
"loss": 0.3929,
"step": 140
},
{
"epoch": 10.962962962962964,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.45801177620887756,
"eval_runtime": 2.3878,
"eval_samples_per_second": 25.965,
"eval_steps_per_second": 0.838,
"step": 148
},
{
"epoch": 11.11111111111111,
"grad_norm": 6.289756774902344,
"learning_rate": 3.952991452991453e-05,
"loss": 0.3673,
"step": 150
},
{
"epoch": 11.851851851851851,
"grad_norm": 12.90579605102539,
"learning_rate": 3.846153846153846e-05,
"loss": 0.3638,
"step": 160
},
{
"epoch": 12.0,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.5114619135856628,
"eval_runtime": 3.3569,
"eval_samples_per_second": 18.47,
"eval_steps_per_second": 0.596,
"step": 162
},
{
"epoch": 12.592592592592592,
"grad_norm": 10.698553085327148,
"learning_rate": 3.739316239316239e-05,
"loss": 0.3248,
"step": 170
},
{
"epoch": 12.962962962962964,
"eval_accuracy": 0.7741935483870968,
"eval_loss": 0.5312773585319519,
"eval_runtime": 2.4335,
"eval_samples_per_second": 25.478,
"eval_steps_per_second": 0.822,
"step": 175
},
{
"epoch": 13.333333333333334,
"grad_norm": 6.529489994049072,
"learning_rate": 3.6324786324786323e-05,
"loss": 0.2673,
"step": 180
},
{
"epoch": 14.0,
"eval_accuracy": 0.7903225806451613,
"eval_loss": 0.5203306674957275,
"eval_runtime": 3.4828,
"eval_samples_per_second": 17.802,
"eval_steps_per_second": 0.574,
"step": 189
},
{
"epoch": 14.074074074074074,
"grad_norm": 6.994911193847656,
"learning_rate": 3.525641025641026e-05,
"loss": 0.3216,
"step": 190
},
{
"epoch": 14.814814814814815,
"grad_norm": 9.194233894348145,
"learning_rate": 3.418803418803419e-05,
"loss": 0.2922,
"step": 200
},
{
"epoch": 14.962962962962964,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.4315454959869385,
"eval_runtime": 2.3822,
"eval_samples_per_second": 26.026,
"eval_steps_per_second": 0.84,
"step": 202
},
{
"epoch": 15.555555555555555,
"grad_norm": 6.076256275177002,
"learning_rate": 3.311965811965812e-05,
"loss": 0.2803,
"step": 210
},
{
"epoch": 16.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.4577220380306244,
"eval_runtime": 2.9439,
"eval_samples_per_second": 21.06,
"eval_steps_per_second": 0.679,
"step": 216
},
{
"epoch": 16.296296296296298,
"grad_norm": 12.038761138916016,
"learning_rate": 3.205128205128206e-05,
"loss": 0.2735,
"step": 220
},
{
"epoch": 16.962962962962962,
"eval_accuracy": 0.8064516129032258,
"eval_loss": 0.5466907024383545,
"eval_runtime": 2.3229,
"eval_samples_per_second": 26.691,
"eval_steps_per_second": 0.861,
"step": 229
},
{
"epoch": 17.037037037037038,
"grad_norm": 8.897506713867188,
"learning_rate": 3.098290598290599e-05,
"loss": 0.2776,
"step": 230
},
{
"epoch": 17.77777777777778,
"grad_norm": 9.66178035736084,
"learning_rate": 2.9914529914529915e-05,
"loss": 0.2586,
"step": 240
},
{
"epoch": 18.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.5236416459083557,
"eval_runtime": 3.4253,
"eval_samples_per_second": 18.101,
"eval_steps_per_second": 0.584,
"step": 243
},
{
"epoch": 18.51851851851852,
"grad_norm": 7.729655742645264,
"learning_rate": 2.8846153846153845e-05,
"loss": 0.2366,
"step": 250
},
{
"epoch": 18.962962962962962,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.5075119137763977,
"eval_runtime": 2.3943,
"eval_samples_per_second": 25.895,
"eval_steps_per_second": 0.835,
"step": 256
},
{
"epoch": 19.25925925925926,
"grad_norm": 11.543585777282715,
"learning_rate": 2.777777777777778e-05,
"loss": 0.252,
"step": 260
},
{
"epoch": 20.0,
"grad_norm": 7.877120494842529,
"learning_rate": 2.670940170940171e-05,
"loss": 0.2347,
"step": 270
},
{
"epoch": 20.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.5178562998771667,
"eval_runtime": 2.4124,
"eval_samples_per_second": 25.701,
"eval_steps_per_second": 0.829,
"step": 270
},
{
"epoch": 20.74074074074074,
"grad_norm": 7.83768892288208,
"learning_rate": 2.564102564102564e-05,
"loss": 0.2046,
"step": 280
},
{
"epoch": 20.962962962962962,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.5427502393722534,
"eval_runtime": 3.4728,
"eval_samples_per_second": 17.853,
"eval_steps_per_second": 0.576,
"step": 283
},
{
"epoch": 21.48148148148148,
"grad_norm": 7.919957637786865,
"learning_rate": 2.4572649572649573e-05,
"loss": 0.2289,
"step": 290
},
{
"epoch": 22.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.57480788230896,
"eval_runtime": 2.4021,
"eval_samples_per_second": 25.811,
"eval_steps_per_second": 0.833,
"step": 297
},
{
"epoch": 22.22222222222222,
"grad_norm": 8.665252685546875,
"learning_rate": 2.3504273504273504e-05,
"loss": 0.2394,
"step": 300
},
{
"epoch": 22.962962962962962,
"grad_norm": 7.902819633483887,
"learning_rate": 2.2435897435897437e-05,
"loss": 0.2195,
"step": 310
},
{
"epoch": 22.962962962962962,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.5968937277793884,
"eval_runtime": 3.4133,
"eval_samples_per_second": 18.164,
"eval_steps_per_second": 0.586,
"step": 310
},
{
"epoch": 23.703703703703702,
"grad_norm": 9.844597816467285,
"learning_rate": 2.1367521367521368e-05,
"loss": 0.2224,
"step": 320
},
{
"epoch": 24.0,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.6092303991317749,
"eval_runtime": 2.3949,
"eval_samples_per_second": 25.888,
"eval_steps_per_second": 0.835,
"step": 324
},
{
"epoch": 24.444444444444443,
"grad_norm": 6.439063549041748,
"learning_rate": 2.02991452991453e-05,
"loss": 0.2167,
"step": 330
},
{
"epoch": 24.962962962962962,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.6333113312721252,
"eval_runtime": 2.4482,
"eval_samples_per_second": 25.325,
"eval_steps_per_second": 0.817,
"step": 337
},
{
"epoch": 25.185185185185187,
"grad_norm": 8.865224838256836,
"learning_rate": 1.923076923076923e-05,
"loss": 0.2323,
"step": 340
},
{
"epoch": 25.925925925925927,
"grad_norm": 6.462991237640381,
"learning_rate": 1.8162393162393162e-05,
"loss": 0.1956,
"step": 350
},
{
"epoch": 26.0,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.5993022322654724,
"eval_runtime": 2.3358,
"eval_samples_per_second": 26.543,
"eval_steps_per_second": 0.856,
"step": 351
},
{
"epoch": 26.666666666666668,
"grad_norm": 6.978143692016602,
"learning_rate": 1.7094017094017095e-05,
"loss": 0.2174,
"step": 360
},
{
"epoch": 26.962962962962962,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.6063364744186401,
"eval_runtime": 2.3579,
"eval_samples_per_second": 26.295,
"eval_steps_per_second": 0.848,
"step": 364
},
{
"epoch": 27.40740740740741,
"grad_norm": 8.283989906311035,
"learning_rate": 1.602564102564103e-05,
"loss": 0.1999,
"step": 370
},
{
"epoch": 28.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6413679718971252,
"eval_runtime": 3.4435,
"eval_samples_per_second": 18.005,
"eval_steps_per_second": 0.581,
"step": 378
},
{
"epoch": 28.14814814814815,
"grad_norm": 5.77383279800415,
"learning_rate": 1.4957264957264958e-05,
"loss": 0.1783,
"step": 380
},
{
"epoch": 28.88888888888889,
"grad_norm": 7.4615654945373535,
"learning_rate": 1.388888888888889e-05,
"loss": 0.1667,
"step": 390
},
{
"epoch": 28.962962962962962,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6296666860580444,
"eval_runtime": 2.3485,
"eval_samples_per_second": 26.4,
"eval_steps_per_second": 0.852,
"step": 391
},
{
"epoch": 29.62962962962963,
"grad_norm": 9.373270034790039,
"learning_rate": 1.282051282051282e-05,
"loss": 0.1835,
"step": 400
},
{
"epoch": 30.0,
"eval_accuracy": 0.8225806451612904,
"eval_loss": 0.6148854494094849,
"eval_runtime": 3.1829,
"eval_samples_per_second": 19.479,
"eval_steps_per_second": 0.628,
"step": 405
},
{
"epoch": 30.37037037037037,
"grad_norm": 8.87562370300293,
"learning_rate": 1.1752136752136752e-05,
"loss": 0.186,
"step": 410
},
{
"epoch": 30.962962962962962,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6429581642150879,
"eval_runtime": 2.4503,
"eval_samples_per_second": 25.303,
"eval_steps_per_second": 0.816,
"step": 418
},
{
"epoch": 31.11111111111111,
"grad_norm": 5.281705856323242,
"learning_rate": 1.0683760683760684e-05,
"loss": 0.1706,
"step": 420
},
{
"epoch": 31.85185185185185,
"grad_norm": 4.753020286560059,
"learning_rate": 9.615384615384616e-06,
"loss": 0.1749,
"step": 430
},
{
"epoch": 32.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6677759885787964,
"eval_runtime": 2.3885,
"eval_samples_per_second": 25.957,
"eval_steps_per_second": 0.837,
"step": 432
},
{
"epoch": 32.592592592592595,
"grad_norm": 7.2512526512146,
"learning_rate": 8.547008547008548e-06,
"loss": 0.1663,
"step": 440
},
{
"epoch": 32.96296296296296,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6828835010528564,
"eval_runtime": 2.3483,
"eval_samples_per_second": 26.402,
"eval_steps_per_second": 0.852,
"step": 445
},
{
"epoch": 33.333333333333336,
"grad_norm": 9.678658485412598,
"learning_rate": 7.478632478632479e-06,
"loss": 0.1557,
"step": 450
},
{
"epoch": 34.0,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.655702531337738,
"eval_runtime": 3.1084,
"eval_samples_per_second": 19.946,
"eval_steps_per_second": 0.643,
"step": 459
},
{
"epoch": 34.074074074074076,
"grad_norm": 5.886323928833008,
"learning_rate": 6.41025641025641e-06,
"loss": 0.2095,
"step": 460
},
{
"epoch": 34.81481481481482,
"grad_norm": 5.312963485717773,
"learning_rate": 5.341880341880342e-06,
"loss": 0.1913,
"step": 470
},
{
"epoch": 34.96296296296296,
"eval_accuracy": 0.8387096774193549,
"eval_loss": 0.6274862885475159,
"eval_runtime": 3.2878,
"eval_samples_per_second": 18.858,
"eval_steps_per_second": 0.608,
"step": 472
},
{
"epoch": 35.55555555555556,
"grad_norm": 7.064798355102539,
"learning_rate": 4.273504273504274e-06,
"loss": 0.1775,
"step": 480
},
{
"epoch": 36.0,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.6554756760597229,
"eval_runtime": 2.4759,
"eval_samples_per_second": 25.041,
"eval_steps_per_second": 0.808,
"step": 486
},
{
"epoch": 36.2962962962963,
"grad_norm": 5.463845729827881,
"learning_rate": 3.205128205128205e-06,
"loss": 0.152,
"step": 490
},
{
"epoch": 36.96296296296296,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.6653042435646057,
"eval_runtime": 3.3751,
"eval_samples_per_second": 18.37,
"eval_steps_per_second": 0.593,
"step": 499
},
{
"epoch": 37.03703703703704,
"grad_norm": 5.512512683868408,
"learning_rate": 2.136752136752137e-06,
"loss": 0.1681,
"step": 500
},
{
"epoch": 37.77777777777778,
"grad_norm": 6.535687446594238,
"learning_rate": 1.0683760683760685e-06,
"loss": 0.1897,
"step": 510
},
{
"epoch": 38.0,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.6681959629058838,
"eval_runtime": 2.3421,
"eval_samples_per_second": 26.472,
"eval_steps_per_second": 0.854,
"step": 513
},
{
"epoch": 38.51851851851852,
"grad_norm": 8.290581703186035,
"learning_rate": 0.0,
"loss": 0.1589,
"step": 520
},
{
"epoch": 38.51851851851852,
"eval_accuracy": 0.8548387096774194,
"eval_loss": 0.6678970456123352,
"eval_runtime": 2.3455,
"eval_samples_per_second": 26.434,
"eval_steps_per_second": 0.853,
"step": 520
},
{
"epoch": 38.51851851851852,
"step": 520,
"total_flos": 2.140878196703232e+18,
"train_loss": 0.35049390150950505,
"train_runtime": 3356.7171,
"train_samples_per_second": 20.353,
"train_steps_per_second": 0.155
}
],
"logging_steps": 10,
"max_steps": 520,
"num_input_tokens_seen": 0,
"num_train_epochs": 40,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.140878196703232e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}