{
"best_metric": 0.9787037037037037,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-456",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 456,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06578947368421052,
"grad_norm": 6.047317981719971,
"learning_rate": 1.0869565217391305e-05,
"loss": 2.3059,
"step": 10
},
{
"epoch": 0.13157894736842105,
"grad_norm": 5.119813442230225,
"learning_rate": 2.173913043478261e-05,
"loss": 2.0868,
"step": 20
},
{
"epoch": 0.19736842105263158,
"grad_norm": 8.138529777526855,
"learning_rate": 3.260869565217392e-05,
"loss": 1.5345,
"step": 30
},
{
"epoch": 0.2631578947368421,
"grad_norm": 14.628665924072266,
"learning_rate": 4.347826086956522e-05,
"loss": 0.8696,
"step": 40
},
{
"epoch": 0.32894736842105265,
"grad_norm": 14.560131072998047,
"learning_rate": 4.951219512195122e-05,
"loss": 0.6197,
"step": 50
},
{
"epoch": 0.39473684210526316,
"grad_norm": 14.169949531555176,
"learning_rate": 4.829268292682927e-05,
"loss": 0.487,
"step": 60
},
{
"epoch": 0.4605263157894737,
"grad_norm": 15.145587921142578,
"learning_rate": 4.707317073170732e-05,
"loss": 0.5187,
"step": 70
},
{
"epoch": 0.5263157894736842,
"grad_norm": 14.87001895904541,
"learning_rate": 4.585365853658537e-05,
"loss": 0.4016,
"step": 80
},
{
"epoch": 0.5921052631578947,
"grad_norm": 21.998821258544922,
"learning_rate": 4.4634146341463416e-05,
"loss": 0.3696,
"step": 90
},
{
"epoch": 0.6578947368421053,
"grad_norm": 16.311429977416992,
"learning_rate": 4.3414634146341465e-05,
"loss": 0.3486,
"step": 100
},
{
"epoch": 0.7236842105263158,
"grad_norm": 17.85744857788086,
"learning_rate": 4.2195121951219514e-05,
"loss": 0.3221,
"step": 110
},
{
"epoch": 0.7894736842105263,
"grad_norm": 11.210858345031738,
"learning_rate": 4.097560975609756e-05,
"loss": 0.3001,
"step": 120
},
{
"epoch": 0.8552631578947368,
"grad_norm": 14.170101165771484,
"learning_rate": 3.975609756097561e-05,
"loss": 0.2871,
"step": 130
},
{
"epoch": 0.9210526315789473,
"grad_norm": 8.286581993103027,
"learning_rate": 3.853658536585366e-05,
"loss": 0.2954,
"step": 140
},
{
"epoch": 0.9868421052631579,
"grad_norm": 8.435368537902832,
"learning_rate": 3.731707317073171e-05,
"loss": 0.2774,
"step": 150
},
{
"epoch": 1.0,
"eval_accuracy": 0.9578703703703704,
"eval_loss": 0.12819555401802063,
"eval_runtime": 12.8415,
"eval_samples_per_second": 168.205,
"eval_steps_per_second": 5.295,
"step": 152
},
{
"epoch": 1.0526315789473684,
"grad_norm": 11.784485816955566,
"learning_rate": 3.609756097560976e-05,
"loss": 0.2328,
"step": 160
},
{
"epoch": 1.118421052631579,
"grad_norm": 13.228792190551758,
"learning_rate": 3.48780487804878e-05,
"loss": 0.2462,
"step": 170
},
{
"epoch": 1.1842105263157894,
"grad_norm": 10.550958633422852,
"learning_rate": 3.365853658536586e-05,
"loss": 0.2399,
"step": 180
},
{
"epoch": 1.25,
"grad_norm": 5.997585296630859,
"learning_rate": 3.2439024390243906e-05,
"loss": 0.2252,
"step": 190
},
{
"epoch": 1.3157894736842106,
"grad_norm": 8.08625602722168,
"learning_rate": 3.1219512195121955e-05,
"loss": 0.2357,
"step": 200
},
{
"epoch": 1.381578947368421,
"grad_norm": 17.572458267211914,
"learning_rate": 3e-05,
"loss": 0.2258,
"step": 210
},
{
"epoch": 1.4473684210526316,
"grad_norm": 12.296513557434082,
"learning_rate": 2.8780487804878046e-05,
"loss": 0.2645,
"step": 220
},
{
"epoch": 1.513157894736842,
"grad_norm": 8.180034637451172,
"learning_rate": 2.7560975609756102e-05,
"loss": 0.2159,
"step": 230
},
{
"epoch": 1.5789473684210527,
"grad_norm": 12.796661376953125,
"learning_rate": 2.6341463414634148e-05,
"loss": 0.2375,
"step": 240
},
{
"epoch": 1.6447368421052633,
"grad_norm": 10.516425132751465,
"learning_rate": 2.5121951219512197e-05,
"loss": 0.2049,
"step": 250
},
{
"epoch": 1.7105263157894737,
"grad_norm": 6.393877983093262,
"learning_rate": 2.3902439024390243e-05,
"loss": 0.211,
"step": 260
},
{
"epoch": 1.776315789473684,
"grad_norm": 9.698746681213379,
"learning_rate": 2.2682926829268295e-05,
"loss": 0.2134,
"step": 270
},
{
"epoch": 1.8421052631578947,
"grad_norm": 6.169530391693115,
"learning_rate": 2.146341463414634e-05,
"loss": 0.1787,
"step": 280
},
{
"epoch": 1.9078947368421053,
"grad_norm": 9.887014389038086,
"learning_rate": 2.0243902439024393e-05,
"loss": 0.1795,
"step": 290
},
{
"epoch": 1.973684210526316,
"grad_norm": 7.333874225616455,
"learning_rate": 1.902439024390244e-05,
"loss": 0.2199,
"step": 300
},
{
"epoch": 2.0,
"eval_accuracy": 0.975462962962963,
"eval_loss": 0.07434366643428802,
"eval_runtime": 12.8241,
"eval_samples_per_second": 168.433,
"eval_steps_per_second": 5.303,
"step": 304
},
{
"epoch": 2.039473684210526,
"grad_norm": 14.901606559753418,
"learning_rate": 1.7804878048780488e-05,
"loss": 0.1771,
"step": 310
},
{
"epoch": 2.1052631578947367,
"grad_norm": 15.063396453857422,
"learning_rate": 1.6585365853658537e-05,
"loss": 0.1884,
"step": 320
},
{
"epoch": 2.1710526315789473,
"grad_norm": 11.021356582641602,
"learning_rate": 1.5365853658536586e-05,
"loss": 0.1874,
"step": 330
},
{
"epoch": 2.236842105263158,
"grad_norm": 12.141759872436523,
"learning_rate": 1.4146341463414633e-05,
"loss": 0.1737,
"step": 340
},
{
"epoch": 2.3026315789473686,
"grad_norm": 8.017112731933594,
"learning_rate": 1.2926829268292684e-05,
"loss": 0.1793,
"step": 350
},
{
"epoch": 2.3684210526315788,
"grad_norm": 8.373170852661133,
"learning_rate": 1.1707317073170733e-05,
"loss": 0.1863,
"step": 360
},
{
"epoch": 2.4342105263157894,
"grad_norm": 9.354153633117676,
"learning_rate": 1.048780487804878e-05,
"loss": 0.1579,
"step": 370
},
{
"epoch": 2.5,
"grad_norm": 8.749258041381836,
"learning_rate": 9.26829268292683e-06,
"loss": 0.1672,
"step": 380
},
{
"epoch": 2.5657894736842106,
"grad_norm": 8.372227668762207,
"learning_rate": 8.048780487804879e-06,
"loss": 0.1624,
"step": 390
},
{
"epoch": 2.6315789473684212,
"grad_norm": 11.717450141906738,
"learning_rate": 6.829268292682928e-06,
"loss": 0.1826,
"step": 400
},
{
"epoch": 2.6973684210526314,
"grad_norm": 6.175394535064697,
"learning_rate": 5.609756097560976e-06,
"loss": 0.192,
"step": 410
},
{
"epoch": 2.763157894736842,
"grad_norm": 9.798537254333496,
"learning_rate": 4.390243902439024e-06,
"loss": 0.158,
"step": 420
},
{
"epoch": 2.8289473684210527,
"grad_norm": 7.753054141998291,
"learning_rate": 3.1707317073170736e-06,
"loss": 0.1463,
"step": 430
},
{
"epoch": 2.8947368421052633,
"grad_norm": 9.600172996520996,
"learning_rate": 1.951219512195122e-06,
"loss": 0.1926,
"step": 440
},
{
"epoch": 2.9605263157894735,
"grad_norm": 7.151966094970703,
"learning_rate": 7.317073170731708e-07,
"loss": 0.1356,
"step": 450
},
{
"epoch": 3.0,
"eval_accuracy": 0.9787037037037037,
"eval_loss": 0.06794986128807068,
"eval_runtime": 13.1952,
"eval_samples_per_second": 163.695,
"eval_steps_per_second": 5.153,
"step": 456
},
{
"epoch": 3.0,
"step": 456,
"total_flos": 1.4499253204608614e+18,
"train_loss": 0.3743210191789426,
"train_runtime": 869.999,
"train_samples_per_second": 67.035,
"train_steps_per_second": 0.524
}
],
"logging_steps": 10,
"max_steps": 456,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.4499253204608614e+18,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}