resnet-50-finetuned-cifar10 / trainer_state.json
phuong-tk-nguyen's picture
End of training
fd3c79e
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.997867803837953,
"eval_steps": 10,
"global_step": 351,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 1.388888888888889e-05,
"loss": 2.3058,
"step": 10
},
{
"epoch": 0.03,
"eval_accuracy": 0.0794,
"eval_loss": 2.310584306716919,
"eval_runtime": 23.1302,
"eval_samples_per_second": 216.168,
"eval_steps_per_second": 6.788,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 2.777777777777778e-05,
"loss": 2.3033,
"step": 20
},
{
"epoch": 0.06,
"eval_accuracy": 0.0892,
"eval_loss": 2.302612066268921,
"eval_runtime": 20.5744,
"eval_samples_per_second": 243.02,
"eval_steps_per_second": 7.631,
"step": 20
},
{
"epoch": 0.09,
"learning_rate": 4.166666666666667e-05,
"loss": 2.3012,
"step": 30
},
{
"epoch": 0.09,
"eval_accuracy": 0.1042,
"eval_loss": 2.2971391677856445,
"eval_runtime": 19.7054,
"eval_samples_per_second": 253.737,
"eval_steps_per_second": 7.967,
"step": 30
},
{
"epoch": 0.11,
"learning_rate": 4.936507936507937e-05,
"loss": 2.2914,
"step": 40
},
{
"epoch": 0.11,
"eval_accuracy": 0.1254,
"eval_loss": 2.2889511585235596,
"eval_runtime": 20.5875,
"eval_samples_per_second": 242.866,
"eval_steps_per_second": 7.626,
"step": 40
},
{
"epoch": 0.14,
"learning_rate": 4.7777777777777784e-05,
"loss": 2.2869,
"step": 50
},
{
"epoch": 0.14,
"eval_accuracy": 0.16,
"eval_loss": 2.281619071960449,
"eval_runtime": 20.7333,
"eval_samples_per_second": 241.158,
"eval_steps_per_second": 7.572,
"step": 50
},
{
"epoch": 0.17,
"learning_rate": 4.6190476190476194e-05,
"loss": 2.2785,
"step": 60
},
{
"epoch": 0.17,
"eval_accuracy": 0.1902,
"eval_loss": 2.269974708557129,
"eval_runtime": 20.4734,
"eval_samples_per_second": 244.22,
"eval_steps_per_second": 7.669,
"step": 60
},
{
"epoch": 0.2,
"learning_rate": 4.460317460317461e-05,
"loss": 2.2712,
"step": 70
},
{
"epoch": 0.2,
"eval_accuracy": 0.2354,
"eval_loss": 2.260219097137451,
"eval_runtime": 19.9116,
"eval_samples_per_second": 251.11,
"eval_steps_per_second": 7.885,
"step": 70
},
{
"epoch": 0.23,
"learning_rate": 4.301587301587302e-05,
"loss": 2.2619,
"step": 80
},
{
"epoch": 0.23,
"eval_accuracy": 0.2688,
"eval_loss": 2.250117063522339,
"eval_runtime": 21.2411,
"eval_samples_per_second": 235.392,
"eval_steps_per_second": 7.391,
"step": 80
},
{
"epoch": 0.26,
"learning_rate": 4.1428571428571437e-05,
"loss": 2.2509,
"step": 90
},
{
"epoch": 0.26,
"eval_accuracy": 0.3022,
"eval_loss": 2.2383360862731934,
"eval_runtime": 20.8445,
"eval_samples_per_second": 239.872,
"eval_steps_per_second": 7.532,
"step": 90
},
{
"epoch": 0.28,
"learning_rate": 3.984126984126984e-05,
"loss": 2.2382,
"step": 100
},
{
"epoch": 0.28,
"eval_accuracy": 0.3268,
"eval_loss": 2.222919225692749,
"eval_runtime": 19.2819,
"eval_samples_per_second": 259.311,
"eval_steps_per_second": 8.142,
"step": 100
},
{
"epoch": 0.31,
"learning_rate": 3.8253968253968256e-05,
"loss": 2.2255,
"step": 110
},
{
"epoch": 0.31,
"eval_accuracy": 0.353,
"eval_loss": 2.2083821296691895,
"eval_runtime": 23.1248,
"eval_samples_per_second": 216.218,
"eval_steps_per_second": 6.789,
"step": 110
},
{
"epoch": 0.34,
"learning_rate": 3.6666666666666666e-05,
"loss": 2.2164,
"step": 120
},
{
"epoch": 0.34,
"eval_accuracy": 0.3608,
"eval_loss": 2.1939358711242676,
"eval_runtime": 25.8088,
"eval_samples_per_second": 193.732,
"eval_steps_per_second": 6.083,
"step": 120
},
{
"epoch": 0.37,
"learning_rate": 3.5079365079365075e-05,
"loss": 2.2028,
"step": 130
},
{
"epoch": 0.37,
"eval_accuracy": 0.3668,
"eval_loss": 2.182861804962158,
"eval_runtime": 25.579,
"eval_samples_per_second": 195.473,
"eval_steps_per_second": 6.138,
"step": 130
},
{
"epoch": 0.4,
"learning_rate": 3.349206349206349e-05,
"loss": 2.1977,
"step": 140
},
{
"epoch": 0.4,
"eval_accuracy": 0.401,
"eval_loss": 2.164577007293701,
"eval_runtime": 25.165,
"eval_samples_per_second": 198.688,
"eval_steps_per_second": 6.239,
"step": 140
},
{
"epoch": 0.43,
"learning_rate": 3.19047619047619e-05,
"loss": 2.1844,
"step": 150
},
{
"epoch": 0.43,
"eval_accuracy": 0.4244,
"eval_loss": 2.144054651260376,
"eval_runtime": 24.2393,
"eval_samples_per_second": 206.277,
"eval_steps_per_second": 6.477,
"step": 150
},
{
"epoch": 0.45,
"learning_rate": 3.0317460317460318e-05,
"loss": 2.1689,
"step": 160
},
{
"epoch": 0.45,
"eval_accuracy": 0.437,
"eval_loss": 2.1322500705718994,
"eval_runtime": 25.4332,
"eval_samples_per_second": 196.593,
"eval_steps_per_second": 6.173,
"step": 160
},
{
"epoch": 0.48,
"learning_rate": 2.8730158730158728e-05,
"loss": 2.1555,
"step": 170
},
{
"epoch": 0.48,
"eval_accuracy": 0.4462,
"eval_loss": 2.1159207820892334,
"eval_runtime": 25.3372,
"eval_samples_per_second": 197.338,
"eval_steps_per_second": 6.196,
"step": 170
},
{
"epoch": 0.51,
"learning_rate": 2.714285714285714e-05,
"loss": 2.1448,
"step": 180
},
{
"epoch": 0.51,
"eval_accuracy": 0.45,
"eval_loss": 2.0992112159729004,
"eval_runtime": 25.0688,
"eval_samples_per_second": 199.451,
"eval_steps_per_second": 6.263,
"step": 180
},
{
"epoch": 0.54,
"learning_rate": 2.5555555555555554e-05,
"loss": 2.1313,
"step": 190
},
{
"epoch": 0.54,
"eval_accuracy": 0.4642,
"eval_loss": 2.080961227416992,
"eval_runtime": 25.2129,
"eval_samples_per_second": 198.311,
"eval_steps_per_second": 6.227,
"step": 190
},
{
"epoch": 0.57,
"learning_rate": 2.396825396825397e-05,
"loss": 2.1189,
"step": 200
},
{
"epoch": 0.57,
"eval_accuracy": 0.4708,
"eval_loss": 2.0589163303375244,
"eval_runtime": 25.6278,
"eval_samples_per_second": 195.101,
"eval_steps_per_second": 6.126,
"step": 200
},
{
"epoch": 0.6,
"learning_rate": 2.2380952380952384e-05,
"loss": 2.1111,
"step": 210
},
{
"epoch": 0.6,
"eval_accuracy": 0.4828,
"eval_loss": 2.0430362224578857,
"eval_runtime": 19.2415,
"eval_samples_per_second": 259.854,
"eval_steps_per_second": 8.159,
"step": 210
},
{
"epoch": 0.63,
"learning_rate": 2.0793650793650797e-05,
"loss": 2.0905,
"step": 220
},
{
"epoch": 0.63,
"eval_accuracy": 0.4938,
"eval_loss": 2.028820753097534,
"eval_runtime": 20.6864,
"eval_samples_per_second": 241.705,
"eval_steps_per_second": 7.59,
"step": 220
},
{
"epoch": 0.65,
"learning_rate": 1.920634920634921e-05,
"loss": 2.082,
"step": 230
},
{
"epoch": 0.65,
"eval_accuracy": 0.4938,
"eval_loss": 2.008862257003784,
"eval_runtime": 20.028,
"eval_samples_per_second": 249.65,
"eval_steps_per_second": 7.839,
"step": 230
},
{
"epoch": 0.68,
"learning_rate": 1.761904761904762e-05,
"loss": 2.0646,
"step": 240
},
{
"epoch": 0.68,
"eval_accuracy": 0.5014,
"eval_loss": 1.9969898462295532,
"eval_runtime": 20.7247,
"eval_samples_per_second": 241.258,
"eval_steps_per_second": 7.576,
"step": 240
},
{
"epoch": 0.71,
"learning_rate": 1.6031746031746033e-05,
"loss": 2.0636,
"step": 250
},
{
"epoch": 0.71,
"eval_accuracy": 0.4946,
"eval_loss": 1.9777544736862183,
"eval_runtime": 18.9902,
"eval_samples_per_second": 263.294,
"eval_steps_per_second": 8.267,
"step": 250
},
{
"epoch": 0.74,
"learning_rate": 1.4444444444444444e-05,
"loss": 2.0579,
"step": 260
},
{
"epoch": 0.74,
"eval_accuracy": 0.49,
"eval_loss": 1.9608845710754395,
"eval_runtime": 18.9333,
"eval_samples_per_second": 264.084,
"eval_steps_per_second": 8.292,
"step": 260
},
{
"epoch": 0.77,
"learning_rate": 1.2857142857142857e-05,
"loss": 2.028,
"step": 270
},
{
"epoch": 0.77,
"eval_accuracy": 0.4862,
"eval_loss": 1.960185170173645,
"eval_runtime": 18.9535,
"eval_samples_per_second": 263.804,
"eval_steps_per_second": 8.283,
"step": 270
},
{
"epoch": 0.8,
"learning_rate": 1.126984126984127e-05,
"loss": 2.0447,
"step": 280
},
{
"epoch": 0.8,
"eval_accuracy": 0.4934,
"eval_loss": 1.9459648132324219,
"eval_runtime": 18.8589,
"eval_samples_per_second": 265.126,
"eval_steps_per_second": 8.325,
"step": 280
},
{
"epoch": 0.82,
"learning_rate": 9.682539682539683e-06,
"loss": 2.0168,
"step": 290
},
{
"epoch": 0.82,
"eval_accuracy": 0.505,
"eval_loss": 1.9368737936019897,
"eval_runtime": 19.0033,
"eval_samples_per_second": 263.112,
"eval_steps_per_second": 8.262,
"step": 290
},
{
"epoch": 0.85,
"learning_rate": 8.095238095238097e-06,
"loss": 2.0126,
"step": 300
},
{
"epoch": 0.85,
"eval_accuracy": 0.4926,
"eval_loss": 1.931652545928955,
"eval_runtime": 19.1274,
"eval_samples_per_second": 261.406,
"eval_steps_per_second": 8.208,
"step": 300
},
{
"epoch": 0.88,
"learning_rate": 6.507936507936509e-06,
"loss": 2.0099,
"step": 310
},
{
"epoch": 0.88,
"eval_accuracy": 0.4952,
"eval_loss": 1.9234933853149414,
"eval_runtime": 19.032,
"eval_samples_per_second": 262.715,
"eval_steps_per_second": 8.249,
"step": 310
},
{
"epoch": 0.91,
"learning_rate": 4.920634920634921e-06,
"loss": 1.9978,
"step": 320
},
{
"epoch": 0.91,
"eval_accuracy": 0.4972,
"eval_loss": 1.9174150228500366,
"eval_runtime": 18.9404,
"eval_samples_per_second": 263.986,
"eval_steps_per_second": 8.289,
"step": 320
},
{
"epoch": 0.94,
"learning_rate": 3.3333333333333333e-06,
"loss": 1.9951,
"step": 330
},
{
"epoch": 0.94,
"eval_accuracy": 0.507,
"eval_loss": 1.9119243621826172,
"eval_runtime": 18.9559,
"eval_samples_per_second": 263.769,
"eval_steps_per_second": 8.282,
"step": 330
},
{
"epoch": 0.97,
"learning_rate": 1.7460317460317462e-06,
"loss": 1.9823,
"step": 340
},
{
"epoch": 0.97,
"eval_accuracy": 0.4992,
"eval_loss": 1.9119775295257568,
"eval_runtime": 19.001,
"eval_samples_per_second": 263.143,
"eval_steps_per_second": 8.263,
"step": 340
},
{
"epoch": 1.0,
"learning_rate": 1.5873015873015874e-07,
"loss": 1.985,
"step": 350
},
{
"epoch": 1.0,
"eval_accuracy": 0.5022,
"eval_loss": 1.9064103364944458,
"eval_runtime": 19.1234,
"eval_samples_per_second": 261.459,
"eval_steps_per_second": 8.21,
"step": 350
},
{
"epoch": 1.0,
"step": 351,
"total_flos": 9.547293521089659e+17,
"train_loss": 2.144495400947723,
"train_runtime": 1271.552,
"train_samples_per_second": 35.39,
"train_steps_per_second": 0.276
}
],
"logging_steps": 10,
"max_steps": 351,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 9.547293521089659e+17,
"trial_name": null,
"trial_params": null
}