Niraya666's picture
End of training
6f739ef
{
"best_metric": 0.7,
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-4cls-0922/checkpoint-122",
"epoch": 200.0,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9655490517616272,
"eval_runtime": 0.8298,
"eval_samples_per_second": 84.356,
"eval_steps_per_second": 2.41,
"step": 2
},
{
"epoch": 2.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9653854370117188,
"eval_runtime": 0.6383,
"eval_samples_per_second": 109.671,
"eval_steps_per_second": 3.133,
"step": 4
},
{
"epoch": 3.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9650949835777283,
"eval_runtime": 0.6412,
"eval_samples_per_second": 109.167,
"eval_steps_per_second": 3.119,
"step": 6
},
{
"epoch": 4.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9646532535552979,
"eval_runtime": 0.8218,
"eval_samples_per_second": 85.18,
"eval_steps_per_second": 2.434,
"step": 8
},
{
"epoch": 5.0,
"learning_rate": 1.25e-05,
"loss": 1.0064,
"step": 10
},
{
"epoch": 5.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9641380310058594,
"eval_runtime": 0.6452,
"eval_samples_per_second": 108.491,
"eval_steps_per_second": 3.1,
"step": 10
},
{
"epoch": 6.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9635317921638489,
"eval_runtime": 0.6347,
"eval_samples_per_second": 110.284,
"eval_steps_per_second": 3.151,
"step": 12
},
{
"epoch": 7.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9628700017929077,
"eval_runtime": 0.8273,
"eval_samples_per_second": 84.611,
"eval_steps_per_second": 2.417,
"step": 14
},
{
"epoch": 8.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9623274803161621,
"eval_runtime": 0.6551,
"eval_samples_per_second": 106.859,
"eval_steps_per_second": 3.053,
"step": 16
},
{
"epoch": 9.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9616996645927429,
"eval_runtime": 0.646,
"eval_samples_per_second": 108.363,
"eval_steps_per_second": 3.096,
"step": 18
},
{
"epoch": 10.0,
"learning_rate": 2.5e-05,
"loss": 0.9821,
"step": 20
},
{
"epoch": 10.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9611372947692871,
"eval_runtime": 0.8313,
"eval_samples_per_second": 84.202,
"eval_steps_per_second": 2.406,
"step": 20
},
{
"epoch": 11.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9607454538345337,
"eval_runtime": 0.8335,
"eval_samples_per_second": 83.985,
"eval_steps_per_second": 2.4,
"step": 22
},
{
"epoch": 12.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9604489207267761,
"eval_runtime": 0.8194,
"eval_samples_per_second": 85.429,
"eval_steps_per_second": 2.441,
"step": 24
},
{
"epoch": 13.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9601203799247742,
"eval_runtime": 0.8211,
"eval_samples_per_second": 85.256,
"eval_steps_per_second": 2.436,
"step": 26
},
{
"epoch": 14.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9597390294075012,
"eval_runtime": 0.6563,
"eval_samples_per_second": 106.663,
"eval_steps_per_second": 3.048,
"step": 28
},
{
"epoch": 15.0,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.0278,
"step": 30
},
{
"epoch": 15.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9591529965400696,
"eval_runtime": 0.6495,
"eval_samples_per_second": 107.778,
"eval_steps_per_second": 3.079,
"step": 30
},
{
"epoch": 16.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9581246376037598,
"eval_runtime": 0.791,
"eval_samples_per_second": 88.495,
"eval_steps_per_second": 2.528,
"step": 32
},
{
"epoch": 17.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9566996097564697,
"eval_runtime": 0.6461,
"eval_samples_per_second": 108.347,
"eval_steps_per_second": 3.096,
"step": 34
},
{
"epoch": 18.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9551236629486084,
"eval_runtime": 0.6456,
"eval_samples_per_second": 108.429,
"eval_steps_per_second": 3.098,
"step": 36
},
{
"epoch": 19.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9534342288970947,
"eval_runtime": 0.8038,
"eval_samples_per_second": 87.083,
"eval_steps_per_second": 2.488,
"step": 38
},
{
"epoch": 20.0,
"learning_rate": 5e-05,
"loss": 0.9986,
"step": 40
},
{
"epoch": 20.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9513913989067078,
"eval_runtime": 0.6423,
"eval_samples_per_second": 108.98,
"eval_steps_per_second": 3.114,
"step": 40
},
{
"epoch": 21.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9493252635002136,
"eval_runtime": 0.6401,
"eval_samples_per_second": 109.357,
"eval_steps_per_second": 3.124,
"step": 42
},
{
"epoch": 22.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9471749663352966,
"eval_runtime": 0.7957,
"eval_samples_per_second": 87.97,
"eval_steps_per_second": 2.513,
"step": 44
},
{
"epoch": 23.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9451875686645508,
"eval_runtime": 0.6379,
"eval_samples_per_second": 109.728,
"eval_steps_per_second": 3.135,
"step": 46
},
{
"epoch": 24.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.943417489528656,
"eval_runtime": 0.6466,
"eval_samples_per_second": 108.259,
"eval_steps_per_second": 3.093,
"step": 48
},
{
"epoch": 25.0,
"learning_rate": 6.25e-05,
"loss": 0.9973,
"step": 50
},
{
"epoch": 25.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9419717788696289,
"eval_runtime": 0.8115,
"eval_samples_per_second": 86.264,
"eval_steps_per_second": 2.465,
"step": 50
},
{
"epoch": 26.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9404588937759399,
"eval_runtime": 0.6332,
"eval_samples_per_second": 110.551,
"eval_steps_per_second": 3.159,
"step": 52
},
{
"epoch": 27.0,
"eval_accuracy": 0.6285714285714286,
"eval_loss": 0.9387302994728088,
"eval_runtime": 0.64,
"eval_samples_per_second": 109.375,
"eval_steps_per_second": 3.125,
"step": 54
},
{
"epoch": 28.0,
"eval_accuracy": 0.6285714285714286,
"eval_loss": 0.9375677704811096,
"eval_runtime": 0.8312,
"eval_samples_per_second": 84.219,
"eval_steps_per_second": 2.406,
"step": 56
},
{
"epoch": 29.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9368333220481873,
"eval_runtime": 0.6385,
"eval_samples_per_second": 109.629,
"eval_steps_per_second": 3.132,
"step": 58
},
{
"epoch": 30.0,
"learning_rate": 7.500000000000001e-05,
"loss": 0.9936,
"step": 60
},
{
"epoch": 30.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9361710548400879,
"eval_runtime": 0.6573,
"eval_samples_per_second": 106.497,
"eval_steps_per_second": 3.043,
"step": 60
},
{
"epoch": 31.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9361298680305481,
"eval_runtime": 0.7944,
"eval_samples_per_second": 88.115,
"eval_steps_per_second": 2.518,
"step": 62
},
{
"epoch": 32.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9364449381828308,
"eval_runtime": 0.6554,
"eval_samples_per_second": 106.808,
"eval_steps_per_second": 3.052,
"step": 64
},
{
"epoch": 33.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9371016621589661,
"eval_runtime": 0.6483,
"eval_samples_per_second": 107.97,
"eval_steps_per_second": 3.085,
"step": 66
},
{
"epoch": 34.0,
"eval_accuracy": 0.6428571428571429,
"eval_loss": 0.9379546046257019,
"eval_runtime": 0.8119,
"eval_samples_per_second": 86.219,
"eval_steps_per_second": 2.463,
"step": 68
},
{
"epoch": 35.0,
"learning_rate": 8.75e-05,
"loss": 0.9746,
"step": 70
},
{
"epoch": 35.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9379692077636719,
"eval_runtime": 0.6362,
"eval_samples_per_second": 110.031,
"eval_steps_per_second": 3.144,
"step": 70
},
{
"epoch": 36.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9374780654907227,
"eval_runtime": 0.639,
"eval_samples_per_second": 109.543,
"eval_steps_per_second": 3.13,
"step": 72
},
{
"epoch": 37.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9379698634147644,
"eval_runtime": 0.8343,
"eval_samples_per_second": 83.899,
"eval_steps_per_second": 2.397,
"step": 74
},
{
"epoch": 38.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9375231862068176,
"eval_runtime": 0.6395,
"eval_samples_per_second": 109.457,
"eval_steps_per_second": 3.127,
"step": 76
},
{
"epoch": 39.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9369739890098572,
"eval_runtime": 0.6333,
"eval_samples_per_second": 110.536,
"eval_steps_per_second": 3.158,
"step": 78
},
{
"epoch": 40.0,
"learning_rate": 0.0001,
"loss": 1.0113,
"step": 80
},
{
"epoch": 40.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9361743330955505,
"eval_runtime": 0.7993,
"eval_samples_per_second": 87.579,
"eval_steps_per_second": 2.502,
"step": 80
},
{
"epoch": 41.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9340663552284241,
"eval_runtime": 0.6461,
"eval_samples_per_second": 108.348,
"eval_steps_per_second": 3.096,
"step": 82
},
{
"epoch": 42.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.9300563335418701,
"eval_runtime": 0.636,
"eval_samples_per_second": 110.058,
"eval_steps_per_second": 3.145,
"step": 84
},
{
"epoch": 43.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9259787201881409,
"eval_runtime": 0.8154,
"eval_samples_per_second": 85.845,
"eval_steps_per_second": 2.453,
"step": 86
},
{
"epoch": 44.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9224489331245422,
"eval_runtime": 0.6369,
"eval_samples_per_second": 109.903,
"eval_steps_per_second": 3.14,
"step": 88
},
{
"epoch": 45.0,
"learning_rate": 9.687500000000001e-05,
"loss": 0.9756,
"step": 90
},
{
"epoch": 45.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9190067648887634,
"eval_runtime": 0.6388,
"eval_samples_per_second": 109.577,
"eval_steps_per_second": 3.131,
"step": 90
},
{
"epoch": 46.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9154108166694641,
"eval_runtime": 0.7966,
"eval_samples_per_second": 87.873,
"eval_steps_per_second": 2.511,
"step": 92
},
{
"epoch": 47.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.912346363067627,
"eval_runtime": 0.6406,
"eval_samples_per_second": 109.268,
"eval_steps_per_second": 3.122,
"step": 94
},
{
"epoch": 48.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9091367721557617,
"eval_runtime": 0.6398,
"eval_samples_per_second": 109.41,
"eval_steps_per_second": 3.126,
"step": 96
},
{
"epoch": 49.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9070726037025452,
"eval_runtime": 0.8188,
"eval_samples_per_second": 85.488,
"eval_steps_per_second": 2.443,
"step": 98
},
{
"epoch": 50.0,
"learning_rate": 9.375e-05,
"loss": 0.9721,
"step": 100
},
{
"epoch": 50.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9055730700492859,
"eval_runtime": 0.6361,
"eval_samples_per_second": 110.054,
"eval_steps_per_second": 3.144,
"step": 100
},
{
"epoch": 51.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9046576619148254,
"eval_runtime": 0.6407,
"eval_samples_per_second": 109.252,
"eval_steps_per_second": 3.121,
"step": 102
},
{
"epoch": 52.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.9038794636726379,
"eval_runtime": 0.8178,
"eval_samples_per_second": 85.592,
"eval_steps_per_second": 2.445,
"step": 104
},
{
"epoch": 53.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9030665755271912,
"eval_runtime": 0.6283,
"eval_samples_per_second": 111.419,
"eval_steps_per_second": 3.183,
"step": 106
},
{
"epoch": 54.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.902490496635437,
"eval_runtime": 0.8366,
"eval_samples_per_second": 83.669,
"eval_steps_per_second": 2.391,
"step": 108
},
{
"epoch": 55.0,
"learning_rate": 9.062500000000001e-05,
"loss": 0.9698,
"step": 110
},
{
"epoch": 55.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.902264416217804,
"eval_runtime": 0.9891,
"eval_samples_per_second": 70.774,
"eval_steps_per_second": 2.022,
"step": 110
},
{
"epoch": 56.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.9011555314064026,
"eval_runtime": 0.6498,
"eval_samples_per_second": 107.729,
"eval_steps_per_second": 3.078,
"step": 112
},
{
"epoch": 57.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8996686935424805,
"eval_runtime": 0.8289,
"eval_samples_per_second": 84.447,
"eval_steps_per_second": 2.413,
"step": 114
},
{
"epoch": 58.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8982025980949402,
"eval_runtime": 0.6375,
"eval_samples_per_second": 109.798,
"eval_steps_per_second": 3.137,
"step": 116
},
{
"epoch": 59.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8969982266426086,
"eval_runtime": 0.6483,
"eval_samples_per_second": 107.97,
"eval_steps_per_second": 3.085,
"step": 118
},
{
"epoch": 60.0,
"learning_rate": 8.75e-05,
"loss": 0.9341,
"step": 120
},
{
"epoch": 60.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8956836462020874,
"eval_runtime": 0.8303,
"eval_samples_per_second": 84.307,
"eval_steps_per_second": 2.409,
"step": 120
},
{
"epoch": 61.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8946982622146606,
"eval_runtime": 0.6483,
"eval_samples_per_second": 107.981,
"eval_steps_per_second": 3.085,
"step": 122
},
{
"epoch": 62.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8940390348434448,
"eval_runtime": 0.6421,
"eval_samples_per_second": 109.023,
"eval_steps_per_second": 3.115,
"step": 124
},
{
"epoch": 63.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8940520286560059,
"eval_runtime": 0.8356,
"eval_samples_per_second": 83.773,
"eval_steps_per_second": 2.394,
"step": 126
},
{
"epoch": 64.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8934383988380432,
"eval_runtime": 0.6317,
"eval_samples_per_second": 110.812,
"eval_steps_per_second": 3.166,
"step": 128
},
{
"epoch": 65.0,
"learning_rate": 8.4375e-05,
"loss": 0.9717,
"step": 130
},
{
"epoch": 65.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8916982412338257,
"eval_runtime": 0.6456,
"eval_samples_per_second": 108.418,
"eval_steps_per_second": 3.098,
"step": 130
},
{
"epoch": 66.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8898113369941711,
"eval_runtime": 0.8145,
"eval_samples_per_second": 85.937,
"eval_steps_per_second": 2.455,
"step": 132
},
{
"epoch": 67.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8883917927742004,
"eval_runtime": 0.6387,
"eval_samples_per_second": 109.599,
"eval_steps_per_second": 3.131,
"step": 134
},
{
"epoch": 68.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8869962692260742,
"eval_runtime": 0.6406,
"eval_samples_per_second": 109.266,
"eval_steps_per_second": 3.122,
"step": 136
},
{
"epoch": 69.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8853691816329956,
"eval_runtime": 0.8216,
"eval_samples_per_second": 85.2,
"eval_steps_per_second": 2.434,
"step": 138
},
{
"epoch": 70.0,
"learning_rate": 8.125000000000001e-05,
"loss": 0.9655,
"step": 140
},
{
"epoch": 70.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8840075731277466,
"eval_runtime": 0.6378,
"eval_samples_per_second": 109.751,
"eval_steps_per_second": 3.136,
"step": 140
},
{
"epoch": 71.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8826519250869751,
"eval_runtime": 0.6384,
"eval_samples_per_second": 109.644,
"eval_steps_per_second": 3.133,
"step": 142
},
{
"epoch": 72.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8813565373420715,
"eval_runtime": 0.8402,
"eval_samples_per_second": 83.313,
"eval_steps_per_second": 2.38,
"step": 144
},
{
"epoch": 73.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8805155754089355,
"eval_runtime": 0.6428,
"eval_samples_per_second": 108.905,
"eval_steps_per_second": 3.112,
"step": 146
},
{
"epoch": 74.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8803040385246277,
"eval_runtime": 0.649,
"eval_samples_per_second": 107.857,
"eval_steps_per_second": 3.082,
"step": 148
},
{
"epoch": 75.0,
"learning_rate": 7.8125e-05,
"loss": 0.9458,
"step": 150
},
{
"epoch": 75.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8801725506782532,
"eval_runtime": 0.82,
"eval_samples_per_second": 85.365,
"eval_steps_per_second": 2.439,
"step": 150
},
{
"epoch": 76.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8797475695610046,
"eval_runtime": 0.6476,
"eval_samples_per_second": 108.085,
"eval_steps_per_second": 3.088,
"step": 152
},
{
"epoch": 77.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8793725967407227,
"eval_runtime": 0.6468,
"eval_samples_per_second": 108.22,
"eval_steps_per_second": 3.092,
"step": 154
},
{
"epoch": 78.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8795827031135559,
"eval_runtime": 0.8346,
"eval_samples_per_second": 83.873,
"eval_steps_per_second": 2.396,
"step": 156
},
{
"epoch": 79.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8807878494262695,
"eval_runtime": 0.6453,
"eval_samples_per_second": 108.479,
"eval_steps_per_second": 3.099,
"step": 158
},
{
"epoch": 80.0,
"learning_rate": 7.500000000000001e-05,
"loss": 0.9094,
"step": 160
},
{
"epoch": 80.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8817013502120972,
"eval_runtime": 0.6393,
"eval_samples_per_second": 109.492,
"eval_steps_per_second": 3.128,
"step": 160
},
{
"epoch": 81.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8828238844871521,
"eval_runtime": 0.8346,
"eval_samples_per_second": 83.868,
"eval_steps_per_second": 2.396,
"step": 162
},
{
"epoch": 82.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8835611939430237,
"eval_runtime": 0.636,
"eval_samples_per_second": 110.07,
"eval_steps_per_second": 3.145,
"step": 164
},
{
"epoch": 83.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8830356001853943,
"eval_runtime": 0.6535,
"eval_samples_per_second": 107.117,
"eval_steps_per_second": 3.06,
"step": 166
},
{
"epoch": 84.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.8820751905441284,
"eval_runtime": 0.8384,
"eval_samples_per_second": 83.495,
"eval_steps_per_second": 2.386,
"step": 168
},
{
"epoch": 85.0,
"learning_rate": 7.1875e-05,
"loss": 0.8719,
"step": 170
},
{
"epoch": 85.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.8812506794929504,
"eval_runtime": 0.6519,
"eval_samples_per_second": 107.372,
"eval_steps_per_second": 3.068,
"step": 170
},
{
"epoch": 86.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8804309368133545,
"eval_runtime": 0.6326,
"eval_samples_per_second": 110.652,
"eval_steps_per_second": 3.161,
"step": 172
},
{
"epoch": 87.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.8798118829727173,
"eval_runtime": 0.8338,
"eval_samples_per_second": 83.95,
"eval_steps_per_second": 2.399,
"step": 174
},
{
"epoch": 88.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.8787184953689575,
"eval_runtime": 0.64,
"eval_samples_per_second": 109.38,
"eval_steps_per_second": 3.125,
"step": 176
},
{
"epoch": 89.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 0.8769770264625549,
"eval_runtime": 0.6382,
"eval_samples_per_second": 109.679,
"eval_steps_per_second": 3.134,
"step": 178
},
{
"epoch": 90.0,
"learning_rate": 6.875e-05,
"loss": 0.9288,
"step": 180
},
{
"epoch": 90.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8752025961875916,
"eval_runtime": 0.8649,
"eval_samples_per_second": 80.934,
"eval_steps_per_second": 2.312,
"step": 180
},
{
"epoch": 91.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8721939921379089,
"eval_runtime": 0.6536,
"eval_samples_per_second": 107.101,
"eval_steps_per_second": 3.06,
"step": 182
},
{
"epoch": 92.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8693682551383972,
"eval_runtime": 0.6434,
"eval_samples_per_second": 108.799,
"eval_steps_per_second": 3.109,
"step": 184
},
{
"epoch": 93.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8670406937599182,
"eval_runtime": 0.8337,
"eval_samples_per_second": 83.963,
"eval_steps_per_second": 2.399,
"step": 186
},
{
"epoch": 94.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8644655346870422,
"eval_runtime": 0.6432,
"eval_samples_per_second": 108.826,
"eval_steps_per_second": 3.109,
"step": 188
},
{
"epoch": 95.0,
"learning_rate": 6.562500000000001e-05,
"loss": 0.9039,
"step": 190
},
{
"epoch": 95.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8624207973480225,
"eval_runtime": 0.6482,
"eval_samples_per_second": 107.999,
"eval_steps_per_second": 3.086,
"step": 190
},
{
"epoch": 96.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8603058457374573,
"eval_runtime": 0.8409,
"eval_samples_per_second": 83.249,
"eval_steps_per_second": 2.379,
"step": 192
},
{
"epoch": 97.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8583868741989136,
"eval_runtime": 0.6484,
"eval_samples_per_second": 107.951,
"eval_steps_per_second": 3.084,
"step": 194
},
{
"epoch": 98.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8566268086433411,
"eval_runtime": 0.6949,
"eval_samples_per_second": 100.728,
"eval_steps_per_second": 2.878,
"step": 196
},
{
"epoch": 99.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8553413152694702,
"eval_runtime": 0.8276,
"eval_samples_per_second": 84.585,
"eval_steps_per_second": 2.417,
"step": 198
},
{
"epoch": 100.0,
"learning_rate": 6.25e-05,
"loss": 0.9081,
"step": 200
},
{
"epoch": 100.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8549684286117554,
"eval_runtime": 0.6594,
"eval_samples_per_second": 106.164,
"eval_steps_per_second": 3.033,
"step": 200
},
{
"epoch": 101.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8551309108734131,
"eval_runtime": 0.6588,
"eval_samples_per_second": 106.255,
"eval_steps_per_second": 3.036,
"step": 202
},
{
"epoch": 102.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8556391000747681,
"eval_runtime": 0.8474,
"eval_samples_per_second": 82.605,
"eval_steps_per_second": 2.36,
"step": 204
},
{
"epoch": 103.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8558002710342407,
"eval_runtime": 0.6568,
"eval_samples_per_second": 106.577,
"eval_steps_per_second": 3.045,
"step": 206
},
{
"epoch": 104.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8554455637931824,
"eval_runtime": 0.6448,
"eval_samples_per_second": 108.569,
"eval_steps_per_second": 3.102,
"step": 208
},
{
"epoch": 105.0,
"learning_rate": 5.9375e-05,
"loss": 0.9142,
"step": 210
},
{
"epoch": 105.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8551297783851624,
"eval_runtime": 0.8226,
"eval_samples_per_second": 85.093,
"eval_steps_per_second": 2.431,
"step": 210
},
{
"epoch": 106.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8553109169006348,
"eval_runtime": 0.6501,
"eval_samples_per_second": 107.668,
"eval_steps_per_second": 3.076,
"step": 212
},
{
"epoch": 107.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.855134904384613,
"eval_runtime": 0.637,
"eval_samples_per_second": 109.882,
"eval_steps_per_second": 3.139,
"step": 214
},
{
"epoch": 108.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8549013137817383,
"eval_runtime": 0.8378,
"eval_samples_per_second": 83.557,
"eval_steps_per_second": 2.387,
"step": 216
},
{
"epoch": 109.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.854942798614502,
"eval_runtime": 0.6596,
"eval_samples_per_second": 106.131,
"eval_steps_per_second": 3.032,
"step": 218
},
{
"epoch": 110.0,
"learning_rate": 5.6250000000000005e-05,
"loss": 0.9347,
"step": 220
},
{
"epoch": 110.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8551362752914429,
"eval_runtime": 0.6674,
"eval_samples_per_second": 104.886,
"eval_steps_per_second": 2.997,
"step": 220
},
{
"epoch": 111.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8553721308708191,
"eval_runtime": 0.8336,
"eval_samples_per_second": 83.974,
"eval_steps_per_second": 2.399,
"step": 222
},
{
"epoch": 112.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8548364639282227,
"eval_runtime": 0.6506,
"eval_samples_per_second": 107.599,
"eval_steps_per_second": 3.074,
"step": 224
},
{
"epoch": 113.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.853795051574707,
"eval_runtime": 0.6756,
"eval_samples_per_second": 103.611,
"eval_steps_per_second": 2.96,
"step": 226
},
{
"epoch": 114.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8524832129478455,
"eval_runtime": 0.8168,
"eval_samples_per_second": 85.696,
"eval_steps_per_second": 2.448,
"step": 228
},
{
"epoch": 115.0,
"learning_rate": 5.3125000000000004e-05,
"loss": 0.8922,
"step": 230
},
{
"epoch": 115.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8512247204780579,
"eval_runtime": 0.6476,
"eval_samples_per_second": 108.096,
"eval_steps_per_second": 3.088,
"step": 230
},
{
"epoch": 116.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8505221009254456,
"eval_runtime": 0.6563,
"eval_samples_per_second": 106.655,
"eval_steps_per_second": 3.047,
"step": 232
},
{
"epoch": 117.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.849509596824646,
"eval_runtime": 0.8193,
"eval_samples_per_second": 85.434,
"eval_steps_per_second": 2.441,
"step": 234
},
{
"epoch": 118.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8483795523643494,
"eval_runtime": 0.6476,
"eval_samples_per_second": 108.094,
"eval_steps_per_second": 3.088,
"step": 236
},
{
"epoch": 119.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8471851944923401,
"eval_runtime": 0.6472,
"eval_samples_per_second": 108.158,
"eval_steps_per_second": 3.09,
"step": 238
},
{
"epoch": 120.0,
"learning_rate": 5e-05,
"loss": 0.8897,
"step": 240
},
{
"epoch": 120.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8455559611320496,
"eval_runtime": 0.8155,
"eval_samples_per_second": 85.837,
"eval_steps_per_second": 2.452,
"step": 240
},
{
"epoch": 121.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8439861536026001,
"eval_runtime": 0.6794,
"eval_samples_per_second": 103.026,
"eval_steps_per_second": 2.944,
"step": 242
},
{
"epoch": 122.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8426181674003601,
"eval_runtime": 0.6386,
"eval_samples_per_second": 109.616,
"eval_steps_per_second": 3.132,
"step": 244
},
{
"epoch": 123.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8412323594093323,
"eval_runtime": 0.8222,
"eval_samples_per_second": 85.135,
"eval_steps_per_second": 2.432,
"step": 246
},
{
"epoch": 124.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8395997881889343,
"eval_runtime": 0.6405,
"eval_samples_per_second": 109.29,
"eval_steps_per_second": 3.123,
"step": 248
},
{
"epoch": 125.0,
"learning_rate": 4.6875e-05,
"loss": 0.8829,
"step": 250
},
{
"epoch": 125.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8383906483650208,
"eval_runtime": 0.6384,
"eval_samples_per_second": 109.656,
"eval_steps_per_second": 3.133,
"step": 250
},
{
"epoch": 126.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8372732996940613,
"eval_runtime": 0.8007,
"eval_samples_per_second": 87.425,
"eval_steps_per_second": 2.498,
"step": 252
},
{
"epoch": 127.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8365365266799927,
"eval_runtime": 0.6412,
"eval_samples_per_second": 109.171,
"eval_steps_per_second": 3.119,
"step": 254
},
{
"epoch": 128.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.835951030254364,
"eval_runtime": 0.6518,
"eval_samples_per_second": 107.389,
"eval_steps_per_second": 3.068,
"step": 256
},
{
"epoch": 129.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8352962732315063,
"eval_runtime": 0.8209,
"eval_samples_per_second": 85.273,
"eval_steps_per_second": 2.436,
"step": 258
},
{
"epoch": 130.0,
"learning_rate": 4.375e-05,
"loss": 0.8744,
"step": 260
},
{
"epoch": 130.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8344349265098572,
"eval_runtime": 0.6608,
"eval_samples_per_second": 105.932,
"eval_steps_per_second": 3.027,
"step": 260
},
{
"epoch": 131.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8336659669876099,
"eval_runtime": 0.6503,
"eval_samples_per_second": 107.635,
"eval_steps_per_second": 3.075,
"step": 262
},
{
"epoch": 132.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8329463601112366,
"eval_runtime": 0.824,
"eval_samples_per_second": 84.952,
"eval_steps_per_second": 2.427,
"step": 264
},
{
"epoch": 133.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8324605822563171,
"eval_runtime": 0.6594,
"eval_samples_per_second": 106.156,
"eval_steps_per_second": 3.033,
"step": 266
},
{
"epoch": 134.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8318061232566833,
"eval_runtime": 0.6395,
"eval_samples_per_second": 109.457,
"eval_steps_per_second": 3.127,
"step": 268
},
{
"epoch": 135.0,
"learning_rate": 4.0625000000000005e-05,
"loss": 0.8657,
"step": 270
},
{
"epoch": 135.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8312056660652161,
"eval_runtime": 0.8064,
"eval_samples_per_second": 86.802,
"eval_steps_per_second": 2.48,
"step": 270
},
{
"epoch": 136.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8306312561035156,
"eval_runtime": 0.645,
"eval_samples_per_second": 108.533,
"eval_steps_per_second": 3.101,
"step": 272
},
{
"epoch": 137.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8299986720085144,
"eval_runtime": 0.6678,
"eval_samples_per_second": 104.823,
"eval_steps_per_second": 2.995,
"step": 274
},
{
"epoch": 138.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8296393752098083,
"eval_runtime": 0.8159,
"eval_samples_per_second": 85.792,
"eval_steps_per_second": 2.451,
"step": 276
},
{
"epoch": 139.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8294458389282227,
"eval_runtime": 0.6396,
"eval_samples_per_second": 109.442,
"eval_steps_per_second": 3.127,
"step": 278
},
{
"epoch": 140.0,
"learning_rate": 3.7500000000000003e-05,
"loss": 0.9421,
"step": 280
},
{
"epoch": 140.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8292441368103027,
"eval_runtime": 0.6515,
"eval_samples_per_second": 107.445,
"eval_steps_per_second": 3.07,
"step": 280
},
{
"epoch": 141.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8291121125221252,
"eval_runtime": 0.8194,
"eval_samples_per_second": 85.428,
"eval_steps_per_second": 2.441,
"step": 282
},
{
"epoch": 142.0,
"eval_accuracy": 0.6714285714285714,
"eval_loss": 0.8290067315101624,
"eval_runtime": 0.9452,
"eval_samples_per_second": 74.057,
"eval_steps_per_second": 2.116,
"step": 284
},
{
"epoch": 143.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8290221095085144,
"eval_runtime": 0.6854,
"eval_samples_per_second": 102.129,
"eval_steps_per_second": 2.918,
"step": 286
},
{
"epoch": 144.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8288514018058777,
"eval_runtime": 0.6741,
"eval_samples_per_second": 103.846,
"eval_steps_per_second": 2.967,
"step": 288
},
{
"epoch": 145.0,
"learning_rate": 3.4375e-05,
"loss": 0.9066,
"step": 290
},
{
"epoch": 145.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8286876082420349,
"eval_runtime": 0.6545,
"eval_samples_per_second": 106.944,
"eval_steps_per_second": 3.056,
"step": 290
},
{
"epoch": 146.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8290360569953918,
"eval_runtime": 0.6611,
"eval_samples_per_second": 105.889,
"eval_steps_per_second": 3.025,
"step": 292
},
{
"epoch": 147.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8293396830558777,
"eval_runtime": 0.6543,
"eval_samples_per_second": 106.98,
"eval_steps_per_second": 3.057,
"step": 294
},
{
"epoch": 148.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8294445872306824,
"eval_runtime": 0.6455,
"eval_samples_per_second": 108.45,
"eval_steps_per_second": 3.099,
"step": 296
},
{
"epoch": 149.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8294763565063477,
"eval_runtime": 0.9727,
"eval_samples_per_second": 71.966,
"eval_steps_per_second": 2.056,
"step": 298
},
{
"epoch": 150.0,
"learning_rate": 3.125e-05,
"loss": 0.9068,
"step": 300
},
{
"epoch": 150.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8295239210128784,
"eval_runtime": 0.9775,
"eval_samples_per_second": 71.611,
"eval_steps_per_second": 2.046,
"step": 300
},
{
"epoch": 151.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8294230699539185,
"eval_runtime": 0.6644,
"eval_samples_per_second": 105.363,
"eval_steps_per_second": 3.01,
"step": 302
},
{
"epoch": 152.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.829305112361908,
"eval_runtime": 0.6604,
"eval_samples_per_second": 105.994,
"eval_steps_per_second": 3.028,
"step": 304
},
{
"epoch": 153.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8293172717094421,
"eval_runtime": 0.8353,
"eval_samples_per_second": 83.803,
"eval_steps_per_second": 2.394,
"step": 306
},
{
"epoch": 154.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8289957046508789,
"eval_runtime": 0.6575,
"eval_samples_per_second": 106.469,
"eval_steps_per_second": 3.042,
"step": 308
},
{
"epoch": 155.0,
"learning_rate": 2.8125000000000003e-05,
"loss": 0.8715,
"step": 310
},
{
"epoch": 155.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8286699056625366,
"eval_runtime": 0.6466,
"eval_samples_per_second": 108.266,
"eval_steps_per_second": 3.093,
"step": 310
},
{
"epoch": 156.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8283028602600098,
"eval_runtime": 0.8251,
"eval_samples_per_second": 84.843,
"eval_steps_per_second": 2.424,
"step": 312
},
{
"epoch": 157.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8276944160461426,
"eval_runtime": 0.6461,
"eval_samples_per_second": 108.335,
"eval_steps_per_second": 3.095,
"step": 314
},
{
"epoch": 158.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.827368438243866,
"eval_runtime": 0.6771,
"eval_samples_per_second": 103.379,
"eval_steps_per_second": 2.954,
"step": 316
},
{
"epoch": 159.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8269255757331848,
"eval_runtime": 0.8454,
"eval_samples_per_second": 82.804,
"eval_steps_per_second": 2.366,
"step": 318
},
{
"epoch": 160.0,
"learning_rate": 2.5e-05,
"loss": 0.8921,
"step": 320
},
{
"epoch": 160.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.826560914516449,
"eval_runtime": 0.6462,
"eval_samples_per_second": 108.325,
"eval_steps_per_second": 3.095,
"step": 320
},
{
"epoch": 161.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8263527154922485,
"eval_runtime": 0.6718,
"eval_samples_per_second": 104.193,
"eval_steps_per_second": 2.977,
"step": 322
},
{
"epoch": 162.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.826131284236908,
"eval_runtime": 0.8359,
"eval_samples_per_second": 83.747,
"eval_steps_per_second": 2.393,
"step": 324
},
{
"epoch": 163.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8259814977645874,
"eval_runtime": 0.6618,
"eval_samples_per_second": 105.778,
"eval_steps_per_second": 3.022,
"step": 326
},
{
"epoch": 164.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8257696032524109,
"eval_runtime": 0.6625,
"eval_samples_per_second": 105.664,
"eval_steps_per_second": 3.019,
"step": 328
},
{
"epoch": 165.0,
"learning_rate": 2.1875e-05,
"loss": 0.8768,
"step": 330
},
{
"epoch": 165.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.825222373008728,
"eval_runtime": 0.8436,
"eval_samples_per_second": 82.974,
"eval_steps_per_second": 2.371,
"step": 330
},
{
"epoch": 166.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8247527480125427,
"eval_runtime": 0.6665,
"eval_samples_per_second": 105.023,
"eval_steps_per_second": 3.001,
"step": 332
},
{
"epoch": 167.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8242577910423279,
"eval_runtime": 0.6669,
"eval_samples_per_second": 104.971,
"eval_steps_per_second": 2.999,
"step": 334
},
{
"epoch": 168.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8237206339836121,
"eval_runtime": 0.8327,
"eval_samples_per_second": 84.06,
"eval_steps_per_second": 2.402,
"step": 336
},
{
"epoch": 169.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8231467604637146,
"eval_runtime": 0.6532,
"eval_samples_per_second": 107.163,
"eval_steps_per_second": 3.062,
"step": 338
},
{
"epoch": 170.0,
"learning_rate": 1.8750000000000002e-05,
"loss": 0.8519,
"step": 340
},
{
"epoch": 170.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8226965665817261,
"eval_runtime": 0.6591,
"eval_samples_per_second": 106.199,
"eval_steps_per_second": 3.034,
"step": 340
},
{
"epoch": 171.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.822342038154602,
"eval_runtime": 0.8214,
"eval_samples_per_second": 85.216,
"eval_steps_per_second": 2.435,
"step": 342
},
{
"epoch": 172.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.822126030921936,
"eval_runtime": 0.6612,
"eval_samples_per_second": 105.861,
"eval_steps_per_second": 3.025,
"step": 344
},
{
"epoch": 173.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8220161199569702,
"eval_runtime": 0.6469,
"eval_samples_per_second": 108.212,
"eval_steps_per_second": 3.092,
"step": 346
},
{
"epoch": 174.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8218111991882324,
"eval_runtime": 0.8067,
"eval_samples_per_second": 86.769,
"eval_steps_per_second": 2.479,
"step": 348
},
{
"epoch": 175.0,
"learning_rate": 1.5625e-05,
"loss": 0.92,
"step": 350
},
{
"epoch": 175.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.821461021900177,
"eval_runtime": 0.6484,
"eval_samples_per_second": 107.962,
"eval_steps_per_second": 3.085,
"step": 350
},
{
"epoch": 176.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8210566639900208,
"eval_runtime": 0.6645,
"eval_samples_per_second": 105.342,
"eval_steps_per_second": 3.01,
"step": 352
},
{
"epoch": 177.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8207017183303833,
"eval_runtime": 0.8152,
"eval_samples_per_second": 85.873,
"eval_steps_per_second": 2.454,
"step": 354
},
{
"epoch": 178.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8204047679901123,
"eval_runtime": 0.7773,
"eval_samples_per_second": 90.05,
"eval_steps_per_second": 2.573,
"step": 356
},
{
"epoch": 179.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8200381398200989,
"eval_runtime": 0.6533,
"eval_samples_per_second": 107.15,
"eval_steps_per_second": 3.061,
"step": 358
},
{
"epoch": 180.0,
"learning_rate": 1.25e-05,
"loss": 0.879,
"step": 360
},
{
"epoch": 180.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8197112083435059,
"eval_runtime": 0.8254,
"eval_samples_per_second": 84.803,
"eval_steps_per_second": 2.423,
"step": 360
},
{
"epoch": 181.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8194140195846558,
"eval_runtime": 0.6736,
"eval_samples_per_second": 103.918,
"eval_steps_per_second": 2.969,
"step": 362
},
{
"epoch": 182.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8190609812736511,
"eval_runtime": 0.6501,
"eval_samples_per_second": 107.669,
"eval_steps_per_second": 3.076,
"step": 364
},
{
"epoch": 183.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 0.8187218308448792,
"eval_runtime": 0.7205,
"eval_samples_per_second": 97.148,
"eval_steps_per_second": 2.776,
"step": 366
},
{
"epoch": 184.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8184635639190674,
"eval_runtime": 0.656,
"eval_samples_per_second": 106.712,
"eval_steps_per_second": 3.049,
"step": 368
},
{
"epoch": 185.0,
"learning_rate": 9.375000000000001e-06,
"loss": 0.8893,
"step": 370
},
{
"epoch": 185.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8182028532028198,
"eval_runtime": 0.6563,
"eval_samples_per_second": 106.666,
"eval_steps_per_second": 3.048,
"step": 370
},
{
"epoch": 186.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8179557919502258,
"eval_runtime": 0.6961,
"eval_samples_per_second": 100.563,
"eval_steps_per_second": 2.873,
"step": 372
},
{
"epoch": 187.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8177469372749329,
"eval_runtime": 0.6584,
"eval_samples_per_second": 106.311,
"eval_steps_per_second": 3.037,
"step": 374
},
{
"epoch": 188.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8175888657569885,
"eval_runtime": 0.6728,
"eval_samples_per_second": 104.046,
"eval_steps_per_second": 2.973,
"step": 376
},
{
"epoch": 189.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8174628615379333,
"eval_runtime": 0.661,
"eval_samples_per_second": 105.894,
"eval_steps_per_second": 3.026,
"step": 378
},
{
"epoch": 190.0,
"learning_rate": 6.25e-06,
"loss": 0.8501,
"step": 380
},
{
"epoch": 190.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8172903656959534,
"eval_runtime": 0.6643,
"eval_samples_per_second": 105.379,
"eval_steps_per_second": 3.011,
"step": 380
},
{
"epoch": 191.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8171139359474182,
"eval_runtime": 0.7224,
"eval_samples_per_second": 96.898,
"eval_steps_per_second": 2.769,
"step": 382
},
{
"epoch": 192.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8169858455657959,
"eval_runtime": 0.6822,
"eval_samples_per_second": 102.605,
"eval_steps_per_second": 2.932,
"step": 384
},
{
"epoch": 193.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8169211149215698,
"eval_runtime": 0.6488,
"eval_samples_per_second": 107.887,
"eval_steps_per_second": 3.082,
"step": 386
},
{
"epoch": 194.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8168790340423584,
"eval_runtime": 0.8355,
"eval_samples_per_second": 83.778,
"eval_steps_per_second": 2.394,
"step": 388
},
{
"epoch": 195.0,
"learning_rate": 3.125e-06,
"loss": 0.8611,
"step": 390
},
{
"epoch": 195.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8168440461158752,
"eval_runtime": 0.6488,
"eval_samples_per_second": 107.884,
"eval_steps_per_second": 3.082,
"step": 390
},
{
"epoch": 196.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8168230056762695,
"eval_runtime": 0.6602,
"eval_samples_per_second": 106.026,
"eval_steps_per_second": 3.029,
"step": 392
},
{
"epoch": 197.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8167951107025146,
"eval_runtime": 0.8588,
"eval_samples_per_second": 81.511,
"eval_steps_per_second": 2.329,
"step": 394
},
{
"epoch": 198.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8167835474014282,
"eval_runtime": 0.6762,
"eval_samples_per_second": 103.513,
"eval_steps_per_second": 2.958,
"step": 396
},
{
"epoch": 199.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8167732954025269,
"eval_runtime": 0.6596,
"eval_samples_per_second": 106.128,
"eval_steps_per_second": 3.032,
"step": 398
},
{
"epoch": 200.0,
"learning_rate": 0.0,
"loss": 0.8881,
"step": 400
},
{
"epoch": 200.0,
"eval_accuracy": 0.7,
"eval_loss": 0.8167622089385986,
"eval_runtime": 0.844,
"eval_samples_per_second": 82.939,
"eval_steps_per_second": 2.37,
"step": 400
},
{
"epoch": 200.0,
"step": 400,
"total_flos": 2.2371640252416e+18,
"train_loss": 0.9259392237663269,
"train_runtime": 1042.9233,
"train_samples_per_second": 86.296,
"train_steps_per_second": 0.384
}
],
"logging_steps": 10,
"max_steps": 400,
"num_train_epochs": 200,
"save_steps": 500,
"total_flos": 2.2371640252416e+18,
"trial_name": null,
"trial_params": null
}