{ "best_metric": 0.7, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-4cls-0922/checkpoint-122", "epoch": 200.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9655490517616272, "eval_runtime": 0.8298, "eval_samples_per_second": 84.356, "eval_steps_per_second": 2.41, "step": 2 }, { "epoch": 2.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9653854370117188, "eval_runtime": 0.6383, "eval_samples_per_second": 109.671, "eval_steps_per_second": 3.133, "step": 4 }, { "epoch": 3.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9650949835777283, "eval_runtime": 0.6412, "eval_samples_per_second": 109.167, "eval_steps_per_second": 3.119, "step": 6 }, { "epoch": 4.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9646532535552979, "eval_runtime": 0.8218, "eval_samples_per_second": 85.18, "eval_steps_per_second": 2.434, "step": 8 }, { "epoch": 5.0, "learning_rate": 1.25e-05, "loss": 1.0064, "step": 10 }, { "epoch": 5.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9641380310058594, "eval_runtime": 0.6452, "eval_samples_per_second": 108.491, "eval_steps_per_second": 3.1, "step": 10 }, { "epoch": 6.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9635317921638489, "eval_runtime": 0.6347, "eval_samples_per_second": 110.284, "eval_steps_per_second": 3.151, "step": 12 }, { "epoch": 7.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9628700017929077, "eval_runtime": 0.8273, "eval_samples_per_second": 84.611, "eval_steps_per_second": 2.417, "step": 14 }, { "epoch": 8.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9623274803161621, "eval_runtime": 0.6551, "eval_samples_per_second": 106.859, "eval_steps_per_second": 3.053, "step": 16 }, { "epoch": 9.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9616996645927429, "eval_runtime": 0.646, "eval_samples_per_second": 108.363, "eval_steps_per_second": 3.096, "step": 18 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 0.9821, "step": 20 }, { "epoch": 10.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9611372947692871, "eval_runtime": 0.8313, "eval_samples_per_second": 84.202, "eval_steps_per_second": 2.406, "step": 20 }, { "epoch": 11.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9607454538345337, "eval_runtime": 0.8335, "eval_samples_per_second": 83.985, "eval_steps_per_second": 2.4, "step": 22 }, { "epoch": 12.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9604489207267761, "eval_runtime": 0.8194, "eval_samples_per_second": 85.429, "eval_steps_per_second": 2.441, "step": 24 }, { "epoch": 13.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9601203799247742, "eval_runtime": 0.8211, "eval_samples_per_second": 85.256, "eval_steps_per_second": 2.436, "step": 26 }, { "epoch": 14.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9597390294075012, "eval_runtime": 0.6563, "eval_samples_per_second": 106.663, "eval_steps_per_second": 3.048, "step": 28 }, { "epoch": 15.0, "learning_rate": 3.7500000000000003e-05, "loss": 1.0278, "step": 30 }, { "epoch": 15.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9591529965400696, "eval_runtime": 0.6495, "eval_samples_per_second": 107.778, "eval_steps_per_second": 3.079, "step": 30 }, { "epoch": 16.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9581246376037598, "eval_runtime": 0.791, "eval_samples_per_second": 88.495, "eval_steps_per_second": 2.528, "step": 32 }, { "epoch": 17.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9566996097564697, "eval_runtime": 0.6461, "eval_samples_per_second": 108.347, "eval_steps_per_second": 3.096, "step": 34 }, { "epoch": 18.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9551236629486084, "eval_runtime": 0.6456, "eval_samples_per_second": 108.429, "eval_steps_per_second": 3.098, "step": 36 }, { "epoch": 19.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9534342288970947, "eval_runtime": 0.8038, "eval_samples_per_second": 87.083, "eval_steps_per_second": 2.488, "step": 38 }, { "epoch": 20.0, "learning_rate": 5e-05, "loss": 0.9986, "step": 40 }, { "epoch": 20.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9513913989067078, "eval_runtime": 0.6423, "eval_samples_per_second": 108.98, "eval_steps_per_second": 3.114, "step": 40 }, { "epoch": 21.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9493252635002136, "eval_runtime": 0.6401, "eval_samples_per_second": 109.357, "eval_steps_per_second": 3.124, "step": 42 }, { "epoch": 22.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9471749663352966, "eval_runtime": 0.7957, "eval_samples_per_second": 87.97, "eval_steps_per_second": 2.513, "step": 44 }, { "epoch": 23.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9451875686645508, "eval_runtime": 0.6379, "eval_samples_per_second": 109.728, "eval_steps_per_second": 3.135, "step": 46 }, { "epoch": 24.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.943417489528656, "eval_runtime": 0.6466, "eval_samples_per_second": 108.259, "eval_steps_per_second": 3.093, "step": 48 }, { "epoch": 25.0, "learning_rate": 6.25e-05, "loss": 0.9973, "step": 50 }, { "epoch": 25.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9419717788696289, "eval_runtime": 0.8115, "eval_samples_per_second": 86.264, "eval_steps_per_second": 2.465, "step": 50 }, { "epoch": 26.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9404588937759399, "eval_runtime": 0.6332, "eval_samples_per_second": 110.551, "eval_steps_per_second": 3.159, "step": 52 }, { "epoch": 27.0, "eval_accuracy": 0.6285714285714286, "eval_loss": 0.9387302994728088, "eval_runtime": 0.64, "eval_samples_per_second": 109.375, "eval_steps_per_second": 3.125, "step": 54 }, { "epoch": 28.0, "eval_accuracy": 0.6285714285714286, "eval_loss": 0.9375677704811096, "eval_runtime": 0.8312, "eval_samples_per_second": 84.219, "eval_steps_per_second": 2.406, "step": 56 }, { "epoch": 29.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9368333220481873, "eval_runtime": 0.6385, "eval_samples_per_second": 109.629, "eval_steps_per_second": 3.132, "step": 58 }, { "epoch": 30.0, "learning_rate": 7.500000000000001e-05, "loss": 0.9936, "step": 60 }, { "epoch": 30.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9361710548400879, "eval_runtime": 0.6573, "eval_samples_per_second": 106.497, "eval_steps_per_second": 3.043, "step": 60 }, { "epoch": 31.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9361298680305481, "eval_runtime": 0.7944, "eval_samples_per_second": 88.115, "eval_steps_per_second": 2.518, "step": 62 }, { "epoch": 32.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9364449381828308, "eval_runtime": 0.6554, "eval_samples_per_second": 106.808, "eval_steps_per_second": 3.052, "step": 64 }, { "epoch": 33.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9371016621589661, "eval_runtime": 0.6483, "eval_samples_per_second": 107.97, "eval_steps_per_second": 3.085, "step": 66 }, { "epoch": 34.0, "eval_accuracy": 0.6428571428571429, "eval_loss": 0.9379546046257019, "eval_runtime": 0.8119, "eval_samples_per_second": 86.219, "eval_steps_per_second": 2.463, "step": 68 }, { "epoch": 35.0, "learning_rate": 8.75e-05, "loss": 0.9746, "step": 70 }, { "epoch": 35.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9379692077636719, "eval_runtime": 0.6362, "eval_samples_per_second": 110.031, "eval_steps_per_second": 3.144, "step": 70 }, { "epoch": 36.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9374780654907227, "eval_runtime": 0.639, "eval_samples_per_second": 109.543, "eval_steps_per_second": 3.13, "step": 72 }, { "epoch": 37.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9379698634147644, "eval_runtime": 0.8343, "eval_samples_per_second": 83.899, "eval_steps_per_second": 2.397, "step": 74 }, { "epoch": 38.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9375231862068176, "eval_runtime": 0.6395, "eval_samples_per_second": 109.457, "eval_steps_per_second": 3.127, "step": 76 }, { "epoch": 39.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9369739890098572, "eval_runtime": 0.6333, "eval_samples_per_second": 110.536, "eval_steps_per_second": 3.158, "step": 78 }, { "epoch": 40.0, "learning_rate": 0.0001, "loss": 1.0113, "step": 80 }, { "epoch": 40.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9361743330955505, "eval_runtime": 0.7993, "eval_samples_per_second": 87.579, "eval_steps_per_second": 2.502, "step": 80 }, { "epoch": 41.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9340663552284241, "eval_runtime": 0.6461, "eval_samples_per_second": 108.348, "eval_steps_per_second": 3.096, "step": 82 }, { "epoch": 42.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.9300563335418701, "eval_runtime": 0.636, "eval_samples_per_second": 110.058, "eval_steps_per_second": 3.145, "step": 84 }, { "epoch": 43.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9259787201881409, "eval_runtime": 0.8154, "eval_samples_per_second": 85.845, "eval_steps_per_second": 2.453, "step": 86 }, { "epoch": 44.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9224489331245422, "eval_runtime": 0.6369, "eval_samples_per_second": 109.903, "eval_steps_per_second": 3.14, "step": 88 }, { "epoch": 45.0, "learning_rate": 9.687500000000001e-05, "loss": 0.9756, "step": 90 }, { "epoch": 45.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9190067648887634, "eval_runtime": 0.6388, "eval_samples_per_second": 109.577, "eval_steps_per_second": 3.131, "step": 90 }, { "epoch": 46.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9154108166694641, "eval_runtime": 0.7966, "eval_samples_per_second": 87.873, "eval_steps_per_second": 2.511, "step": 92 }, { "epoch": 47.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.912346363067627, "eval_runtime": 0.6406, "eval_samples_per_second": 109.268, "eval_steps_per_second": 3.122, "step": 94 }, { "epoch": 48.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9091367721557617, "eval_runtime": 0.6398, "eval_samples_per_second": 109.41, "eval_steps_per_second": 3.126, "step": 96 }, { "epoch": 49.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9070726037025452, "eval_runtime": 0.8188, "eval_samples_per_second": 85.488, "eval_steps_per_second": 2.443, "step": 98 }, { "epoch": 50.0, "learning_rate": 9.375e-05, "loss": 0.9721, "step": 100 }, { "epoch": 50.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9055730700492859, "eval_runtime": 0.6361, "eval_samples_per_second": 110.054, "eval_steps_per_second": 3.144, "step": 100 }, { "epoch": 51.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9046576619148254, "eval_runtime": 0.6407, "eval_samples_per_second": 109.252, "eval_steps_per_second": 3.121, "step": 102 }, { "epoch": 52.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.9038794636726379, "eval_runtime": 0.8178, "eval_samples_per_second": 85.592, "eval_steps_per_second": 2.445, "step": 104 }, { "epoch": 53.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9030665755271912, "eval_runtime": 0.6283, "eval_samples_per_second": 111.419, "eval_steps_per_second": 3.183, "step": 106 }, { "epoch": 54.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.902490496635437, "eval_runtime": 0.8366, "eval_samples_per_second": 83.669, "eval_steps_per_second": 2.391, "step": 108 }, { "epoch": 55.0, "learning_rate": 9.062500000000001e-05, "loss": 0.9698, "step": 110 }, { "epoch": 55.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.902264416217804, "eval_runtime": 0.9891, "eval_samples_per_second": 70.774, "eval_steps_per_second": 2.022, "step": 110 }, { "epoch": 56.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.9011555314064026, "eval_runtime": 0.6498, "eval_samples_per_second": 107.729, "eval_steps_per_second": 3.078, "step": 112 }, { "epoch": 57.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8996686935424805, "eval_runtime": 0.8289, "eval_samples_per_second": 84.447, "eval_steps_per_second": 2.413, "step": 114 }, { "epoch": 58.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8982025980949402, "eval_runtime": 0.6375, "eval_samples_per_second": 109.798, "eval_steps_per_second": 3.137, "step": 116 }, { "epoch": 59.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8969982266426086, "eval_runtime": 0.6483, "eval_samples_per_second": 107.97, "eval_steps_per_second": 3.085, "step": 118 }, { "epoch": 60.0, "learning_rate": 8.75e-05, "loss": 0.9341, "step": 120 }, { "epoch": 60.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8956836462020874, "eval_runtime": 0.8303, "eval_samples_per_second": 84.307, "eval_steps_per_second": 2.409, "step": 120 }, { "epoch": 61.0, "eval_accuracy": 0.7, "eval_loss": 0.8946982622146606, "eval_runtime": 0.6483, "eval_samples_per_second": 107.981, "eval_steps_per_second": 3.085, "step": 122 }, { "epoch": 62.0, "eval_accuracy": 0.7, "eval_loss": 0.8940390348434448, "eval_runtime": 0.6421, "eval_samples_per_second": 109.023, "eval_steps_per_second": 3.115, "step": 124 }, { "epoch": 63.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8940520286560059, "eval_runtime": 0.8356, "eval_samples_per_second": 83.773, "eval_steps_per_second": 2.394, "step": 126 }, { "epoch": 64.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8934383988380432, "eval_runtime": 0.6317, "eval_samples_per_second": 110.812, "eval_steps_per_second": 3.166, "step": 128 }, { "epoch": 65.0, "learning_rate": 8.4375e-05, "loss": 0.9717, "step": 130 }, { "epoch": 65.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8916982412338257, "eval_runtime": 0.6456, "eval_samples_per_second": 108.418, "eval_steps_per_second": 3.098, "step": 130 }, { "epoch": 66.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8898113369941711, "eval_runtime": 0.8145, "eval_samples_per_second": 85.937, "eval_steps_per_second": 2.455, "step": 132 }, { "epoch": 67.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8883917927742004, "eval_runtime": 0.6387, "eval_samples_per_second": 109.599, "eval_steps_per_second": 3.131, "step": 134 }, { "epoch": 68.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8869962692260742, "eval_runtime": 0.6406, "eval_samples_per_second": 109.266, "eval_steps_per_second": 3.122, "step": 136 }, { "epoch": 69.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8853691816329956, "eval_runtime": 0.8216, "eval_samples_per_second": 85.2, "eval_steps_per_second": 2.434, "step": 138 }, { "epoch": 70.0, "learning_rate": 8.125000000000001e-05, "loss": 0.9655, "step": 140 }, { "epoch": 70.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8840075731277466, "eval_runtime": 0.6378, "eval_samples_per_second": 109.751, "eval_steps_per_second": 3.136, "step": 140 }, { "epoch": 71.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8826519250869751, "eval_runtime": 0.6384, "eval_samples_per_second": 109.644, "eval_steps_per_second": 3.133, "step": 142 }, { "epoch": 72.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8813565373420715, "eval_runtime": 0.8402, "eval_samples_per_second": 83.313, "eval_steps_per_second": 2.38, "step": 144 }, { "epoch": 73.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8805155754089355, "eval_runtime": 0.6428, "eval_samples_per_second": 108.905, "eval_steps_per_second": 3.112, "step": 146 }, { "epoch": 74.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8803040385246277, "eval_runtime": 0.649, "eval_samples_per_second": 107.857, "eval_steps_per_second": 3.082, "step": 148 }, { "epoch": 75.0, "learning_rate": 7.8125e-05, "loss": 0.9458, "step": 150 }, { "epoch": 75.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8801725506782532, "eval_runtime": 0.82, "eval_samples_per_second": 85.365, "eval_steps_per_second": 2.439, "step": 150 }, { "epoch": 76.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8797475695610046, "eval_runtime": 0.6476, "eval_samples_per_second": 108.085, "eval_steps_per_second": 3.088, "step": 152 }, { "epoch": 77.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8793725967407227, "eval_runtime": 0.6468, "eval_samples_per_second": 108.22, "eval_steps_per_second": 3.092, "step": 154 }, { "epoch": 78.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8795827031135559, "eval_runtime": 0.8346, "eval_samples_per_second": 83.873, "eval_steps_per_second": 2.396, "step": 156 }, { "epoch": 79.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8807878494262695, "eval_runtime": 0.6453, "eval_samples_per_second": 108.479, "eval_steps_per_second": 3.099, "step": 158 }, { "epoch": 80.0, "learning_rate": 7.500000000000001e-05, "loss": 0.9094, "step": 160 }, { "epoch": 80.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8817013502120972, "eval_runtime": 0.6393, "eval_samples_per_second": 109.492, "eval_steps_per_second": 3.128, "step": 160 }, { "epoch": 81.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8828238844871521, "eval_runtime": 0.8346, "eval_samples_per_second": 83.868, "eval_steps_per_second": 2.396, "step": 162 }, { "epoch": 82.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8835611939430237, "eval_runtime": 0.636, "eval_samples_per_second": 110.07, "eval_steps_per_second": 3.145, "step": 164 }, { "epoch": 83.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8830356001853943, "eval_runtime": 0.6535, "eval_samples_per_second": 107.117, "eval_steps_per_second": 3.06, "step": 166 }, { "epoch": 84.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8820751905441284, "eval_runtime": 0.8384, "eval_samples_per_second": 83.495, "eval_steps_per_second": 2.386, "step": 168 }, { "epoch": 85.0, "learning_rate": 7.1875e-05, "loss": 0.8719, "step": 170 }, { "epoch": 85.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8812506794929504, "eval_runtime": 0.6519, "eval_samples_per_second": 107.372, "eval_steps_per_second": 3.068, "step": 170 }, { "epoch": 86.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8804309368133545, "eval_runtime": 0.6326, "eval_samples_per_second": 110.652, "eval_steps_per_second": 3.161, "step": 172 }, { "epoch": 87.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8798118829727173, "eval_runtime": 0.8338, "eval_samples_per_second": 83.95, "eval_steps_per_second": 2.399, "step": 174 }, { "epoch": 88.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8787184953689575, "eval_runtime": 0.64, "eval_samples_per_second": 109.38, "eval_steps_per_second": 3.125, "step": 176 }, { "epoch": 89.0, "eval_accuracy": 0.6571428571428571, "eval_loss": 0.8769770264625549, "eval_runtime": 0.6382, "eval_samples_per_second": 109.679, "eval_steps_per_second": 3.134, "step": 178 }, { "epoch": 90.0, "learning_rate": 6.875e-05, "loss": 0.9288, "step": 180 }, { "epoch": 90.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8752025961875916, "eval_runtime": 0.8649, "eval_samples_per_second": 80.934, "eval_steps_per_second": 2.312, "step": 180 }, { "epoch": 91.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8721939921379089, "eval_runtime": 0.6536, "eval_samples_per_second": 107.101, "eval_steps_per_second": 3.06, "step": 182 }, { "epoch": 92.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8693682551383972, "eval_runtime": 0.6434, "eval_samples_per_second": 108.799, "eval_steps_per_second": 3.109, "step": 184 }, { "epoch": 93.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8670406937599182, "eval_runtime": 0.8337, "eval_samples_per_second": 83.963, "eval_steps_per_second": 2.399, "step": 186 }, { "epoch": 94.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8644655346870422, "eval_runtime": 0.6432, "eval_samples_per_second": 108.826, "eval_steps_per_second": 3.109, "step": 188 }, { "epoch": 95.0, "learning_rate": 6.562500000000001e-05, "loss": 0.9039, "step": 190 }, { "epoch": 95.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8624207973480225, "eval_runtime": 0.6482, "eval_samples_per_second": 107.999, "eval_steps_per_second": 3.086, "step": 190 }, { "epoch": 96.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8603058457374573, "eval_runtime": 0.8409, "eval_samples_per_second": 83.249, "eval_steps_per_second": 2.379, "step": 192 }, { "epoch": 97.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8583868741989136, "eval_runtime": 0.6484, "eval_samples_per_second": 107.951, "eval_steps_per_second": 3.084, "step": 194 }, { "epoch": 98.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8566268086433411, "eval_runtime": 0.6949, "eval_samples_per_second": 100.728, "eval_steps_per_second": 2.878, "step": 196 }, { "epoch": 99.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8553413152694702, "eval_runtime": 0.8276, "eval_samples_per_second": 84.585, "eval_steps_per_second": 2.417, "step": 198 }, { "epoch": 100.0, "learning_rate": 6.25e-05, "loss": 0.9081, "step": 200 }, { "epoch": 100.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8549684286117554, "eval_runtime": 0.6594, "eval_samples_per_second": 106.164, "eval_steps_per_second": 3.033, "step": 200 }, { "epoch": 101.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8551309108734131, "eval_runtime": 0.6588, "eval_samples_per_second": 106.255, "eval_steps_per_second": 3.036, "step": 202 }, { "epoch": 102.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8556391000747681, "eval_runtime": 0.8474, "eval_samples_per_second": 82.605, "eval_steps_per_second": 2.36, "step": 204 }, { "epoch": 103.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8558002710342407, "eval_runtime": 0.6568, "eval_samples_per_second": 106.577, "eval_steps_per_second": 3.045, "step": 206 }, { "epoch": 104.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8554455637931824, "eval_runtime": 0.6448, "eval_samples_per_second": 108.569, "eval_steps_per_second": 3.102, "step": 208 }, { "epoch": 105.0, "learning_rate": 5.9375e-05, "loss": 0.9142, "step": 210 }, { "epoch": 105.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8551297783851624, "eval_runtime": 0.8226, "eval_samples_per_second": 85.093, "eval_steps_per_second": 2.431, "step": 210 }, { "epoch": 106.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8553109169006348, "eval_runtime": 0.6501, "eval_samples_per_second": 107.668, "eval_steps_per_second": 3.076, "step": 212 }, { "epoch": 107.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.855134904384613, "eval_runtime": 0.637, "eval_samples_per_second": 109.882, "eval_steps_per_second": 3.139, "step": 214 }, { "epoch": 108.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8549013137817383, "eval_runtime": 0.8378, "eval_samples_per_second": 83.557, "eval_steps_per_second": 2.387, "step": 216 }, { "epoch": 109.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.854942798614502, "eval_runtime": 0.6596, "eval_samples_per_second": 106.131, "eval_steps_per_second": 3.032, "step": 218 }, { "epoch": 110.0, "learning_rate": 5.6250000000000005e-05, "loss": 0.9347, "step": 220 }, { "epoch": 110.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8551362752914429, "eval_runtime": 0.6674, "eval_samples_per_second": 104.886, "eval_steps_per_second": 2.997, "step": 220 }, { "epoch": 111.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8553721308708191, "eval_runtime": 0.8336, "eval_samples_per_second": 83.974, "eval_steps_per_second": 2.399, "step": 222 }, { "epoch": 112.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8548364639282227, "eval_runtime": 0.6506, "eval_samples_per_second": 107.599, "eval_steps_per_second": 3.074, "step": 224 }, { "epoch": 113.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.853795051574707, "eval_runtime": 0.6756, "eval_samples_per_second": 103.611, "eval_steps_per_second": 2.96, "step": 226 }, { "epoch": 114.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8524832129478455, "eval_runtime": 0.8168, "eval_samples_per_second": 85.696, "eval_steps_per_second": 2.448, "step": 228 }, { "epoch": 115.0, "learning_rate": 5.3125000000000004e-05, "loss": 0.8922, "step": 230 }, { "epoch": 115.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8512247204780579, "eval_runtime": 0.6476, "eval_samples_per_second": 108.096, "eval_steps_per_second": 3.088, "step": 230 }, { "epoch": 116.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8505221009254456, "eval_runtime": 0.6563, "eval_samples_per_second": 106.655, "eval_steps_per_second": 3.047, "step": 232 }, { "epoch": 117.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.849509596824646, "eval_runtime": 0.8193, "eval_samples_per_second": 85.434, "eval_steps_per_second": 2.441, "step": 234 }, { "epoch": 118.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8483795523643494, "eval_runtime": 0.6476, "eval_samples_per_second": 108.094, "eval_steps_per_second": 3.088, "step": 236 }, { "epoch": 119.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8471851944923401, "eval_runtime": 0.6472, "eval_samples_per_second": 108.158, "eval_steps_per_second": 3.09, "step": 238 }, { "epoch": 120.0, "learning_rate": 5e-05, "loss": 0.8897, "step": 240 }, { "epoch": 120.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8455559611320496, "eval_runtime": 0.8155, "eval_samples_per_second": 85.837, "eval_steps_per_second": 2.452, "step": 240 }, { "epoch": 121.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8439861536026001, "eval_runtime": 0.6794, "eval_samples_per_second": 103.026, "eval_steps_per_second": 2.944, "step": 242 }, { "epoch": 122.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8426181674003601, "eval_runtime": 0.6386, "eval_samples_per_second": 109.616, "eval_steps_per_second": 3.132, "step": 244 }, { "epoch": 123.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8412323594093323, "eval_runtime": 0.8222, "eval_samples_per_second": 85.135, "eval_steps_per_second": 2.432, "step": 246 }, { "epoch": 124.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8395997881889343, "eval_runtime": 0.6405, "eval_samples_per_second": 109.29, "eval_steps_per_second": 3.123, "step": 248 }, { "epoch": 125.0, "learning_rate": 4.6875e-05, "loss": 0.8829, "step": 250 }, { "epoch": 125.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8383906483650208, "eval_runtime": 0.6384, "eval_samples_per_second": 109.656, "eval_steps_per_second": 3.133, "step": 250 }, { "epoch": 126.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8372732996940613, "eval_runtime": 0.8007, "eval_samples_per_second": 87.425, "eval_steps_per_second": 2.498, "step": 252 }, { "epoch": 127.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8365365266799927, "eval_runtime": 0.6412, "eval_samples_per_second": 109.171, "eval_steps_per_second": 3.119, "step": 254 }, { "epoch": 128.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.835951030254364, "eval_runtime": 0.6518, "eval_samples_per_second": 107.389, "eval_steps_per_second": 3.068, "step": 256 }, { "epoch": 129.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8352962732315063, "eval_runtime": 0.8209, "eval_samples_per_second": 85.273, "eval_steps_per_second": 2.436, "step": 258 }, { "epoch": 130.0, "learning_rate": 4.375e-05, "loss": 0.8744, "step": 260 }, { "epoch": 130.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8344349265098572, "eval_runtime": 0.6608, "eval_samples_per_second": 105.932, "eval_steps_per_second": 3.027, "step": 260 }, { "epoch": 131.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8336659669876099, "eval_runtime": 0.6503, "eval_samples_per_second": 107.635, "eval_steps_per_second": 3.075, "step": 262 }, { "epoch": 132.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8329463601112366, "eval_runtime": 0.824, "eval_samples_per_second": 84.952, "eval_steps_per_second": 2.427, "step": 264 }, { "epoch": 133.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8324605822563171, "eval_runtime": 0.6594, "eval_samples_per_second": 106.156, "eval_steps_per_second": 3.033, "step": 266 }, { "epoch": 134.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8318061232566833, "eval_runtime": 0.6395, "eval_samples_per_second": 109.457, "eval_steps_per_second": 3.127, "step": 268 }, { "epoch": 135.0, "learning_rate": 4.0625000000000005e-05, "loss": 0.8657, "step": 270 }, { "epoch": 135.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8312056660652161, "eval_runtime": 0.8064, "eval_samples_per_second": 86.802, "eval_steps_per_second": 2.48, "step": 270 }, { "epoch": 136.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8306312561035156, "eval_runtime": 0.645, "eval_samples_per_second": 108.533, "eval_steps_per_second": 3.101, "step": 272 }, { "epoch": 137.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8299986720085144, "eval_runtime": 0.6678, "eval_samples_per_second": 104.823, "eval_steps_per_second": 2.995, "step": 274 }, { "epoch": 138.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8296393752098083, "eval_runtime": 0.8159, "eval_samples_per_second": 85.792, "eval_steps_per_second": 2.451, "step": 276 }, { "epoch": 139.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8294458389282227, "eval_runtime": 0.6396, "eval_samples_per_second": 109.442, "eval_steps_per_second": 3.127, "step": 278 }, { "epoch": 140.0, "learning_rate": 3.7500000000000003e-05, "loss": 0.9421, "step": 280 }, { "epoch": 140.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8292441368103027, "eval_runtime": 0.6515, "eval_samples_per_second": 107.445, "eval_steps_per_second": 3.07, "step": 280 }, { "epoch": 141.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8291121125221252, "eval_runtime": 0.8194, "eval_samples_per_second": 85.428, "eval_steps_per_second": 2.441, "step": 282 }, { "epoch": 142.0, "eval_accuracy": 0.6714285714285714, "eval_loss": 0.8290067315101624, "eval_runtime": 0.9452, "eval_samples_per_second": 74.057, "eval_steps_per_second": 2.116, "step": 284 }, { "epoch": 143.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8290221095085144, "eval_runtime": 0.6854, "eval_samples_per_second": 102.129, "eval_steps_per_second": 2.918, "step": 286 }, { "epoch": 144.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8288514018058777, "eval_runtime": 0.6741, "eval_samples_per_second": 103.846, "eval_steps_per_second": 2.967, "step": 288 }, { "epoch": 145.0, "learning_rate": 3.4375e-05, "loss": 0.9066, "step": 290 }, { "epoch": 145.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8286876082420349, "eval_runtime": 0.6545, "eval_samples_per_second": 106.944, "eval_steps_per_second": 3.056, "step": 290 }, { "epoch": 146.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8290360569953918, "eval_runtime": 0.6611, "eval_samples_per_second": 105.889, "eval_steps_per_second": 3.025, "step": 292 }, { "epoch": 147.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8293396830558777, "eval_runtime": 0.6543, "eval_samples_per_second": 106.98, "eval_steps_per_second": 3.057, "step": 294 }, { "epoch": 148.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8294445872306824, "eval_runtime": 0.6455, "eval_samples_per_second": 108.45, "eval_steps_per_second": 3.099, "step": 296 }, { "epoch": 149.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8294763565063477, "eval_runtime": 0.9727, "eval_samples_per_second": 71.966, "eval_steps_per_second": 2.056, "step": 298 }, { "epoch": 150.0, "learning_rate": 3.125e-05, "loss": 0.9068, "step": 300 }, { "epoch": 150.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8295239210128784, "eval_runtime": 0.9775, "eval_samples_per_second": 71.611, "eval_steps_per_second": 2.046, "step": 300 }, { "epoch": 151.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8294230699539185, "eval_runtime": 0.6644, "eval_samples_per_second": 105.363, "eval_steps_per_second": 3.01, "step": 302 }, { "epoch": 152.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.829305112361908, "eval_runtime": 0.6604, "eval_samples_per_second": 105.994, "eval_steps_per_second": 3.028, "step": 304 }, { "epoch": 153.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8293172717094421, "eval_runtime": 0.8353, "eval_samples_per_second": 83.803, "eval_steps_per_second": 2.394, "step": 306 }, { "epoch": 154.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8289957046508789, "eval_runtime": 0.6575, "eval_samples_per_second": 106.469, "eval_steps_per_second": 3.042, "step": 308 }, { "epoch": 155.0, "learning_rate": 2.8125000000000003e-05, "loss": 0.8715, "step": 310 }, { "epoch": 155.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8286699056625366, "eval_runtime": 0.6466, "eval_samples_per_second": 108.266, "eval_steps_per_second": 3.093, "step": 310 }, { "epoch": 156.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8283028602600098, "eval_runtime": 0.8251, "eval_samples_per_second": 84.843, "eval_steps_per_second": 2.424, "step": 312 }, { "epoch": 157.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8276944160461426, "eval_runtime": 0.6461, "eval_samples_per_second": 108.335, "eval_steps_per_second": 3.095, "step": 314 }, { "epoch": 158.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.827368438243866, "eval_runtime": 0.6771, "eval_samples_per_second": 103.379, "eval_steps_per_second": 2.954, "step": 316 }, { "epoch": 159.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8269255757331848, "eval_runtime": 0.8454, "eval_samples_per_second": 82.804, "eval_steps_per_second": 2.366, "step": 318 }, { "epoch": 160.0, "learning_rate": 2.5e-05, "loss": 0.8921, "step": 320 }, { "epoch": 160.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.826560914516449, "eval_runtime": 0.6462, "eval_samples_per_second": 108.325, "eval_steps_per_second": 3.095, "step": 320 }, { "epoch": 161.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8263527154922485, "eval_runtime": 0.6718, "eval_samples_per_second": 104.193, "eval_steps_per_second": 2.977, "step": 322 }, { "epoch": 162.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.826131284236908, "eval_runtime": 0.8359, "eval_samples_per_second": 83.747, "eval_steps_per_second": 2.393, "step": 324 }, { "epoch": 163.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8259814977645874, "eval_runtime": 0.6618, "eval_samples_per_second": 105.778, "eval_steps_per_second": 3.022, "step": 326 }, { "epoch": 164.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8257696032524109, "eval_runtime": 0.6625, "eval_samples_per_second": 105.664, "eval_steps_per_second": 3.019, "step": 328 }, { "epoch": 165.0, "learning_rate": 2.1875e-05, "loss": 0.8768, "step": 330 }, { "epoch": 165.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.825222373008728, "eval_runtime": 0.8436, "eval_samples_per_second": 82.974, "eval_steps_per_second": 2.371, "step": 330 }, { "epoch": 166.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8247527480125427, "eval_runtime": 0.6665, "eval_samples_per_second": 105.023, "eval_steps_per_second": 3.001, "step": 332 }, { "epoch": 167.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8242577910423279, "eval_runtime": 0.6669, "eval_samples_per_second": 104.971, "eval_steps_per_second": 2.999, "step": 334 }, { "epoch": 168.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8237206339836121, "eval_runtime": 0.8327, "eval_samples_per_second": 84.06, "eval_steps_per_second": 2.402, "step": 336 }, { "epoch": 169.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8231467604637146, "eval_runtime": 0.6532, "eval_samples_per_second": 107.163, "eval_steps_per_second": 3.062, "step": 338 }, { "epoch": 170.0, "learning_rate": 1.8750000000000002e-05, "loss": 0.8519, "step": 340 }, { "epoch": 170.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8226965665817261, "eval_runtime": 0.6591, "eval_samples_per_second": 106.199, "eval_steps_per_second": 3.034, "step": 340 }, { "epoch": 171.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.822342038154602, "eval_runtime": 0.8214, "eval_samples_per_second": 85.216, "eval_steps_per_second": 2.435, "step": 342 }, { "epoch": 172.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.822126030921936, "eval_runtime": 0.6612, "eval_samples_per_second": 105.861, "eval_steps_per_second": 3.025, "step": 344 }, { "epoch": 173.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8220161199569702, "eval_runtime": 0.6469, "eval_samples_per_second": 108.212, "eval_steps_per_second": 3.092, "step": 346 }, { "epoch": 174.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8218111991882324, "eval_runtime": 0.8067, "eval_samples_per_second": 86.769, "eval_steps_per_second": 2.479, "step": 348 }, { "epoch": 175.0, "learning_rate": 1.5625e-05, "loss": 0.92, "step": 350 }, { "epoch": 175.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.821461021900177, "eval_runtime": 0.6484, "eval_samples_per_second": 107.962, "eval_steps_per_second": 3.085, "step": 350 }, { "epoch": 176.0, "eval_accuracy": 0.7, "eval_loss": 0.8210566639900208, "eval_runtime": 0.6645, "eval_samples_per_second": 105.342, "eval_steps_per_second": 3.01, "step": 352 }, { "epoch": 177.0, "eval_accuracy": 0.7, "eval_loss": 0.8207017183303833, "eval_runtime": 0.8152, "eval_samples_per_second": 85.873, "eval_steps_per_second": 2.454, "step": 354 }, { "epoch": 178.0, "eval_accuracy": 0.7, "eval_loss": 0.8204047679901123, "eval_runtime": 0.7773, "eval_samples_per_second": 90.05, "eval_steps_per_second": 2.573, "step": 356 }, { "epoch": 179.0, "eval_accuracy": 0.7, "eval_loss": 0.8200381398200989, "eval_runtime": 0.6533, "eval_samples_per_second": 107.15, "eval_steps_per_second": 3.061, "step": 358 }, { "epoch": 180.0, "learning_rate": 1.25e-05, "loss": 0.879, "step": 360 }, { "epoch": 180.0, "eval_accuracy": 0.7, "eval_loss": 0.8197112083435059, "eval_runtime": 0.8254, "eval_samples_per_second": 84.803, "eval_steps_per_second": 2.423, "step": 360 }, { "epoch": 181.0, "eval_accuracy": 0.7, "eval_loss": 0.8194140195846558, "eval_runtime": 0.6736, "eval_samples_per_second": 103.918, "eval_steps_per_second": 2.969, "step": 362 }, { "epoch": 182.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8190609812736511, "eval_runtime": 0.6501, "eval_samples_per_second": 107.669, "eval_steps_per_second": 3.076, "step": 364 }, { "epoch": 183.0, "eval_accuracy": 0.6857142857142857, "eval_loss": 0.8187218308448792, "eval_runtime": 0.7205, "eval_samples_per_second": 97.148, "eval_steps_per_second": 2.776, "step": 366 }, { "epoch": 184.0, "eval_accuracy": 0.7, "eval_loss": 0.8184635639190674, "eval_runtime": 0.656, "eval_samples_per_second": 106.712, "eval_steps_per_second": 3.049, "step": 368 }, { "epoch": 185.0, "learning_rate": 9.375000000000001e-06, "loss": 0.8893, "step": 370 }, { "epoch": 185.0, "eval_accuracy": 0.7, "eval_loss": 0.8182028532028198, "eval_runtime": 0.6563, "eval_samples_per_second": 106.666, "eval_steps_per_second": 3.048, "step": 370 }, { "epoch": 186.0, "eval_accuracy": 0.7, "eval_loss": 0.8179557919502258, "eval_runtime": 0.6961, "eval_samples_per_second": 100.563, "eval_steps_per_second": 2.873, "step": 372 }, { "epoch": 187.0, "eval_accuracy": 0.7, "eval_loss": 0.8177469372749329, "eval_runtime": 0.6584, "eval_samples_per_second": 106.311, "eval_steps_per_second": 3.037, "step": 374 }, { "epoch": 188.0, "eval_accuracy": 0.7, "eval_loss": 0.8175888657569885, "eval_runtime": 0.6728, "eval_samples_per_second": 104.046, "eval_steps_per_second": 2.973, "step": 376 }, { "epoch": 189.0, "eval_accuracy": 0.7, "eval_loss": 0.8174628615379333, "eval_runtime": 0.661, "eval_samples_per_second": 105.894, "eval_steps_per_second": 3.026, "step": 378 }, { "epoch": 190.0, "learning_rate": 6.25e-06, "loss": 0.8501, "step": 380 }, { "epoch": 190.0, "eval_accuracy": 0.7, "eval_loss": 0.8172903656959534, "eval_runtime": 0.6643, "eval_samples_per_second": 105.379, "eval_steps_per_second": 3.011, "step": 380 }, { "epoch": 191.0, "eval_accuracy": 0.7, "eval_loss": 0.8171139359474182, "eval_runtime": 0.7224, "eval_samples_per_second": 96.898, "eval_steps_per_second": 2.769, "step": 382 }, { "epoch": 192.0, "eval_accuracy": 0.7, "eval_loss": 0.8169858455657959, "eval_runtime": 0.6822, "eval_samples_per_second": 102.605, "eval_steps_per_second": 2.932, "step": 384 }, { "epoch": 193.0, "eval_accuracy": 0.7, "eval_loss": 0.8169211149215698, "eval_runtime": 0.6488, "eval_samples_per_second": 107.887, "eval_steps_per_second": 3.082, "step": 386 }, { "epoch": 194.0, "eval_accuracy": 0.7, "eval_loss": 0.8168790340423584, "eval_runtime": 0.8355, "eval_samples_per_second": 83.778, "eval_steps_per_second": 2.394, "step": 388 }, { "epoch": 195.0, "learning_rate": 3.125e-06, "loss": 0.8611, "step": 390 }, { "epoch": 195.0, "eval_accuracy": 0.7, "eval_loss": 0.8168440461158752, "eval_runtime": 0.6488, "eval_samples_per_second": 107.884, "eval_steps_per_second": 3.082, "step": 390 }, { "epoch": 196.0, "eval_accuracy": 0.7, "eval_loss": 0.8168230056762695, "eval_runtime": 0.6602, "eval_samples_per_second": 106.026, "eval_steps_per_second": 3.029, "step": 392 }, { "epoch": 197.0, "eval_accuracy": 0.7, "eval_loss": 0.8167951107025146, "eval_runtime": 0.8588, "eval_samples_per_second": 81.511, "eval_steps_per_second": 2.329, "step": 394 }, { "epoch": 198.0, "eval_accuracy": 0.7, "eval_loss": 0.8167835474014282, "eval_runtime": 0.6762, "eval_samples_per_second": 103.513, "eval_steps_per_second": 2.958, "step": 396 }, { "epoch": 199.0, "eval_accuracy": 0.7, "eval_loss": 0.8167732954025269, "eval_runtime": 0.6596, "eval_samples_per_second": 106.128, "eval_steps_per_second": 3.032, "step": 398 }, { "epoch": 200.0, "learning_rate": 0.0, "loss": 0.8881, "step": 400 }, { "epoch": 200.0, "eval_accuracy": 0.7, "eval_loss": 0.8167622089385986, "eval_runtime": 0.844, "eval_samples_per_second": 82.939, "eval_steps_per_second": 2.37, "step": 400 }, { "epoch": 200.0, "step": 400, "total_flos": 2.2371640252416e+18, "train_loss": 0.9259392237663269, "train_runtime": 1042.9233, "train_samples_per_second": 86.296, "train_steps_per_second": 0.384 } ], "logging_steps": 10, "max_steps": 400, "num_train_epochs": 200, "save_steps": 500, "total_flos": 2.2371640252416e+18, "trial_name": null, "trial_params": null }