{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 70,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 115.0,
      "learning_rate": 2.5e-05,
      "loss": 2.3293,
      "step": 1
    },
    {
      "epoch": 0.14285714285714285,
      "eval_loss": 2.3536651134490967,
      "eval_matthews_correlation": -0.06591512432573125,
      "eval_runtime": 2.3426,
      "eval_samples_per_second": 94.34,
      "eval_steps_per_second": 1.708,
      "step": 1
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 57.75,
      "learning_rate": 5e-05,
      "loss": 1.5584,
      "step": 2
    },
    {
      "epoch": 0.2857142857142857,
      "eval_loss": 1.9677380323410034,
      "eval_matthews_correlation": -0.006695650197517534,
      "eval_runtime": 2.4387,
      "eval_samples_per_second": 90.624,
      "eval_steps_per_second": 1.64,
      "step": 2
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 61.0,
      "learning_rate": 4.9264705882352944e-05,
      "loss": 1.8664,
      "step": 3
    },
    {
      "epoch": 0.42857142857142855,
      "eval_loss": 3.9521396160125732,
      "eval_matthews_correlation": 0.19430266940230304,
      "eval_runtime": 2.4032,
      "eval_samples_per_second": 91.959,
      "eval_steps_per_second": 1.664,
      "step": 3
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 304.0,
      "learning_rate": 4.8529411764705885e-05,
      "loss": 3.7735,
      "step": 4
    },
    {
      "epoch": 0.5714285714285714,
      "eval_loss": 1.8678630590438843,
      "eval_matthews_correlation": 0.4251456680325992,
      "eval_runtime": 2.4969,
      "eval_samples_per_second": 88.509,
      "eval_steps_per_second": 1.602,
      "step": 4
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 87.0,
      "learning_rate": 4.7794117647058826e-05,
      "loss": 1.9643,
      "step": 5
    },
    {
      "epoch": 0.7142857142857143,
      "eval_loss": 2.8472695350646973,
      "eval_matthews_correlation": 0.33395224488110414,
      "eval_runtime": 2.433,
      "eval_samples_per_second": 90.836,
      "eval_steps_per_second": 1.644,
      "step": 5
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 230.0,
      "learning_rate": 4.705882352941177e-05,
      "loss": 3.0758,
      "step": 6
    },
    {
      "epoch": 0.8571428571428571,
      "eval_loss": 2.5650100708007812,
      "eval_matthews_correlation": 0.3136710011001962,
      "eval_runtime": 2.5174,
      "eval_samples_per_second": 87.789,
      "eval_steps_per_second": 1.589,
      "step": 6
    },
    {
      "epoch": 1.0,
      "grad_norm": 239.0,
      "learning_rate": 4.632352941176471e-05,
      "loss": 3.3907,
      "step": 7
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2579513788223267,
      "eval_matthews_correlation": 0.41294426532673595,
      "eval_runtime": 2.3986,
      "eval_samples_per_second": 92.137,
      "eval_steps_per_second": 1.668,
      "step": 7
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 52.75,
      "learning_rate": 4.558823529411765e-05,
      "loss": 1.0873,
      "step": 8
    },
    {
      "epoch": 1.1428571428571428,
      "eval_loss": 2.0917861461639404,
      "eval_matthews_correlation": 0.362354082768012,
      "eval_runtime": 2.3981,
      "eval_samples_per_second": 92.157,
      "eval_steps_per_second": 1.668,
      "step": 8
    },
    {
      "epoch": 1.2857142857142856,
      "grad_norm": 178.0,
      "learning_rate": 4.485294117647059e-05,
      "loss": 1.5217,
      "step": 9
    },
    {
      "epoch": 1.2857142857142856,
      "eval_loss": 2.355184555053711,
      "eval_matthews_correlation": 0.291910232021563,
      "eval_runtime": 2.4479,
      "eval_samples_per_second": 90.283,
      "eval_steps_per_second": 1.634,
      "step": 9
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 174.0,
      "learning_rate": 4.411764705882353e-05,
      "loss": 1.381,
      "step": 10
    },
    {
      "epoch": 1.4285714285714286,
      "eval_loss": 1.6294102668762207,
      "eval_matthews_correlation": 0.42663939531137113,
      "eval_runtime": 2.4472,
      "eval_samples_per_second": 90.306,
      "eval_steps_per_second": 1.635,
      "step": 10
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 173.0,
      "learning_rate": 4.3382352941176474e-05,
      "loss": 1.837,
      "step": 11
    },
    {
      "epoch": 1.5714285714285714,
      "eval_loss": 1.2466400861740112,
      "eval_matthews_correlation": 0.4498698839028164,
      "eval_runtime": 2.4515,
      "eval_samples_per_second": 90.147,
      "eval_steps_per_second": 1.632,
      "step": 11
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 38.75,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 1.0087,
      "step": 12
    },
    {
      "epoch": 1.7142857142857144,
      "eval_loss": 1.7990175485610962,
      "eval_matthews_correlation": 0.27245548483246373,
      "eval_runtime": 2.4526,
      "eval_samples_per_second": 90.108,
      "eval_steps_per_second": 1.631,
      "step": 12
    },
    {
      "epoch": 1.8571428571428572,
      "grad_norm": 177.0,
      "learning_rate": 4.1911764705882356e-05,
      "loss": 1.3649,
      "step": 13
    },
    {
      "epoch": 1.8571428571428572,
      "eval_loss": 1.8390474319458008,
      "eval_matthews_correlation": 0.25282726975747233,
      "eval_runtime": 2.4436,
      "eval_samples_per_second": 90.44,
      "eval_steps_per_second": 1.637,
      "step": 13
    },
    {
      "epoch": 2.0,
      "grad_norm": 188.0,
      "learning_rate": 4.11764705882353e-05,
      "loss": 1.4765,
      "step": 14
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.3225667476654053,
      "eval_matthews_correlation": 0.42075821027925114,
      "eval_runtime": 2.4529,
      "eval_samples_per_second": 90.099,
      "eval_steps_per_second": 1.631,
      "step": 14
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 90.5,
      "learning_rate": 4.044117647058824e-05,
      "loss": 0.721,
      "step": 15
    },
    {
      "epoch": 2.142857142857143,
      "eval_loss": 1.0870685577392578,
      "eval_matthews_correlation": 0.3977728811458742,
      "eval_runtime": 2.4487,
      "eval_samples_per_second": 90.251,
      "eval_steps_per_second": 1.633,
      "step": 15
    },
    {
      "epoch": 2.2857142857142856,
      "grad_norm": 30.75,
      "learning_rate": 3.970588235294117e-05,
      "loss": 0.5558,
      "step": 16
    },
    {
      "epoch": 2.2857142857142856,
      "eval_loss": 1.6198923587799072,
      "eval_matthews_correlation": 0.3547921342690866,
      "eval_runtime": 2.4476,
      "eval_samples_per_second": 90.293,
      "eval_steps_per_second": 1.634,
      "step": 16
    },
    {
      "epoch": 2.4285714285714284,
      "grad_norm": 98.5,
      "learning_rate": 3.897058823529412e-05,
      "loss": 0.6862,
      "step": 17
    },
    {
      "epoch": 2.4285714285714284,
      "eval_loss": 1.977512001991272,
      "eval_matthews_correlation": 0.325163923401339,
      "eval_runtime": 2.451,
      "eval_samples_per_second": 90.168,
      "eval_steps_per_second": 1.632,
      "step": 17
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 147.0,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 1.2147,
      "step": 18
    },
    {
      "epoch": 2.571428571428571,
      "eval_loss": 1.86572265625,
      "eval_matthews_correlation": 0.34367974406613594,
      "eval_runtime": 2.444,
      "eval_samples_per_second": 90.427,
      "eval_steps_per_second": 1.637,
      "step": 18
    },
    {
      "epoch": 2.7142857142857144,
      "grad_norm": 123.5,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.9887,
      "step": 19
    },
    {
      "epoch": 2.7142857142857144,
      "eval_loss": 1.4242559671401978,
      "eval_matthews_correlation": 0.4393080679849749,
      "eval_runtime": 2.4464,
      "eval_samples_per_second": 90.337,
      "eval_steps_per_second": 1.635,
      "step": 19
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 112.0,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 0.9255,
      "step": 20
    },
    {
      "epoch": 2.857142857142857,
      "eval_loss": 1.0208874940872192,
      "eval_matthews_correlation": 0.4561037659877011,
      "eval_runtime": 2.4474,
      "eval_samples_per_second": 90.299,
      "eval_steps_per_second": 1.634,
      "step": 20
    },
    {
      "epoch": 3.0,
      "grad_norm": 28.75,
      "learning_rate": 3.6029411764705886e-05,
      "loss": 0.521,
      "step": 21
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.15372896194458,
      "eval_matthews_correlation": 0.42984107582508957,
      "eval_runtime": 2.4504,
      "eval_samples_per_second": 90.19,
      "eval_steps_per_second": 1.632,
      "step": 21
    },
    {
      "epoch": 3.142857142857143,
      "grad_norm": 103.0,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.6449,
      "step": 22
    },
    {
      "epoch": 3.142857142857143,
      "eval_loss": 1.4311715364456177,
      "eval_matthews_correlation": 0.35056721354125436,
      "eval_runtime": 2.4502,
      "eval_samples_per_second": 90.197,
      "eval_steps_per_second": 1.633,
      "step": 22
    },
    {
      "epoch": 3.2857142857142856,
      "grad_norm": 120.5,
      "learning_rate": 3.455882352941177e-05,
      "loss": 0.8555,
      "step": 23
    },
    {
      "epoch": 3.2857142857142856,
      "eval_loss": 1.412217140197754,
      "eval_matthews_correlation": 0.3570256366956951,
      "eval_runtime": 2.4452,
      "eval_samples_per_second": 90.382,
      "eval_steps_per_second": 1.636,
      "step": 23
    },
    {
      "epoch": 3.4285714285714284,
      "grad_norm": 91.5,
      "learning_rate": 3.382352941176471e-05,
      "loss": 0.7243,
      "step": 24
    },
    {
      "epoch": 3.4285714285714284,
      "eval_loss": 1.155587077140808,
      "eval_matthews_correlation": 0.4394341621715417,
      "eval_runtime": 2.45,
      "eval_samples_per_second": 90.205,
      "eval_steps_per_second": 1.633,
      "step": 24
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 57.0,
      "learning_rate": 3.308823529411765e-05,
      "loss": 0.3214,
      "step": 25
    },
    {
      "epoch": 3.571428571428571,
      "eval_loss": 0.9137119054794312,
      "eval_matthews_correlation": 0.5300936664086263,
      "eval_runtime": 2.4473,
      "eval_samples_per_second": 90.304,
      "eval_steps_per_second": 1.634,
      "step": 25
    },
    {
      "epoch": 3.7142857142857144,
      "grad_norm": 40.0,
      "learning_rate": 3.235294117647059e-05,
      "loss": 0.3758,
      "step": 26
    },
    {
      "epoch": 3.7142857142857144,
      "eval_loss": 0.9561758041381836,
      "eval_matthews_correlation": 0.4651714656882337,
      "eval_runtime": 2.4463,
      "eval_samples_per_second": 90.34,
      "eval_steps_per_second": 1.635,
      "step": 26
    },
    {
      "epoch": 3.857142857142857,
      "grad_norm": 21.75,
      "learning_rate": 3.161764705882353e-05,
      "loss": 0.2258,
      "step": 27
    },
    {
      "epoch": 3.857142857142857,
      "eval_loss": 1.128272533416748,
      "eval_matthews_correlation": 0.4814890197334374,
      "eval_runtime": 2.4491,
      "eval_samples_per_second": 90.237,
      "eval_steps_per_second": 1.633,
      "step": 27
    },
    {
      "epoch": 4.0,
      "grad_norm": 69.0,
      "learning_rate": 3.0882352941176475e-05,
      "loss": 0.3708,
      "step": 28
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.177422285079956,
      "eval_matthews_correlation": 0.4998140485129056,
      "eval_runtime": 2.4044,
      "eval_samples_per_second": 91.916,
      "eval_steps_per_second": 1.664,
      "step": 28
    },
    {
      "epoch": 4.142857142857143,
      "grad_norm": 67.0,
      "learning_rate": 3.0147058823529413e-05,
      "loss": 0.3731,
      "step": 29
    },
    {
      "epoch": 4.142857142857143,
      "eval_loss": 1.086193323135376,
      "eval_matthews_correlation": 0.4918366209765684,
      "eval_runtime": 2.4518,
      "eval_samples_per_second": 90.139,
      "eval_steps_per_second": 1.631,
      "step": 29
    },
    {
      "epoch": 4.285714285714286,
      "grad_norm": 101.5,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.4808,
      "step": 30
    },
    {
      "epoch": 4.285714285714286,
      "eval_loss": 0.9441786408424377,
      "eval_matthews_correlation": 0.46521158567025794,
      "eval_runtime": 2.4478,
      "eval_samples_per_second": 90.284,
      "eval_steps_per_second": 1.634,
      "step": 30
    },
    {
      "epoch": 4.428571428571429,
      "grad_norm": 55.25,
      "learning_rate": 2.8676470588235295e-05,
      "loss": 0.2656,
      "step": 31
    },
    {
      "epoch": 4.428571428571429,
      "eval_loss": 0.8748857975006104,
      "eval_matthews_correlation": 0.4648371224980464,
      "eval_runtime": 2.4437,
      "eval_samples_per_second": 90.435,
      "eval_steps_per_second": 1.637,
      "step": 31
    },
    {
      "epoch": 4.571428571428571,
      "grad_norm": 11.6875,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 0.2129,
      "step": 32
    },
    {
      "epoch": 4.571428571428571,
      "eval_loss": 0.9578605890274048,
      "eval_matthews_correlation": 0.48306400916503295,
      "eval_runtime": 2.4414,
      "eval_samples_per_second": 90.523,
      "eval_steps_per_second": 1.638,
      "step": 32
    },
    {
      "epoch": 4.714285714285714,
      "grad_norm": 37.75,
      "learning_rate": 2.7205882352941174e-05,
      "loss": 0.1662,
      "step": 33
    },
    {
      "epoch": 4.714285714285714,
      "eval_loss": 1.0285699367523193,
      "eval_matthews_correlation": 0.45099444835506275,
      "eval_runtime": 2.4439,
      "eval_samples_per_second": 90.428,
      "eval_steps_per_second": 1.637,
      "step": 33
    },
    {
      "epoch": 4.857142857142857,
      "grad_norm": 72.0,
      "learning_rate": 2.647058823529412e-05,
      "loss": 0.3764,
      "step": 34
    },
    {
      "epoch": 4.857142857142857,
      "eval_loss": 1.0230013132095337,
      "eval_matthews_correlation": 0.44663306131570457,
      "eval_runtime": 2.4405,
      "eval_samples_per_second": 90.554,
      "eval_steps_per_second": 1.639,
      "step": 34
    },
    {
      "epoch": 5.0,
      "grad_norm": 52.0,
      "learning_rate": 2.5735294117647057e-05,
      "loss": 0.2526,
      "step": 35
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.9536852836608887,
      "eval_matthews_correlation": 0.464431647717182,
      "eval_runtime": 2.4485,
      "eval_samples_per_second": 90.259,
      "eval_steps_per_second": 1.634,
      "step": 35
    },
    {
      "epoch": 5.142857142857143,
      "grad_norm": 29.625,
      "learning_rate": 2.5e-05,
      "loss": 0.1377,
      "step": 36
    },
    {
      "epoch": 5.142857142857143,
      "eval_loss": 0.9085801243782043,
      "eval_matthews_correlation": 0.4468374029095797,
      "eval_runtime": 2.4414,
      "eval_samples_per_second": 90.521,
      "eval_steps_per_second": 1.638,
      "step": 36
    },
    {
      "epoch": 5.285714285714286,
      "grad_norm": 7.9375,
      "learning_rate": 2.4264705882352942e-05,
      "loss": 0.0878,
      "step": 37
    },
    {
      "epoch": 5.285714285714286,
      "eval_loss": 0.9484843015670776,
      "eval_matthews_correlation": 0.4447429282606205,
      "eval_runtime": 2.4472,
      "eval_samples_per_second": 90.308,
      "eval_steps_per_second": 1.635,
      "step": 37
    },
    {
      "epoch": 5.428571428571429,
      "grad_norm": 35.75,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.1931,
      "step": 38
    },
    {
      "epoch": 5.428571428571429,
      "eval_loss": 0.9881438612937927,
      "eval_matthews_correlation": 0.45462446923262295,
      "eval_runtime": 2.4467,
      "eval_samples_per_second": 90.327,
      "eval_steps_per_second": 1.635,
      "step": 38
    },
    {
      "epoch": 5.571428571428571,
      "grad_norm": 30.5,
      "learning_rate": 2.2794117647058825e-05,
      "loss": 0.2088,
      "step": 39
    },
    {
      "epoch": 5.571428571428571,
      "eval_loss": 0.9699224829673767,
      "eval_matthews_correlation": 0.46521158567025794,
      "eval_runtime": 2.4373,
      "eval_samples_per_second": 90.673,
      "eval_steps_per_second": 1.641,
      "step": 39
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 18.125,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 0.0894,
      "step": 40
    },
    {
      "epoch": 5.714285714285714,
      "eval_loss": 0.9157021045684814,
      "eval_matthews_correlation": 0.4757491334290412,
      "eval_runtime": 2.4471,
      "eval_samples_per_second": 90.312,
      "eval_steps_per_second": 1.635,
      "step": 40
    },
    {
      "epoch": 5.857142857142857,
      "grad_norm": 13.75,
      "learning_rate": 2.1323529411764707e-05,
      "loss": 0.0847,
      "step": 41
    },
    {
      "epoch": 5.857142857142857,
      "eval_loss": 0.875838041305542,
      "eval_matthews_correlation": 0.4535865464379655,
      "eval_runtime": 2.4434,
      "eval_samples_per_second": 90.449,
      "eval_steps_per_second": 1.637,
      "step": 41
    },
    {
      "epoch": 6.0,
      "grad_norm": 7.78125,
      "learning_rate": 2.058823529411765e-05,
      "loss": 0.0814,
      "step": 42
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.8775647282600403,
      "eval_matthews_correlation": 0.5032327641012869,
      "eval_runtime": 2.4462,
      "eval_samples_per_second": 90.343,
      "eval_steps_per_second": 1.635,
      "step": 42
    },
    {
      "epoch": 6.142857142857143,
      "grad_norm": 11.1875,
      "learning_rate": 1.9852941176470586e-05,
      "loss": 0.0322,
      "step": 43
    },
    {
      "epoch": 6.142857142857143,
      "eval_loss": 0.8905591368675232,
      "eval_matthews_correlation": 0.4985778131509898,
      "eval_runtime": 2.4501,
      "eval_samples_per_second": 90.201,
      "eval_steps_per_second": 1.633,
      "step": 43
    },
    {
      "epoch": 6.285714285714286,
      "grad_norm": 18.875,
      "learning_rate": 1.9117647058823528e-05,
      "loss": 0.0929,
      "step": 44
    },
    {
      "epoch": 6.285714285714286,
      "eval_loss": 0.8745710849761963,
      "eval_matthews_correlation": 0.5063592272792496,
      "eval_runtime": 2.4431,
      "eval_samples_per_second": 90.459,
      "eval_steps_per_second": 1.637,
      "step": 44
    },
    {
      "epoch": 6.428571428571429,
      "grad_norm": 23.375,
      "learning_rate": 1.8382352941176472e-05,
      "loss": 0.092,
      "step": 45
    },
    {
      "epoch": 6.428571428571429,
      "eval_loss": 0.8459537625312805,
      "eval_matthews_correlation": 0.5221955837331487,
      "eval_runtime": 2.4409,
      "eval_samples_per_second": 90.541,
      "eval_steps_per_second": 1.639,
      "step": 45
    },
    {
      "epoch": 6.571428571428571,
      "grad_norm": 14.1875,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.0364,
      "step": 46
    },
    {
      "epoch": 6.571428571428571,
      "eval_loss": 0.8510618805885315,
      "eval_matthews_correlation": 0.471552805496144,
      "eval_runtime": 2.4458,
      "eval_samples_per_second": 90.361,
      "eval_steps_per_second": 1.635,
      "step": 46
    },
    {
      "epoch": 6.714285714285714,
      "grad_norm": 7.65625,
      "learning_rate": 1.6911764705882355e-05,
      "loss": 0.0793,
      "step": 47
    },
    {
      "epoch": 6.714285714285714,
      "eval_loss": 0.8684913516044617,
      "eval_matthews_correlation": 0.4581214898872277,
      "eval_runtime": 2.4434,
      "eval_samples_per_second": 90.447,
      "eval_steps_per_second": 1.637,
      "step": 47
    },
    {
      "epoch": 6.857142857142857,
      "grad_norm": 16.875,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 0.1175,
      "step": 48
    },
    {
      "epoch": 6.857142857142857,
      "eval_loss": 0.8669148683547974,
      "eval_matthews_correlation": 0.4594112493758089,
      "eval_runtime": 2.452,
      "eval_samples_per_second": 90.13,
      "eval_steps_per_second": 1.631,
      "step": 48
    },
    {
      "epoch": 7.0,
      "grad_norm": 12.9375,
      "learning_rate": 1.5441176470588237e-05,
      "loss": 0.0645,
      "step": 49
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.8497275710105896,
      "eval_matthews_correlation": 0.471552805496144,
      "eval_runtime": 2.4455,
      "eval_samples_per_second": 90.372,
      "eval_steps_per_second": 1.636,
      "step": 49
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 1.5625,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 0.0144,
      "step": 50
    },
    {
      "epoch": 7.142857142857143,
      "eval_loss": 0.8556525707244873,
      "eval_matthews_correlation": 0.48388603901876864,
      "eval_runtime": 2.4464,
      "eval_samples_per_second": 90.335,
      "eval_steps_per_second": 1.635,
      "step": 50
    },
    {
      "epoch": 7.285714285714286,
      "grad_norm": 4.1875,
      "learning_rate": 1.3970588235294118e-05,
      "loss": 0.0368,
      "step": 51
    },
    {
      "epoch": 7.285714285714286,
      "eval_loss": 0.8561367988586426,
      "eval_matthews_correlation": 0.498477763912085,
      "eval_runtime": 2.4469,
      "eval_samples_per_second": 90.318,
      "eval_steps_per_second": 1.635,
      "step": 51
    },
    {
      "epoch": 7.428571428571429,
      "grad_norm": 5.15625,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.0434,
      "step": 52
    },
    {
      "epoch": 7.428571428571429,
      "eval_loss": 0.8746027946472168,
      "eval_matthews_correlation": 0.49478720037093177,
      "eval_runtime": 2.4474,
      "eval_samples_per_second": 90.301,
      "eval_steps_per_second": 1.634,
      "step": 52
    },
    {
      "epoch": 7.571428571428571,
      "grad_norm": 8.9375,
      "learning_rate": 1.25e-05,
      "loss": 0.0601,
      "step": 53
    },
    {
      "epoch": 7.571428571428571,
      "eval_loss": 0.8838711977005005,
      "eval_matthews_correlation": 0.4974465105449455,
      "eval_runtime": 2.4486,
      "eval_samples_per_second": 90.257,
      "eval_steps_per_second": 1.634,
      "step": 53
    },
    {
      "epoch": 7.714285714285714,
      "grad_norm": 9.375,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.0495,
      "step": 54
    },
    {
      "epoch": 7.714285714285714,
      "eval_loss": 0.9004490375518799,
      "eval_matthews_correlation": 0.4974465105449455,
      "eval_runtime": 2.4468,
      "eval_samples_per_second": 90.322,
      "eval_steps_per_second": 1.635,
      "step": 54
    },
    {
      "epoch": 7.857142857142857,
      "grad_norm": 10.375,
      "learning_rate": 1.1029411764705883e-05,
      "loss": 0.038,
      "step": 55
    },
    {
      "epoch": 7.857142857142857,
      "eval_loss": 0.9040365219116211,
      "eval_matthews_correlation": 0.4922849337838682,
      "eval_runtime": 2.4021,
      "eval_samples_per_second": 92.005,
      "eval_steps_per_second": 1.665,
      "step": 55
    },
    {
      "epoch": 8.0,
      "grad_norm": 5.46875,
      "learning_rate": 1.0294117647058824e-05,
      "loss": 0.0201,
      "step": 56
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.9134412407875061,
      "eval_matthews_correlation": 0.45553170828567224,
      "eval_runtime": 2.4448,
      "eval_samples_per_second": 90.396,
      "eval_steps_per_second": 1.636,
      "step": 56
    },
    {
      "epoch": 8.142857142857142,
      "grad_norm": 5.59375,
      "learning_rate": 9.558823529411764e-06,
      "loss": 0.0205,
      "step": 57
    },
    {
      "epoch": 8.142857142857142,
      "eval_loss": 0.927843451499939,
      "eval_matthews_correlation": 0.47656224625627874,
      "eval_runtime": 2.3994,
      "eval_samples_per_second": 92.108,
      "eval_steps_per_second": 1.667,
      "step": 57
    },
    {
      "epoch": 8.285714285714286,
      "grad_norm": 4.0,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.0161,
      "step": 58
    },
    {
      "epoch": 8.285714285714286,
      "eval_loss": 0.9634011387825012,
      "eval_matthews_correlation": 0.496768767764674,
      "eval_runtime": 2.4014,
      "eval_samples_per_second": 92.028,
      "eval_steps_per_second": 1.666,
      "step": 58
    },
    {
      "epoch": 8.428571428571429,
      "grad_norm": 12.9375,
      "learning_rate": 8.088235294117648e-06,
      "loss": 0.0345,
      "step": 59
    },
    {
      "epoch": 8.428571428571429,
      "eval_loss": 0.9694340825080872,
      "eval_matthews_correlation": 0.4972077082664266,
      "eval_runtime": 2.4455,
      "eval_samples_per_second": 90.37,
      "eval_steps_per_second": 1.636,
      "step": 59
    },
    {
      "epoch": 8.571428571428571,
      "grad_norm": 4.84375,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 0.014,
      "step": 60
    },
    {
      "epoch": 8.571428571428571,
      "eval_loss": 0.9751154184341431,
      "eval_matthews_correlation": 0.4972077082664266,
      "eval_runtime": 2.3952,
      "eval_samples_per_second": 92.267,
      "eval_steps_per_second": 1.67,
      "step": 60
    },
    {
      "epoch": 8.714285714285714,
      "grad_norm": 15.6875,
      "learning_rate": 6.61764705882353e-06,
      "loss": 0.038,
      "step": 61
    },
    {
      "epoch": 8.714285714285714,
      "eval_loss": 0.9702379107475281,
      "eval_matthews_correlation": 0.5075469372121825,
      "eval_runtime": 2.3957,
      "eval_samples_per_second": 92.25,
      "eval_steps_per_second": 1.67,
      "step": 61
    },
    {
      "epoch": 8.857142857142858,
      "grad_norm": 7.78125,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.0316,
      "step": 62
    },
    {
      "epoch": 8.857142857142858,
      "eval_loss": 0.9534982442855835,
      "eval_matthews_correlation": 0.517455027315522,
      "eval_runtime": 2.3965,
      "eval_samples_per_second": 92.218,
      "eval_steps_per_second": 1.669,
      "step": 62
    },
    {
      "epoch": 9.0,
      "grad_norm": 10.4375,
      "learning_rate": 5.147058823529412e-06,
      "loss": 0.0253,
      "step": 63
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.9324150681495667,
      "eval_matthews_correlation": 0.4687627817830735,
      "eval_runtime": 2.4459,
      "eval_samples_per_second": 90.356,
      "eval_steps_per_second": 1.635,
      "step": 63
    },
    {
      "epoch": 9.142857142857142,
      "grad_norm": 2.6875,
      "learning_rate": 4.411764705882353e-06,
      "loss": 0.0231,
      "step": 64
    },
    {
      "epoch": 9.142857142857142,
      "eval_loss": 0.9279094934463501,
      "eval_matthews_correlation": 0.45553170828567224,
      "eval_runtime": 2.4506,
      "eval_samples_per_second": 90.182,
      "eval_steps_per_second": 1.632,
      "step": 64
    },
    {
      "epoch": 9.285714285714286,
      "grad_norm": 1.40625,
      "learning_rate": 3.6764705882352942e-06,
      "loss": 0.0075,
      "step": 65
    },
    {
      "epoch": 9.285714285714286,
      "eval_loss": 0.9252597689628601,
      "eval_matthews_correlation": 0.45144276042785747,
      "eval_runtime": 2.4513,
      "eval_samples_per_second": 90.155,
      "eval_steps_per_second": 1.632,
      "step": 65
    },
    {
      "epoch": 9.428571428571429,
      "grad_norm": 2.515625,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.0108,
      "step": 66
    },
    {
      "epoch": 9.428571428571429,
      "eval_loss": 0.9272325038909912,
      "eval_matthews_correlation": 0.47312414294796906,
      "eval_runtime": 2.4484,
      "eval_samples_per_second": 90.261,
      "eval_steps_per_second": 1.634,
      "step": 66
    },
    {
      "epoch": 9.571428571428571,
      "grad_norm": 1.21875,
      "learning_rate": 2.2058823529411767e-06,
      "loss": 0.0045,
      "step": 67
    },
    {
      "epoch": 9.571428571428571,
      "eval_loss": 0.9301801919937134,
      "eval_matthews_correlation": 0.4865760231907581,
      "eval_runtime": 2.4029,
      "eval_samples_per_second": 91.971,
      "eval_steps_per_second": 1.665,
      "step": 67
    },
    {
      "epoch": 9.714285714285714,
      "grad_norm": 3.9375,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 0.0132,
      "step": 68
    },
    {
      "epoch": 9.714285714285714,
      "eval_loss": 0.9324151873588562,
      "eval_matthews_correlation": 0.4865760231907581,
      "eval_runtime": 2.4466,
      "eval_samples_per_second": 90.33,
      "eval_steps_per_second": 1.635,
      "step": 68
    },
    {
      "epoch": 9.857142857142858,
      "grad_norm": 5.09375,
      "learning_rate": 7.352941176470589e-07,
      "loss": 0.0207,
      "step": 69
    },
    {
      "epoch": 9.857142857142858,
      "eval_loss": 0.9318564534187317,
      "eval_matthews_correlation": 0.4865760231907581,
      "eval_runtime": 2.4458,
      "eval_samples_per_second": 90.357,
      "eval_steps_per_second": 1.635,
      "step": 69
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.40625,
      "learning_rate": 0.0,
      "loss": 0.009,
      "step": 70
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.9318756461143494,
      "eval_matthews_correlation": 0.4865760231907581,
      "eval_runtime": 2.4456,
      "eval_samples_per_second": 90.368,
      "eval_steps_per_second": 1.636,
      "step": 70
    },
    {
      "epoch": 10.0,
      "step": 70,
      "total_flos": 3.960220259503309e+16,
      "train_loss": 0.5831886287379477,
      "train_runtime": 467.92,
      "train_samples_per_second": 18.871,
      "train_steps_per_second": 0.15
    }
  ],
  "logging_steps": 1,
  "max_steps": 70,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3.960220259503309e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}