|
{ |
|
"best_metric": 0.2633333333333333, |
|
"best_model_checkpoint": "swinv2-small-patch4-window16-256-mineral\\checkpoint-6693", |
|
"epoch": 480.0, |
|
"global_step": 9000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 5.555555555555556e-07, |
|
"loss": 5.6941, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.005, |
|
"eval_loss": 5.692106246948242, |
|
"eval_runtime": 9.9033, |
|
"eval_samples_per_second": 60.586, |
|
"eval_steps_per_second": 1.919, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 5.6939, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 5.6886, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.005, |
|
"eval_loss": 5.682541370391846, |
|
"eval_runtime": 4.1429, |
|
"eval_samples_per_second": 144.826, |
|
"eval_steps_per_second": 4.586, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 5.6844, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 5.6735, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.005, |
|
"eval_loss": 5.669071674346924, |
|
"eval_runtime": 4.142, |
|
"eval_samples_per_second": 144.858, |
|
"eval_steps_per_second": 4.587, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 5.6534, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3.888888888888889e-06, |
|
"loss": 5.6521, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.0033333333333333335, |
|
"eval_loss": 5.654940605163574, |
|
"eval_runtime": 4.1572, |
|
"eval_samples_per_second": 144.326, |
|
"eval_steps_per_second": 4.57, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 5.6431, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 5e-06, |
|
"loss": 5.6394, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.0033333333333333335, |
|
"eval_loss": 5.641611099243164, |
|
"eval_runtime": 4.1535, |
|
"eval_samples_per_second": 144.457, |
|
"eval_steps_per_second": 4.574, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 5.5941, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 6.111111111111111e-06, |
|
"loss": 5.6078, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_accuracy": 0.0033333333333333335, |
|
"eval_loss": 5.627758979797363, |
|
"eval_runtime": 4.1893, |
|
"eval_samples_per_second": 143.22, |
|
"eval_steps_per_second": 4.535, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 5.5762, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 7.222222222222222e-06, |
|
"loss": 5.5743, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.0016666666666666668, |
|
"eval_loss": 5.612813472747803, |
|
"eval_runtime": 4.224, |
|
"eval_samples_per_second": 142.046, |
|
"eval_steps_per_second": 4.498, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 5.5413, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 5.5509, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.0016666666666666668, |
|
"eval_loss": 5.591813087463379, |
|
"eval_runtime": 4.1747, |
|
"eval_samples_per_second": 143.723, |
|
"eval_steps_per_second": 4.551, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 5.5115, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy": 0.006666666666666667, |
|
"eval_loss": 5.569559097290039, |
|
"eval_runtime": 4.1739, |
|
"eval_samples_per_second": 143.75, |
|
"eval_steps_per_second": 4.552, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 5.4912, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1e-05, |
|
"loss": 5.4411, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.01, |
|
"eval_loss": 5.5439839363098145, |
|
"eval_runtime": 4.1644, |
|
"eval_samples_per_second": 144.078, |
|
"eval_steps_per_second": 4.562, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 1.0555555555555555e-05, |
|
"loss": 5.3942, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 10.67, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 5.3335, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.016666666666666666, |
|
"eval_loss": 5.513484001159668, |
|
"eval_runtime": 4.1932, |
|
"eval_samples_per_second": 143.088, |
|
"eval_steps_per_second": 4.531, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 5.2998, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 5.2413, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.021666666666666667, |
|
"eval_loss": 5.464037895202637, |
|
"eval_runtime": 4.2106, |
|
"eval_samples_per_second": 142.496, |
|
"eval_steps_per_second": 4.512, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 1.2777777777777777e-05, |
|
"loss": 5.2175, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 12.8, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 5.1738, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"eval_accuracy": 0.03333333333333333, |
|
"eval_loss": 5.408351421356201, |
|
"eval_runtime": 4.1793, |
|
"eval_samples_per_second": 143.566, |
|
"eval_steps_per_second": 4.546, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 5.0966, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 5.0222, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"eval_accuracy": 0.045, |
|
"eval_loss": 5.3320746421813965, |
|
"eval_runtime": 4.1797, |
|
"eval_samples_per_second": 143.55, |
|
"eval_steps_per_second": 4.546, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 14.4, |
|
"learning_rate": 1.5e-05, |
|
"loss": 4.913, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 4.8594, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.05333333333333334, |
|
"eval_loss": 5.248490333557129, |
|
"eval_runtime": 4.1704, |
|
"eval_samples_per_second": 143.871, |
|
"eval_steps_per_second": 4.556, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 1.6111111111111115e-05, |
|
"loss": 4.7591, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 4.7441, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.065, |
|
"eval_loss": 5.150908946990967, |
|
"eval_runtime": 4.1485, |
|
"eval_samples_per_second": 144.629, |
|
"eval_steps_per_second": 4.58, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 1.7222222222222224e-05, |
|
"loss": 4.5946, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"eval_accuracy": 0.07166666666666667, |
|
"eval_loss": 5.070082187652588, |
|
"eval_runtime": 4.1543, |
|
"eval_samples_per_second": 144.43, |
|
"eval_steps_per_second": 4.574, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 17.07, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 4.4995, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 4.3382, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 17.97, |
|
"eval_accuracy": 0.08666666666666667, |
|
"eval_loss": 4.976734161376953, |
|
"eval_runtime": 4.158, |
|
"eval_samples_per_second": 144.301, |
|
"eval_steps_per_second": 4.57, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 4.3477, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 18.67, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 4.2008, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.105, |
|
"eval_loss": 4.862234115600586, |
|
"eval_runtime": 4.1637, |
|
"eval_samples_per_second": 144.103, |
|
"eval_steps_per_second": 4.563, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 19.2, |
|
"learning_rate": 2e-05, |
|
"loss": 4.0491, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 19.73, |
|
"learning_rate": 2.0555555555555555e-05, |
|
"loss": 4.0563, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.10333333333333333, |
|
"eval_loss": 4.772627830505371, |
|
"eval_runtime": 4.1762, |
|
"eval_samples_per_second": 143.672, |
|
"eval_steps_per_second": 4.55, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 3.8101, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 3.8064, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"eval_accuracy": 0.115, |
|
"eval_loss": 4.689815044403076, |
|
"eval_runtime": 4.151, |
|
"eval_samples_per_second": 144.542, |
|
"eval_steps_per_second": 4.577, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 21.33, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 3.6083, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 2.277777777777778e-05, |
|
"loss": 3.5584, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"eval_accuracy": 0.125, |
|
"eval_loss": 4.599685192108154, |
|
"eval_runtime": 4.1649, |
|
"eval_samples_per_second": 144.062, |
|
"eval_steps_per_second": 4.562, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 22.4, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 3.4029, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 22.93, |
|
"learning_rate": 2.3888888888888892e-05, |
|
"loss": 3.3377, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.13666666666666666, |
|
"eval_loss": 4.484786510467529, |
|
"eval_runtime": 4.165, |
|
"eval_samples_per_second": 144.057, |
|
"eval_steps_per_second": 4.562, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 23.47, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 3.1633, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.1119, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.15333333333333332, |
|
"eval_loss": 4.405167102813721, |
|
"eval_runtime": 4.1901, |
|
"eval_samples_per_second": 143.196, |
|
"eval_steps_per_second": 4.535, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 2.8686, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"eval_accuracy": 0.15, |
|
"eval_loss": 4.37052583694458, |
|
"eval_runtime": 4.1543, |
|
"eval_samples_per_second": 144.427, |
|
"eval_steps_per_second": 4.574, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 2.6111111111111114e-05, |
|
"loss": 3.0138, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 25.6, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 2.7649, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"eval_accuracy": 0.165, |
|
"eval_loss": 4.297973155975342, |
|
"eval_runtime": 4.1559, |
|
"eval_samples_per_second": 144.373, |
|
"eval_steps_per_second": 4.572, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 2.6887, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 2.5698, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.17666666666666667, |
|
"eval_loss": 4.236337184906006, |
|
"eval_runtime": 4.1581, |
|
"eval_samples_per_second": 144.296, |
|
"eval_steps_per_second": 4.569, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 27.2, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 2.5301, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 27.73, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 2.4344, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.17666666666666667, |
|
"eval_loss": 4.17328405380249, |
|
"eval_runtime": 4.1589, |
|
"eval_samples_per_second": 144.269, |
|
"eval_steps_per_second": 4.569, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 2.9444444444444448e-05, |
|
"loss": 2.2843, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 28.8, |
|
"learning_rate": 3e-05, |
|
"loss": 2.2186, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"eval_accuracy": 0.17333333333333334, |
|
"eval_loss": 4.178333282470703, |
|
"eval_runtime": 4.1547, |
|
"eval_samples_per_second": 144.413, |
|
"eval_steps_per_second": 4.573, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 29.33, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 2.1025, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 29.87, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 2.0227, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 29.97, |
|
"eval_accuracy": 0.18, |
|
"eval_loss": 4.13058614730835, |
|
"eval_runtime": 4.1618, |
|
"eval_samples_per_second": 144.168, |
|
"eval_steps_per_second": 4.565, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 30.4, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 1.8851, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 30.93, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 1.9153, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 30.99, |
|
"eval_accuracy": 0.175, |
|
"eval_loss": 4.094816207885742, |
|
"eval_runtime": 4.1706, |
|
"eval_samples_per_second": 143.864, |
|
"eval_steps_per_second": 4.556, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 31.47, |
|
"learning_rate": 3.277777777777778e-05, |
|
"loss": 1.7959, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.7363, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.17833333333333334, |
|
"eval_loss": 4.061172008514404, |
|
"eval_runtime": 4.1527, |
|
"eval_samples_per_second": 144.483, |
|
"eval_steps_per_second": 4.575, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 32.53, |
|
"learning_rate": 3.388888888888889e-05, |
|
"loss": 1.6171, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 32.96, |
|
"eval_accuracy": 0.185, |
|
"eval_loss": 4.020925521850586, |
|
"eval_runtime": 4.1794, |
|
"eval_samples_per_second": 143.562, |
|
"eval_steps_per_second": 4.546, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 33.07, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 1.5284, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 33.6, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.4865, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 33.97, |
|
"eval_accuracy": 0.185, |
|
"eval_loss": 4.019384860992432, |
|
"eval_runtime": 4.1555, |
|
"eval_samples_per_second": 144.386, |
|
"eval_steps_per_second": 4.572, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 34.13, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 1.4216, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 34.67, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 1.3194, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 34.99, |
|
"eval_accuracy": 0.205, |
|
"eval_loss": 3.988067626953125, |
|
"eval_runtime": 4.1907, |
|
"eval_samples_per_second": 143.174, |
|
"eval_steps_per_second": 4.534, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 35.2, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.3247, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 35.73, |
|
"learning_rate": 3.722222222222222e-05, |
|
"loss": 1.2811, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 3.9861950874328613, |
|
"eval_runtime": 4.2139, |
|
"eval_samples_per_second": 142.387, |
|
"eval_steps_per_second": 4.509, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 36.27, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 1.1968, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 36.8, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 1.1703, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 36.96, |
|
"eval_accuracy": 0.20333333333333334, |
|
"eval_loss": 3.9904768466949463, |
|
"eval_runtime": 4.165, |
|
"eval_samples_per_second": 144.057, |
|
"eval_steps_per_second": 4.562, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 37.33, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 1.1136, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 37.87, |
|
"learning_rate": 3.944444444444445e-05, |
|
"loss": 1.114, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"eval_accuracy": 0.21333333333333335, |
|
"eval_loss": 3.951385021209717, |
|
"eval_runtime": 4.1675, |
|
"eval_samples_per_second": 143.97, |
|
"eval_steps_per_second": 4.559, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 38.4, |
|
"learning_rate": 4e-05, |
|
"loss": 1.0194, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 4.055555555555556e-05, |
|
"loss": 0.9645, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 38.99, |
|
"eval_accuracy": 0.20666666666666667, |
|
"eval_loss": 3.9677815437316895, |
|
"eval_runtime": 4.1678, |
|
"eval_samples_per_second": 143.96, |
|
"eval_steps_per_second": 4.559, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.9037, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.8976, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 3.987384080886841, |
|
"eval_runtime": 4.1794, |
|
"eval_samples_per_second": 143.56, |
|
"eval_steps_per_second": 4.546, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.8147, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 40.96, |
|
"eval_accuracy": 0.20833333333333334, |
|
"eval_loss": 3.925668954849243, |
|
"eval_runtime": 4.2286, |
|
"eval_samples_per_second": 141.892, |
|
"eval_steps_per_second": 4.493, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 41.07, |
|
"learning_rate": 4.277777777777778e-05, |
|
"loss": 0.8449, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 41.6, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.7239, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 41.97, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 3.9394490718841553, |
|
"eval_runtime": 4.2239, |
|
"eval_samples_per_second": 142.05, |
|
"eval_steps_per_second": 4.498, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 42.13, |
|
"learning_rate": 4.388888888888889e-05, |
|
"loss": 0.7847, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 42.67, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.7732, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 42.99, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 3.9472830295562744, |
|
"eval_runtime": 4.1769, |
|
"eval_samples_per_second": 143.646, |
|
"eval_steps_per_second": 4.549, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 43.2, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.7159, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.7009, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 3.946096897125244, |
|
"eval_runtime": 4.1968, |
|
"eval_samples_per_second": 142.965, |
|
"eval_steps_per_second": 4.527, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 44.27, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 0.663, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 44.8, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.5945, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 44.96, |
|
"eval_accuracy": 0.21333333333333335, |
|
"eval_loss": 4.0206685066223145, |
|
"eval_runtime": 4.207, |
|
"eval_samples_per_second": 142.62, |
|
"eval_steps_per_second": 4.516, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 45.33, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.5903, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 45.87, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.555, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 45.97, |
|
"eval_accuracy": 0.20833333333333334, |
|
"eval_loss": 4.035262107849121, |
|
"eval_runtime": 4.1806, |
|
"eval_samples_per_second": 143.522, |
|
"eval_steps_per_second": 4.545, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.5768, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 46.93, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.5241, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 46.99, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 4.023153305053711, |
|
"eval_runtime": 4.1743, |
|
"eval_samples_per_second": 143.736, |
|
"eval_steps_per_second": 4.552, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 47.47, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 0.5324, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4789, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.22, |
|
"eval_loss": 4.002644062042236, |
|
"eval_runtime": 4.1605, |
|
"eval_samples_per_second": 144.215, |
|
"eval_steps_per_second": 4.567, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 48.53, |
|
"learning_rate": 4.9938271604938276e-05, |
|
"loss": 0.4284, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 48.96, |
|
"eval_accuracy": 0.22, |
|
"eval_loss": 4.0031256675720215, |
|
"eval_runtime": 4.1788, |
|
"eval_samples_per_second": 143.582, |
|
"eval_steps_per_second": 4.547, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 49.07, |
|
"learning_rate": 4.987654320987655e-05, |
|
"loss": 0.5321, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 49.6, |
|
"learning_rate": 4.981481481481482e-05, |
|
"loss": 0.4701, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 49.97, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.057220935821533, |
|
"eval_runtime": 4.1731, |
|
"eval_samples_per_second": 143.779, |
|
"eval_steps_per_second": 4.553, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 50.13, |
|
"learning_rate": 4.9753086419753084e-05, |
|
"loss": 0.491, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 50.67, |
|
"learning_rate": 4.969135802469136e-05, |
|
"loss": 0.4501, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 50.99, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.087738513946533, |
|
"eval_runtime": 4.1876, |
|
"eval_samples_per_second": 143.279, |
|
"eval_steps_per_second": 4.537, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 51.2, |
|
"learning_rate": 4.962962962962963e-05, |
|
"loss": 0.4453, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 51.73, |
|
"learning_rate": 4.9567901234567905e-05, |
|
"loss": 0.3966, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 4.020733833312988, |
|
"eval_runtime": 4.1529, |
|
"eval_samples_per_second": 144.478, |
|
"eval_steps_per_second": 4.575, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 52.27, |
|
"learning_rate": 4.950617283950618e-05, |
|
"loss": 0.4295, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 52.8, |
|
"learning_rate": 4.9444444444444446e-05, |
|
"loss": 0.3564, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 52.96, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.082664966583252, |
|
"eval_runtime": 4.1603, |
|
"eval_samples_per_second": 144.219, |
|
"eval_steps_per_second": 4.567, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 53.33, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.4013, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 53.87, |
|
"learning_rate": 4.932098765432099e-05, |
|
"loss": 0.3472, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 53.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.090172290802002, |
|
"eval_runtime": 4.1532, |
|
"eval_samples_per_second": 144.465, |
|
"eval_steps_per_second": 4.575, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 54.4, |
|
"learning_rate": 4.925925925925926e-05, |
|
"loss": 0.383, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 54.93, |
|
"learning_rate": 4.9197530864197535e-05, |
|
"loss": 0.3731, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 54.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.095271587371826, |
|
"eval_runtime": 4.2172, |
|
"eval_samples_per_second": 142.273, |
|
"eval_steps_per_second": 4.505, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 55.47, |
|
"learning_rate": 4.913580246913581e-05, |
|
"loss": 0.3237, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 4.9074074074074075e-05, |
|
"loss": 0.3161, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.20333333333333334, |
|
"eval_loss": 4.165963172912598, |
|
"eval_runtime": 4.2197, |
|
"eval_samples_per_second": 142.19, |
|
"eval_steps_per_second": 4.503, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 56.53, |
|
"learning_rate": 4.901234567901235e-05, |
|
"loss": 0.3352, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 56.96, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.115335464477539, |
|
"eval_runtime": 4.1813, |
|
"eval_samples_per_second": 143.495, |
|
"eval_steps_per_second": 4.544, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 57.07, |
|
"learning_rate": 4.8950617283950616e-05, |
|
"loss": 0.3455, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 57.6, |
|
"learning_rate": 4.888888888888889e-05, |
|
"loss": 0.3317, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 57.97, |
|
"eval_accuracy": 0.21666666666666667, |
|
"eval_loss": 4.1095662117004395, |
|
"eval_runtime": 4.2026, |
|
"eval_samples_per_second": 142.768, |
|
"eval_steps_per_second": 4.521, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 58.13, |
|
"learning_rate": 4.8827160493827164e-05, |
|
"loss": 0.3594, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 58.67, |
|
"learning_rate": 4.876543209876544e-05, |
|
"loss": 0.294, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 58.99, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.185626029968262, |
|
"eval_runtime": 4.176, |
|
"eval_samples_per_second": 143.679, |
|
"eval_steps_per_second": 4.55, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 59.2, |
|
"learning_rate": 4.8703703703703704e-05, |
|
"loss": 0.3059, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 59.73, |
|
"learning_rate": 4.864197530864198e-05, |
|
"loss": 0.3299, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.1475830078125, |
|
"eval_runtime": 4.1797, |
|
"eval_samples_per_second": 143.552, |
|
"eval_steps_per_second": 4.546, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 60.27, |
|
"learning_rate": 4.858024691358025e-05, |
|
"loss": 0.2965, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 60.8, |
|
"learning_rate": 4.851851851851852e-05, |
|
"loss": 0.2847, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 60.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.204588890075684, |
|
"eval_runtime": 4.1808, |
|
"eval_samples_per_second": 143.515, |
|
"eval_steps_per_second": 4.545, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 61.33, |
|
"learning_rate": 4.845679012345679e-05, |
|
"loss": 0.3176, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 61.87, |
|
"learning_rate": 4.8395061728395067e-05, |
|
"loss": 0.2924, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 61.97, |
|
"eval_accuracy": 0.21833333333333332, |
|
"eval_loss": 4.156820297241211, |
|
"eval_runtime": 4.1645, |
|
"eval_samples_per_second": 144.075, |
|
"eval_steps_per_second": 4.562, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 62.4, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 0.255, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 62.93, |
|
"learning_rate": 4.827160493827161e-05, |
|
"loss": 0.2818, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 62.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.151925563812256, |
|
"eval_runtime": 4.175, |
|
"eval_samples_per_second": 143.711, |
|
"eval_steps_per_second": 4.551, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 63.47, |
|
"learning_rate": 4.820987654320988e-05, |
|
"loss": 0.2618, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.2698, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.227489471435547, |
|
"eval_runtime": 4.1793, |
|
"eval_samples_per_second": 143.565, |
|
"eval_steps_per_second": 4.546, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 64.53, |
|
"learning_rate": 4.808641975308642e-05, |
|
"loss": 0.2579, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 64.96, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.162615776062012, |
|
"eval_runtime": 4.1756, |
|
"eval_samples_per_second": 143.693, |
|
"eval_steps_per_second": 4.55, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 65.07, |
|
"learning_rate": 4.8024691358024696e-05, |
|
"loss": 0.2555, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 65.6, |
|
"learning_rate": 4.796296296296296e-05, |
|
"loss": 0.2597, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 65.97, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.227728843688965, |
|
"eval_runtime": 4.1826, |
|
"eval_samples_per_second": 143.452, |
|
"eval_steps_per_second": 4.543, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 66.13, |
|
"learning_rate": 4.7901234567901237e-05, |
|
"loss": 0.1989, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 66.67, |
|
"learning_rate": 4.783950617283951e-05, |
|
"loss": 0.2443, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 66.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.192920207977295, |
|
"eval_runtime": 4.1844, |
|
"eval_samples_per_second": 143.39, |
|
"eval_steps_per_second": 4.541, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 67.2, |
|
"learning_rate": 4.7777777777777784e-05, |
|
"loss": 0.2686, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 67.73, |
|
"learning_rate": 4.771604938271605e-05, |
|
"loss": 0.2532, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.277872085571289, |
|
"eval_runtime": 4.1836, |
|
"eval_samples_per_second": 143.417, |
|
"eval_steps_per_second": 4.542, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 68.27, |
|
"learning_rate": 4.7654320987654325e-05, |
|
"loss": 0.2647, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 68.8, |
|
"learning_rate": 4.759259259259259e-05, |
|
"loss": 0.2305, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 68.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.244070053100586, |
|
"eval_runtime": 4.1785, |
|
"eval_samples_per_second": 143.591, |
|
"eval_steps_per_second": 4.547, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 69.33, |
|
"learning_rate": 4.7530864197530866e-05, |
|
"loss": 0.2347, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 69.87, |
|
"learning_rate": 4.746913580246914e-05, |
|
"loss": 0.2423, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 69.97, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.25825309753418, |
|
"eval_runtime": 4.1881, |
|
"eval_samples_per_second": 143.264, |
|
"eval_steps_per_second": 4.537, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 70.4, |
|
"learning_rate": 4.740740740740741e-05, |
|
"loss": 0.2167, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 70.93, |
|
"learning_rate": 4.734567901234569e-05, |
|
"loss": 0.222, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 70.99, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.293475151062012, |
|
"eval_runtime": 4.178, |
|
"eval_samples_per_second": 143.61, |
|
"eval_steps_per_second": 4.548, |
|
"step": 1331 |
|
}, |
|
{ |
|
"epoch": 71.47, |
|
"learning_rate": 4.7283950617283954e-05, |
|
"loss": 0.2781, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 0.2096, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.271422863006592, |
|
"eval_runtime": 4.1808, |
|
"eval_samples_per_second": 143.513, |
|
"eval_steps_per_second": 4.545, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 72.53, |
|
"learning_rate": 4.7160493827160495e-05, |
|
"loss": 0.1776, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 72.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.234805583953857, |
|
"eval_runtime": 4.1972, |
|
"eval_samples_per_second": 142.953, |
|
"eval_steps_per_second": 4.527, |
|
"step": 1368 |
|
}, |
|
{ |
|
"epoch": 73.07, |
|
"learning_rate": 4.709876543209877e-05, |
|
"loss": 0.1977, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 73.6, |
|
"learning_rate": 4.703703703703704e-05, |
|
"loss": 0.2009, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 73.97, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.292957305908203, |
|
"eval_runtime": 4.1773, |
|
"eval_samples_per_second": 143.633, |
|
"eval_steps_per_second": 4.548, |
|
"step": 1387 |
|
}, |
|
{ |
|
"epoch": 74.13, |
|
"learning_rate": 4.6975308641975316e-05, |
|
"loss": 0.2292, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 74.67, |
|
"learning_rate": 4.691358024691358e-05, |
|
"loss": 0.2087, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 74.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.307061672210693, |
|
"eval_runtime": 4.182, |
|
"eval_samples_per_second": 143.472, |
|
"eval_steps_per_second": 4.543, |
|
"step": 1406 |
|
}, |
|
{ |
|
"epoch": 75.2, |
|
"learning_rate": 4.685185185185185e-05, |
|
"loss": 0.2235, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 75.73, |
|
"learning_rate": 4.6790123456790124e-05, |
|
"loss": 0.1818, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.296043872833252, |
|
"eval_runtime": 4.1959, |
|
"eval_samples_per_second": 142.996, |
|
"eval_steps_per_second": 4.528, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 76.27, |
|
"learning_rate": 4.67283950617284e-05, |
|
"loss": 0.2019, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 76.8, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.2236, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 76.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.290974140167236, |
|
"eval_runtime": 4.1784, |
|
"eval_samples_per_second": 143.596, |
|
"eval_steps_per_second": 4.547, |
|
"step": 1443 |
|
}, |
|
{ |
|
"epoch": 77.33, |
|
"learning_rate": 4.6604938271604945e-05, |
|
"loss": 0.2008, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 77.87, |
|
"learning_rate": 4.654320987654321e-05, |
|
"loss": 0.1802, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 77.97, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 4.289625644683838, |
|
"eval_runtime": 4.224, |
|
"eval_samples_per_second": 142.044, |
|
"eval_steps_per_second": 4.498, |
|
"step": 1462 |
|
}, |
|
{ |
|
"epoch": 78.4, |
|
"learning_rate": 4.648148148148148e-05, |
|
"loss": 0.2296, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 78.93, |
|
"learning_rate": 4.641975308641975e-05, |
|
"loss": 0.2037, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 78.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.331364154815674, |
|
"eval_runtime": 4.2445, |
|
"eval_samples_per_second": 141.359, |
|
"eval_steps_per_second": 4.476, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 79.47, |
|
"learning_rate": 4.635802469135803e-05, |
|
"loss": 0.1697, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.1912, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.261221408843994, |
|
"eval_runtime": 4.1986, |
|
"eval_samples_per_second": 142.905, |
|
"eval_steps_per_second": 4.525, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 80.53, |
|
"learning_rate": 4.623456790123457e-05, |
|
"loss": 0.2305, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 80.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.279019832611084, |
|
"eval_runtime": 4.1994, |
|
"eval_samples_per_second": 142.879, |
|
"eval_steps_per_second": 4.524, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 81.07, |
|
"learning_rate": 4.617283950617284e-05, |
|
"loss": 0.1808, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 81.6, |
|
"learning_rate": 4.6111111111111115e-05, |
|
"loss": 0.2188, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.306877613067627, |
|
"eval_runtime": 4.1818, |
|
"eval_samples_per_second": 143.477, |
|
"eval_steps_per_second": 4.543, |
|
"step": 1537 |
|
}, |
|
{ |
|
"epoch": 82.13, |
|
"learning_rate": 4.604938271604938e-05, |
|
"loss": 0.2251, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 82.67, |
|
"learning_rate": 4.5987654320987656e-05, |
|
"loss": 0.1639, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 82.99, |
|
"eval_accuracy": 0.21833333333333332, |
|
"eval_loss": 4.353877067565918, |
|
"eval_runtime": 4.2313, |
|
"eval_samples_per_second": 141.801, |
|
"eval_steps_per_second": 4.49, |
|
"step": 1556 |
|
}, |
|
{ |
|
"epoch": 83.2, |
|
"learning_rate": 4.592592592592593e-05, |
|
"loss": 0.1407, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 83.73, |
|
"learning_rate": 4.58641975308642e-05, |
|
"loss": 0.1741, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.321075916290283, |
|
"eval_runtime": 4.2392, |
|
"eval_samples_per_second": 141.536, |
|
"eval_steps_per_second": 4.482, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 84.27, |
|
"learning_rate": 4.580246913580247e-05, |
|
"loss": 0.1926, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 84.8, |
|
"learning_rate": 4.5740740740740745e-05, |
|
"loss": 0.1937, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 84.96, |
|
"eval_accuracy": 0.21166666666666667, |
|
"eval_loss": 4.357635498046875, |
|
"eval_runtime": 4.1967, |
|
"eval_samples_per_second": 142.97, |
|
"eval_steps_per_second": 4.527, |
|
"step": 1593 |
|
}, |
|
{ |
|
"epoch": 85.33, |
|
"learning_rate": 4.567901234567901e-05, |
|
"loss": 0.1932, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 85.87, |
|
"learning_rate": 4.5617283950617285e-05, |
|
"loss": 0.1712, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 85.97, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.3434247970581055, |
|
"eval_runtime": 4.2166, |
|
"eval_samples_per_second": 142.294, |
|
"eval_steps_per_second": 4.506, |
|
"step": 1612 |
|
}, |
|
{ |
|
"epoch": 86.4, |
|
"learning_rate": 4.555555555555556e-05, |
|
"loss": 0.1574, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 86.93, |
|
"learning_rate": 4.5493827160493826e-05, |
|
"loss": 0.1665, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 86.99, |
|
"eval_accuracy": 0.21166666666666667, |
|
"eval_loss": 4.334897041320801, |
|
"eval_runtime": 4.1946, |
|
"eval_samples_per_second": 143.042, |
|
"eval_steps_per_second": 4.53, |
|
"step": 1631 |
|
}, |
|
{ |
|
"epoch": 87.47, |
|
"learning_rate": 4.54320987654321e-05, |
|
"loss": 0.1565, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 4.5370370370370374e-05, |
|
"loss": 0.1846, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.417025566101074, |
|
"eval_runtime": 4.202, |
|
"eval_samples_per_second": 142.79, |
|
"eval_steps_per_second": 4.522, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 88.53, |
|
"learning_rate": 4.530864197530865e-05, |
|
"loss": 0.1827, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 88.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.335045337677002, |
|
"eval_runtime": 4.1833, |
|
"eval_samples_per_second": 143.428, |
|
"eval_steps_per_second": 4.542, |
|
"step": 1668 |
|
}, |
|
{ |
|
"epoch": 89.07, |
|
"learning_rate": 4.5246913580246914e-05, |
|
"loss": 0.1504, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 89.6, |
|
"learning_rate": 4.518518518518519e-05, |
|
"loss": 0.1591, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 89.97, |
|
"eval_accuracy": 0.215, |
|
"eval_loss": 4.339655876159668, |
|
"eval_runtime": 4.1906, |
|
"eval_samples_per_second": 143.178, |
|
"eval_steps_per_second": 4.534, |
|
"step": 1687 |
|
}, |
|
{ |
|
"epoch": 90.13, |
|
"learning_rate": 4.5123456790123455e-05, |
|
"loss": 0.1328, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 90.67, |
|
"learning_rate": 4.506172839506173e-05, |
|
"loss": 0.1508, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.3272809982299805, |
|
"eval_runtime": 4.1836, |
|
"eval_samples_per_second": 143.418, |
|
"eval_steps_per_second": 4.542, |
|
"step": 1706 |
|
}, |
|
{ |
|
"epoch": 91.2, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1493, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 91.73, |
|
"learning_rate": 4.493827160493828e-05, |
|
"loss": 0.1808, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.3314714431762695, |
|
"eval_runtime": 4.1951, |
|
"eval_samples_per_second": 143.024, |
|
"eval_steps_per_second": 4.529, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 92.27, |
|
"learning_rate": 4.4876543209876544e-05, |
|
"loss": 0.1551, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 4.481481481481482e-05, |
|
"loss": 0.17, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 92.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.275998592376709, |
|
"eval_runtime": 4.1877, |
|
"eval_samples_per_second": 143.278, |
|
"eval_steps_per_second": 4.537, |
|
"step": 1743 |
|
}, |
|
{ |
|
"epoch": 93.33, |
|
"learning_rate": 4.4753086419753084e-05, |
|
"loss": 0.1676, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 93.87, |
|
"learning_rate": 4.469135802469136e-05, |
|
"loss": 0.14, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 93.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.314431190490723, |
|
"eval_runtime": 4.222, |
|
"eval_samples_per_second": 142.112, |
|
"eval_steps_per_second": 4.5, |
|
"step": 1762 |
|
}, |
|
{ |
|
"epoch": 94.4, |
|
"learning_rate": 4.462962962962963e-05, |
|
"loss": 0.1526, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 94.93, |
|
"learning_rate": 4.4567901234567906e-05, |
|
"loss": 0.1734, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 94.99, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.366744041442871, |
|
"eval_runtime": 4.2453, |
|
"eval_samples_per_second": 141.334, |
|
"eval_steps_per_second": 4.476, |
|
"step": 1781 |
|
}, |
|
{ |
|
"epoch": 95.47, |
|
"learning_rate": 4.450617283950618e-05, |
|
"loss": 0.1472, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.1593, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.390308380126953, |
|
"eval_runtime": 4.2118, |
|
"eval_samples_per_second": 142.456, |
|
"eval_steps_per_second": 4.511, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 96.53, |
|
"learning_rate": 4.4382716049382714e-05, |
|
"loss": 0.1523, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 96.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.331397533416748, |
|
"eval_runtime": 4.1925, |
|
"eval_samples_per_second": 143.113, |
|
"eval_steps_per_second": 4.532, |
|
"step": 1818 |
|
}, |
|
{ |
|
"epoch": 97.07, |
|
"learning_rate": 4.432098765432099e-05, |
|
"loss": 0.1665, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 97.6, |
|
"learning_rate": 4.425925925925926e-05, |
|
"loss": 0.1599, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.411539077758789, |
|
"eval_runtime": 4.1869, |
|
"eval_samples_per_second": 143.306, |
|
"eval_steps_per_second": 4.538, |
|
"step": 1837 |
|
}, |
|
{ |
|
"epoch": 98.13, |
|
"learning_rate": 4.4197530864197535e-05, |
|
"loss": 0.1319, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 98.67, |
|
"learning_rate": 4.413580246913581e-05, |
|
"loss": 0.1352, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 98.99, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.362613201141357, |
|
"eval_runtime": 4.181, |
|
"eval_samples_per_second": 143.506, |
|
"eval_steps_per_second": 4.544, |
|
"step": 1856 |
|
}, |
|
{ |
|
"epoch": 99.2, |
|
"learning_rate": 4.4074074074074076e-05, |
|
"loss": 0.1625, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 99.73, |
|
"learning_rate": 4.401234567901234e-05, |
|
"loss": 0.1406, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.355536937713623, |
|
"eval_runtime": 4.2021, |
|
"eval_samples_per_second": 142.787, |
|
"eval_steps_per_second": 4.522, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 100.27, |
|
"learning_rate": 4.3950617283950617e-05, |
|
"loss": 0.1404, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 100.8, |
|
"learning_rate": 4.388888888888889e-05, |
|
"loss": 0.1486, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 100.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.311557769775391, |
|
"eval_runtime": 4.1983, |
|
"eval_samples_per_second": 142.914, |
|
"eval_steps_per_second": 4.526, |
|
"step": 1893 |
|
}, |
|
{ |
|
"epoch": 101.33, |
|
"learning_rate": 4.3827160493827164e-05, |
|
"loss": 0.1458, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 101.87, |
|
"learning_rate": 4.376543209876544e-05, |
|
"loss": 0.149, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 101.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.389366149902344, |
|
"eval_runtime": 4.2353, |
|
"eval_samples_per_second": 141.666, |
|
"eval_steps_per_second": 4.486, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 102.4, |
|
"learning_rate": 4.3703703703703705e-05, |
|
"loss": 0.1317, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 102.93, |
|
"learning_rate": 4.364197530864197e-05, |
|
"loss": 0.115, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 102.99, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.3754754066467285, |
|
"eval_runtime": 4.2516, |
|
"eval_samples_per_second": 141.122, |
|
"eval_steps_per_second": 4.469, |
|
"step": 1931 |
|
}, |
|
{ |
|
"epoch": 103.47, |
|
"learning_rate": 4.3580246913580246e-05, |
|
"loss": 0.1476, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 4.351851851851852e-05, |
|
"loss": 0.1301, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.376509189605713, |
|
"eval_runtime": 4.2304, |
|
"eval_samples_per_second": 141.83, |
|
"eval_steps_per_second": 4.491, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 104.53, |
|
"learning_rate": 4.345679012345679e-05, |
|
"loss": 0.1429, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 104.96, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.402740955352783, |
|
"eval_runtime": 4.1986, |
|
"eval_samples_per_second": 142.905, |
|
"eval_steps_per_second": 4.525, |
|
"step": 1968 |
|
}, |
|
{ |
|
"epoch": 105.07, |
|
"learning_rate": 4.339506172839507e-05, |
|
"loss": 0.123, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 105.6, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.1209, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 105.97, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.380291938781738, |
|
"eval_runtime": 4.2686, |
|
"eval_samples_per_second": 140.56, |
|
"eval_steps_per_second": 4.451, |
|
"step": 1987 |
|
}, |
|
{ |
|
"epoch": 106.13, |
|
"learning_rate": 4.327160493827161e-05, |
|
"loss": 0.1189, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 106.67, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.1287, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 106.99, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.323451995849609, |
|
"eval_runtime": 5.6461, |
|
"eval_samples_per_second": 106.268, |
|
"eval_steps_per_second": 3.365, |
|
"step": 2006 |
|
}, |
|
{ |
|
"epoch": 107.2, |
|
"learning_rate": 4.314814814814815e-05, |
|
"loss": 0.1318, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 107.73, |
|
"learning_rate": 4.308641975308642e-05, |
|
"loss": 0.1318, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.348374843597412, |
|
"eval_runtime": 4.2065, |
|
"eval_samples_per_second": 142.635, |
|
"eval_steps_per_second": 4.517, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 108.27, |
|
"learning_rate": 4.3024691358024696e-05, |
|
"loss": 0.1328, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 108.8, |
|
"learning_rate": 4.296296296296296e-05, |
|
"loss": 0.1136, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 108.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.39766263961792, |
|
"eval_runtime": 4.2019, |
|
"eval_samples_per_second": 142.792, |
|
"eval_steps_per_second": 4.522, |
|
"step": 2043 |
|
}, |
|
{ |
|
"epoch": 109.33, |
|
"learning_rate": 4.290123456790124e-05, |
|
"loss": 0.1218, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 109.87, |
|
"learning_rate": 4.283950617283951e-05, |
|
"loss": 0.1326, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 109.97, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.397821426391602, |
|
"eval_runtime": 4.1836, |
|
"eval_samples_per_second": 143.416, |
|
"eval_steps_per_second": 4.542, |
|
"step": 2062 |
|
}, |
|
{ |
|
"epoch": 110.4, |
|
"learning_rate": 4.277777777777778e-05, |
|
"loss": 0.1223, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 110.93, |
|
"learning_rate": 4.271604938271605e-05, |
|
"loss": 0.1415, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 110.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.321408748626709, |
|
"eval_runtime": 4.1873, |
|
"eval_samples_per_second": 143.29, |
|
"eval_steps_per_second": 4.538, |
|
"step": 2081 |
|
}, |
|
{ |
|
"epoch": 111.47, |
|
"learning_rate": 4.2654320987654325e-05, |
|
"loss": 0.1281, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.1229, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.369870662689209, |
|
"eval_runtime": 4.2352, |
|
"eval_samples_per_second": 141.67, |
|
"eval_steps_per_second": 4.486, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 112.53, |
|
"learning_rate": 4.2530864197530866e-05, |
|
"loss": 0.1004, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 112.96, |
|
"eval_accuracy": 0.25833333333333336, |
|
"eval_loss": 4.382783889770508, |
|
"eval_runtime": 4.2396, |
|
"eval_samples_per_second": 141.524, |
|
"eval_steps_per_second": 4.482, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 113.07, |
|
"learning_rate": 4.246913580246914e-05, |
|
"loss": 0.1238, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 113.6, |
|
"learning_rate": 4.240740740740741e-05, |
|
"loss": 0.0961, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 113.97, |
|
"eval_accuracy": 0.25166666666666665, |
|
"eval_loss": 4.356354713439941, |
|
"eval_runtime": 4.2272, |
|
"eval_samples_per_second": 141.937, |
|
"eval_steps_per_second": 4.495, |
|
"step": 2137 |
|
}, |
|
{ |
|
"epoch": 114.13, |
|
"learning_rate": 4.234567901234568e-05, |
|
"loss": 0.1181, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 114.67, |
|
"learning_rate": 4.2283950617283955e-05, |
|
"loss": 0.1132, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 114.99, |
|
"eval_accuracy": 0.25333333333333335, |
|
"eval_loss": 4.338386058807373, |
|
"eval_runtime": 4.2183, |
|
"eval_samples_per_second": 142.239, |
|
"eval_steps_per_second": 4.504, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 115.2, |
|
"learning_rate": 4.222222222222222e-05, |
|
"loss": 0.1232, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 115.73, |
|
"learning_rate": 4.2160493827160495e-05, |
|
"loss": 0.1166, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.415248394012451, |
|
"eval_runtime": 4.233, |
|
"eval_samples_per_second": 141.743, |
|
"eval_steps_per_second": 4.489, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 116.27, |
|
"learning_rate": 4.209876543209877e-05, |
|
"loss": 0.129, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 116.8, |
|
"learning_rate": 4.203703703703704e-05, |
|
"loss": 0.1193, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 116.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.363390922546387, |
|
"eval_runtime": 4.2449, |
|
"eval_samples_per_second": 141.345, |
|
"eval_steps_per_second": 4.476, |
|
"step": 2193 |
|
}, |
|
{ |
|
"epoch": 117.33, |
|
"learning_rate": 4.197530864197531e-05, |
|
"loss": 0.1348, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 117.87, |
|
"learning_rate": 4.1913580246913584e-05, |
|
"loss": 0.096, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 117.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.382623672485352, |
|
"eval_runtime": 4.236, |
|
"eval_samples_per_second": 141.643, |
|
"eval_steps_per_second": 4.485, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 118.4, |
|
"learning_rate": 4.185185185185185e-05, |
|
"loss": 0.1375, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 118.93, |
|
"learning_rate": 4.1790123456790124e-05, |
|
"loss": 0.1158, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 118.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.452427387237549, |
|
"eval_runtime": 4.1862, |
|
"eval_samples_per_second": 143.327, |
|
"eval_steps_per_second": 4.539, |
|
"step": 2231 |
|
}, |
|
{ |
|
"epoch": 119.47, |
|
"learning_rate": 4.17283950617284e-05, |
|
"loss": 0.0863, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.099, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.497795581817627, |
|
"eval_runtime": 4.2433, |
|
"eval_samples_per_second": 141.399, |
|
"eval_steps_per_second": 4.478, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 120.53, |
|
"learning_rate": 4.1604938271604946e-05, |
|
"loss": 0.1065, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 120.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.412367343902588, |
|
"eval_runtime": 4.2487, |
|
"eval_samples_per_second": 141.22, |
|
"eval_steps_per_second": 4.472, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 121.07, |
|
"learning_rate": 4.154320987654321e-05, |
|
"loss": 0.1202, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 121.6, |
|
"learning_rate": 4.148148148148148e-05, |
|
"loss": 0.129, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 121.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.381356239318848, |
|
"eval_runtime": 4.2226, |
|
"eval_samples_per_second": 142.092, |
|
"eval_steps_per_second": 4.5, |
|
"step": 2287 |
|
}, |
|
{ |
|
"epoch": 122.13, |
|
"learning_rate": 4.1419753086419754e-05, |
|
"loss": 0.1152, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 122.67, |
|
"learning_rate": 4.135802469135803e-05, |
|
"loss": 0.1047, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 122.99, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.366286754608154, |
|
"eval_runtime": 4.1923, |
|
"eval_samples_per_second": 143.119, |
|
"eval_steps_per_second": 4.532, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 123.2, |
|
"learning_rate": 4.12962962962963e-05, |
|
"loss": 0.1075, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 123.73, |
|
"learning_rate": 4.1234567901234575e-05, |
|
"loss": 0.101, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.511256694793701, |
|
"eval_runtime": 4.2032, |
|
"eval_samples_per_second": 142.748, |
|
"eval_steps_per_second": 4.52, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 124.27, |
|
"learning_rate": 4.117283950617284e-05, |
|
"loss": 0.1111, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 124.8, |
|
"learning_rate": 4.111111111111111e-05, |
|
"loss": 0.1076, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 124.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.455278396606445, |
|
"eval_runtime": 4.1874, |
|
"eval_samples_per_second": 143.286, |
|
"eval_steps_per_second": 4.537, |
|
"step": 2343 |
|
}, |
|
{ |
|
"epoch": 125.33, |
|
"learning_rate": 4.104938271604938e-05, |
|
"loss": 0.112, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 125.87, |
|
"learning_rate": 4.0987654320987657e-05, |
|
"loss": 0.1135, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 125.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.435062885284424, |
|
"eval_runtime": 4.2228, |
|
"eval_samples_per_second": 142.087, |
|
"eval_steps_per_second": 4.499, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 126.4, |
|
"learning_rate": 4.092592592592593e-05, |
|
"loss": 0.0839, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 126.93, |
|
"learning_rate": 4.0864197530864204e-05, |
|
"loss": 0.1066, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 126.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.4874396324157715, |
|
"eval_runtime": 4.1945, |
|
"eval_samples_per_second": 143.044, |
|
"eval_steps_per_second": 4.53, |
|
"step": 2381 |
|
}, |
|
{ |
|
"epoch": 127.47, |
|
"learning_rate": 4.080246913580247e-05, |
|
"loss": 0.1007, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.1256, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.463526725769043, |
|
"eval_runtime": 4.2437, |
|
"eval_samples_per_second": 141.387, |
|
"eval_steps_per_second": 4.477, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 128.53, |
|
"learning_rate": 4.067901234567901e-05, |
|
"loss": 0.0932, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 128.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.457594394683838, |
|
"eval_runtime": 4.2373, |
|
"eval_samples_per_second": 141.599, |
|
"eval_steps_per_second": 4.484, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 129.07, |
|
"learning_rate": 4.0617283950617286e-05, |
|
"loss": 0.1158, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 129.6, |
|
"learning_rate": 4.055555555555556e-05, |
|
"loss": 0.1189, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 129.97, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.577010154724121, |
|
"eval_runtime": 4.2164, |
|
"eval_samples_per_second": 142.301, |
|
"eval_steps_per_second": 4.506, |
|
"step": 2437 |
|
}, |
|
{ |
|
"epoch": 130.13, |
|
"learning_rate": 4.049382716049383e-05, |
|
"loss": 0.116, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 130.67, |
|
"learning_rate": 4.04320987654321e-05, |
|
"loss": 0.1096, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 130.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.492093563079834, |
|
"eval_runtime": 4.2174, |
|
"eval_samples_per_second": 142.267, |
|
"eval_steps_per_second": 4.505, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 131.2, |
|
"learning_rate": 4.0370370370370374e-05, |
|
"loss": 0.1004, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 131.73, |
|
"learning_rate": 4.030864197530864e-05, |
|
"loss": 0.0791, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.508973598480225, |
|
"eval_runtime": 4.207, |
|
"eval_samples_per_second": 142.618, |
|
"eval_steps_per_second": 4.516, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 132.27, |
|
"learning_rate": 4.0246913580246915e-05, |
|
"loss": 0.0935, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 132.8, |
|
"learning_rate": 4.018518518518519e-05, |
|
"loss": 0.1152, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 132.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.457157611846924, |
|
"eval_runtime": 4.1941, |
|
"eval_samples_per_second": 143.058, |
|
"eval_steps_per_second": 4.53, |
|
"step": 2493 |
|
}, |
|
{ |
|
"epoch": 133.33, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.091, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 133.87, |
|
"learning_rate": 4.006172839506173e-05, |
|
"loss": 0.1264, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 133.97, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 4.510921001434326, |
|
"eval_runtime": 4.2057, |
|
"eval_samples_per_second": 142.663, |
|
"eval_steps_per_second": 4.518, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 134.4, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0749, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 134.93, |
|
"learning_rate": 3.993827160493827e-05, |
|
"loss": 0.1009, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 134.99, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.523574352264404, |
|
"eval_runtime": 4.1971, |
|
"eval_samples_per_second": 142.957, |
|
"eval_steps_per_second": 4.527, |
|
"step": 2531 |
|
}, |
|
{ |
|
"epoch": 135.47, |
|
"learning_rate": 3.9876543209876544e-05, |
|
"loss": 0.0964, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 3.981481481481482e-05, |
|
"loss": 0.0956, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.478328227996826, |
|
"eval_runtime": 4.1949, |
|
"eval_samples_per_second": 143.029, |
|
"eval_steps_per_second": 4.529, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 136.53, |
|
"learning_rate": 3.975308641975309e-05, |
|
"loss": 0.0919, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 136.96, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.5484466552734375, |
|
"eval_runtime": 4.2364, |
|
"eval_samples_per_second": 141.629, |
|
"eval_steps_per_second": 4.485, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 137.07, |
|
"learning_rate": 3.969135802469136e-05, |
|
"loss": 0.1154, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 137.6, |
|
"learning_rate": 3.962962962962963e-05, |
|
"loss": 0.1042, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 137.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.542301654815674, |
|
"eval_runtime": 4.2535, |
|
"eval_samples_per_second": 141.061, |
|
"eval_steps_per_second": 4.467, |
|
"step": 2587 |
|
}, |
|
{ |
|
"epoch": 138.13, |
|
"learning_rate": 3.9567901234567906e-05, |
|
"loss": 0.0859, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 138.67, |
|
"learning_rate": 3.950617283950617e-05, |
|
"loss": 0.1039, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 138.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.491814136505127, |
|
"eval_runtime": 4.1944, |
|
"eval_samples_per_second": 143.049, |
|
"eval_steps_per_second": 4.53, |
|
"step": 2606 |
|
}, |
|
{ |
|
"epoch": 139.2, |
|
"learning_rate": 3.944444444444445e-05, |
|
"loss": 0.0846, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 139.73, |
|
"learning_rate": 3.938271604938272e-05, |
|
"loss": 0.094, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.545647144317627, |
|
"eval_runtime": 4.1844, |
|
"eval_samples_per_second": 143.39, |
|
"eval_steps_per_second": 4.541, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 140.27, |
|
"learning_rate": 3.932098765432099e-05, |
|
"loss": 0.0929, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 140.8, |
|
"learning_rate": 3.925925925925926e-05, |
|
"loss": 0.1056, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 140.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.521935939788818, |
|
"eval_runtime": 4.1914, |
|
"eval_samples_per_second": 143.151, |
|
"eval_steps_per_second": 4.533, |
|
"step": 2643 |
|
}, |
|
{ |
|
"epoch": 141.33, |
|
"learning_rate": 3.9197530864197535e-05, |
|
"loss": 0.1025, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 141.87, |
|
"learning_rate": 3.91358024691358e-05, |
|
"loss": 0.0918, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 141.97, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.52545166015625, |
|
"eval_runtime": 4.2013, |
|
"eval_samples_per_second": 142.812, |
|
"eval_steps_per_second": 4.522, |
|
"step": 2662 |
|
}, |
|
{ |
|
"epoch": 142.4, |
|
"learning_rate": 3.9074074074074076e-05, |
|
"loss": 0.098, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 142.93, |
|
"learning_rate": 3.901234567901234e-05, |
|
"loss": 0.0877, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 142.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.492305278778076, |
|
"eval_runtime": 4.1813, |
|
"eval_samples_per_second": 143.497, |
|
"eval_steps_per_second": 4.544, |
|
"step": 2681 |
|
}, |
|
{ |
|
"epoch": 143.47, |
|
"learning_rate": 3.895061728395062e-05, |
|
"loss": 0.0936, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.105, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.535154819488525, |
|
"eval_runtime": 4.1912, |
|
"eval_samples_per_second": 143.158, |
|
"eval_steps_per_second": 4.533, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 144.53, |
|
"learning_rate": 3.8827160493827165e-05, |
|
"loss": 0.0892, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 144.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.471460819244385, |
|
"eval_runtime": 4.1911, |
|
"eval_samples_per_second": 143.161, |
|
"eval_steps_per_second": 4.533, |
|
"step": 2718 |
|
}, |
|
{ |
|
"epoch": 145.07, |
|
"learning_rate": 3.876543209876544e-05, |
|
"loss": 0.1041, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 145.6, |
|
"learning_rate": 3.8703703703703705e-05, |
|
"loss": 0.0963, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 145.97, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.506024360656738, |
|
"eval_runtime": 4.1827, |
|
"eval_samples_per_second": 143.449, |
|
"eval_steps_per_second": 4.543, |
|
"step": 2737 |
|
}, |
|
{ |
|
"epoch": 146.13, |
|
"learning_rate": 3.864197530864197e-05, |
|
"loss": 0.089, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 146.67, |
|
"learning_rate": 3.8580246913580246e-05, |
|
"loss": 0.095, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 146.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.559337615966797, |
|
"eval_runtime": 4.1884, |
|
"eval_samples_per_second": 143.254, |
|
"eval_steps_per_second": 4.536, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 147.2, |
|
"learning_rate": 3.851851851851852e-05, |
|
"loss": 0.0978, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 147.73, |
|
"learning_rate": 3.8456790123456794e-05, |
|
"loss": 0.0997, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.580421447753906, |
|
"eval_runtime": 4.2171, |
|
"eval_samples_per_second": 142.278, |
|
"eval_steps_per_second": 4.505, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 148.27, |
|
"learning_rate": 3.839506172839507e-05, |
|
"loss": 0.0689, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 148.8, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 0.0839, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 148.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.59170389175415, |
|
"eval_runtime": 4.2419, |
|
"eval_samples_per_second": 141.446, |
|
"eval_steps_per_second": 4.479, |
|
"step": 2793 |
|
}, |
|
{ |
|
"epoch": 149.33, |
|
"learning_rate": 3.82716049382716e-05, |
|
"loss": 0.1028, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 149.87, |
|
"learning_rate": 3.8209876543209875e-05, |
|
"loss": 0.0924, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 149.97, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.593113422393799, |
|
"eval_runtime": 4.2519, |
|
"eval_samples_per_second": 141.113, |
|
"eval_steps_per_second": 4.469, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 150.4, |
|
"learning_rate": 3.814814814814815e-05, |
|
"loss": 0.0694, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 150.93, |
|
"learning_rate": 3.808641975308642e-05, |
|
"loss": 0.0781, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 150.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.578421115875244, |
|
"eval_runtime": 4.2293, |
|
"eval_samples_per_second": 141.867, |
|
"eval_steps_per_second": 4.492, |
|
"step": 2831 |
|
}, |
|
{ |
|
"epoch": 151.47, |
|
"learning_rate": 3.80246913580247e-05, |
|
"loss": 0.1092, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 3.7962962962962964e-05, |
|
"loss": 0.0986, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.654634475708008, |
|
"eval_runtime": 4.2497, |
|
"eval_samples_per_second": 141.185, |
|
"eval_steps_per_second": 4.471, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 152.53, |
|
"learning_rate": 3.790123456790123e-05, |
|
"loss": 0.0823, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 152.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.598492622375488, |
|
"eval_runtime": 4.242, |
|
"eval_samples_per_second": 141.443, |
|
"eval_steps_per_second": 4.479, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 153.07, |
|
"learning_rate": 3.7839506172839504e-05, |
|
"loss": 0.0913, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 153.6, |
|
"learning_rate": 3.777777777777778e-05, |
|
"loss": 0.0887, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 153.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.614808559417725, |
|
"eval_runtime": 4.2174, |
|
"eval_samples_per_second": 142.267, |
|
"eval_steps_per_second": 4.505, |
|
"step": 2887 |
|
}, |
|
{ |
|
"epoch": 154.13, |
|
"learning_rate": 3.771604938271605e-05, |
|
"loss": 0.0787, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 154.67, |
|
"learning_rate": 3.7654320987654326e-05, |
|
"loss": 0.0671, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 154.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.63968563079834, |
|
"eval_runtime": 4.1904, |
|
"eval_samples_per_second": 143.186, |
|
"eval_steps_per_second": 4.534, |
|
"step": 2906 |
|
}, |
|
{ |
|
"epoch": 155.2, |
|
"learning_rate": 3.759259259259259e-05, |
|
"loss": 0.104, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 155.73, |
|
"learning_rate": 3.7530864197530867e-05, |
|
"loss": 0.0897, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.583400249481201, |
|
"eval_runtime": 4.1921, |
|
"eval_samples_per_second": 143.126, |
|
"eval_steps_per_second": 4.532, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 156.27, |
|
"learning_rate": 3.7469135802469134e-05, |
|
"loss": 0.0795, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 156.8, |
|
"learning_rate": 3.740740740740741e-05, |
|
"loss": 0.093, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 156.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.539726257324219, |
|
"eval_runtime": 4.203, |
|
"eval_samples_per_second": 142.754, |
|
"eval_steps_per_second": 4.521, |
|
"step": 2943 |
|
}, |
|
{ |
|
"epoch": 157.33, |
|
"learning_rate": 3.734567901234568e-05, |
|
"loss": 0.0903, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 157.87, |
|
"learning_rate": 3.7283950617283955e-05, |
|
"loss": 0.0973, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 157.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.55323600769043, |
|
"eval_runtime": 4.2018, |
|
"eval_samples_per_second": 142.796, |
|
"eval_steps_per_second": 4.522, |
|
"step": 2962 |
|
}, |
|
{ |
|
"epoch": 158.4, |
|
"learning_rate": 3.722222222222222e-05, |
|
"loss": 0.064, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 158.93, |
|
"learning_rate": 3.7160493827160496e-05, |
|
"loss": 0.1001, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 158.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.58270263671875, |
|
"eval_runtime": 4.2074, |
|
"eval_samples_per_second": 142.605, |
|
"eval_steps_per_second": 4.516, |
|
"step": 2981 |
|
}, |
|
{ |
|
"epoch": 159.47, |
|
"learning_rate": 3.709876543209877e-05, |
|
"loss": 0.0788, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0884, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.572762966156006, |
|
"eval_runtime": 4.217, |
|
"eval_samples_per_second": 142.282, |
|
"eval_steps_per_second": 4.506, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 160.53, |
|
"learning_rate": 3.697530864197531e-05, |
|
"loss": 0.084, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 160.96, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.654175758361816, |
|
"eval_runtime": 4.2104, |
|
"eval_samples_per_second": 142.503, |
|
"eval_steps_per_second": 4.513, |
|
"step": 3018 |
|
}, |
|
{ |
|
"epoch": 161.07, |
|
"learning_rate": 3.6913580246913584e-05, |
|
"loss": 0.0773, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 161.6, |
|
"learning_rate": 3.685185185185185e-05, |
|
"loss": 0.0902, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 161.97, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.636648178100586, |
|
"eval_runtime": 4.2002, |
|
"eval_samples_per_second": 142.849, |
|
"eval_steps_per_second": 4.524, |
|
"step": 3037 |
|
}, |
|
{ |
|
"epoch": 162.13, |
|
"learning_rate": 3.6790123456790125e-05, |
|
"loss": 0.0819, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 162.67, |
|
"learning_rate": 3.67283950617284e-05, |
|
"loss": 0.0944, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 162.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.5957183837890625, |
|
"eval_runtime": 4.2207, |
|
"eval_samples_per_second": 142.157, |
|
"eval_steps_per_second": 4.502, |
|
"step": 3056 |
|
}, |
|
{ |
|
"epoch": 163.2, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 0.0792, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 163.73, |
|
"learning_rate": 3.660493827160494e-05, |
|
"loss": 0.0828, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.652061462402344, |
|
"eval_runtime": 4.2215, |
|
"eval_samples_per_second": 142.13, |
|
"eval_steps_per_second": 4.501, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 164.27, |
|
"learning_rate": 3.654320987654321e-05, |
|
"loss": 0.0729, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 164.8, |
|
"learning_rate": 3.648148148148148e-05, |
|
"loss": 0.0812, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 164.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.676053047180176, |
|
"eval_runtime": 4.2058, |
|
"eval_samples_per_second": 142.66, |
|
"eval_steps_per_second": 4.518, |
|
"step": 3093 |
|
}, |
|
{ |
|
"epoch": 165.33, |
|
"learning_rate": 3.6419753086419754e-05, |
|
"loss": 0.0723, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 165.87, |
|
"learning_rate": 3.635802469135803e-05, |
|
"loss": 0.0817, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 165.97, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.627193927764893, |
|
"eval_runtime": 4.22, |
|
"eval_samples_per_second": 142.182, |
|
"eval_steps_per_second": 4.502, |
|
"step": 3112 |
|
}, |
|
{ |
|
"epoch": 166.4, |
|
"learning_rate": 3.62962962962963e-05, |
|
"loss": 0.0718, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 166.93, |
|
"learning_rate": 3.623456790123457e-05, |
|
"loss": 0.07, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 166.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.653589725494385, |
|
"eval_runtime": 4.255, |
|
"eval_samples_per_second": 141.01, |
|
"eval_steps_per_second": 4.465, |
|
"step": 3131 |
|
}, |
|
{ |
|
"epoch": 167.47, |
|
"learning_rate": 3.617283950617284e-05, |
|
"loss": 0.0672, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 0.0746, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.567090034484863, |
|
"eval_runtime": 4.2691, |
|
"eval_samples_per_second": 140.544, |
|
"eval_steps_per_second": 4.451, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 168.53, |
|
"learning_rate": 3.604938271604938e-05, |
|
"loss": 0.0782, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 168.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.591490745544434, |
|
"eval_runtime": 4.2101, |
|
"eval_samples_per_second": 142.516, |
|
"eval_steps_per_second": 4.513, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 169.07, |
|
"learning_rate": 3.598765432098766e-05, |
|
"loss": 0.0667, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 169.6, |
|
"learning_rate": 3.592592592592593e-05, |
|
"loss": 0.0677, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 169.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.637347221374512, |
|
"eval_runtime": 4.2173, |
|
"eval_samples_per_second": 142.272, |
|
"eval_steps_per_second": 4.505, |
|
"step": 3187 |
|
}, |
|
{ |
|
"epoch": 170.13, |
|
"learning_rate": 3.5864197530864205e-05, |
|
"loss": 0.0839, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 170.67, |
|
"learning_rate": 3.580246913580247e-05, |
|
"loss": 0.0626, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 170.99, |
|
"eval_accuracy": 0.25833333333333336, |
|
"eval_loss": 4.672304630279541, |
|
"eval_runtime": 4.2092, |
|
"eval_samples_per_second": 142.546, |
|
"eval_steps_per_second": 4.514, |
|
"step": 3206 |
|
}, |
|
{ |
|
"epoch": 171.2, |
|
"learning_rate": 3.574074074074074e-05, |
|
"loss": 0.076, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 171.73, |
|
"learning_rate": 3.567901234567901e-05, |
|
"loss": 0.0697, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.681668281555176, |
|
"eval_runtime": 4.1945, |
|
"eval_samples_per_second": 143.044, |
|
"eval_steps_per_second": 4.53, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 172.27, |
|
"learning_rate": 3.5617283950617286e-05, |
|
"loss": 0.0826, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 172.8, |
|
"learning_rate": 3.555555555555556e-05, |
|
"loss": 0.077, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 172.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.679342269897461, |
|
"eval_runtime": 4.2134, |
|
"eval_samples_per_second": 142.403, |
|
"eval_steps_per_second": 4.509, |
|
"step": 3243 |
|
}, |
|
{ |
|
"epoch": 173.33, |
|
"learning_rate": 3.5493827160493834e-05, |
|
"loss": 0.0766, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 173.87, |
|
"learning_rate": 3.54320987654321e-05, |
|
"loss": 0.068, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 173.97, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.711019039154053, |
|
"eval_runtime": 4.211, |
|
"eval_samples_per_second": 142.484, |
|
"eval_steps_per_second": 4.512, |
|
"step": 3262 |
|
}, |
|
{ |
|
"epoch": 174.4, |
|
"learning_rate": 3.537037037037037e-05, |
|
"loss": 0.0587, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 174.93, |
|
"learning_rate": 3.530864197530864e-05, |
|
"loss": 0.0875, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 174.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.701217174530029, |
|
"eval_runtime": 4.2016, |
|
"eval_samples_per_second": 142.802, |
|
"eval_steps_per_second": 4.522, |
|
"step": 3281 |
|
}, |
|
{ |
|
"epoch": 175.47, |
|
"learning_rate": 3.5246913580246915e-05, |
|
"loss": 0.0765, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.0787, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.711310863494873, |
|
"eval_runtime": 4.2282, |
|
"eval_samples_per_second": 141.904, |
|
"eval_steps_per_second": 4.494, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 176.53, |
|
"learning_rate": 3.512345679012346e-05, |
|
"loss": 0.0779, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 176.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.699758529663086, |
|
"eval_runtime": 4.1993, |
|
"eval_samples_per_second": 142.882, |
|
"eval_steps_per_second": 4.525, |
|
"step": 3318 |
|
}, |
|
{ |
|
"epoch": 177.07, |
|
"learning_rate": 3.506172839506173e-05, |
|
"loss": 0.0632, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 177.6, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0823, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 177.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.709224224090576, |
|
"eval_runtime": 4.2154, |
|
"eval_samples_per_second": 142.335, |
|
"eval_steps_per_second": 4.507, |
|
"step": 3337 |
|
}, |
|
{ |
|
"epoch": 178.13, |
|
"learning_rate": 3.493827160493827e-05, |
|
"loss": 0.0819, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 178.67, |
|
"learning_rate": 3.4876543209876545e-05, |
|
"loss": 0.0685, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 178.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.676272869110107, |
|
"eval_runtime": 4.2277, |
|
"eval_samples_per_second": 141.922, |
|
"eval_steps_per_second": 4.494, |
|
"step": 3356 |
|
}, |
|
{ |
|
"epoch": 179.2, |
|
"learning_rate": 3.481481481481482e-05, |
|
"loss": 0.0592, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 179.73, |
|
"learning_rate": 3.475308641975309e-05, |
|
"loss": 0.0698, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.25666666666666665, |
|
"eval_loss": 4.718149185180664, |
|
"eval_runtime": 4.2111, |
|
"eval_samples_per_second": 142.479, |
|
"eval_steps_per_second": 4.512, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 180.27, |
|
"learning_rate": 3.469135802469136e-05, |
|
"loss": 0.074, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 180.8, |
|
"learning_rate": 3.4629629629629626e-05, |
|
"loss": 0.0924, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 180.96, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.715093612670898, |
|
"eval_runtime": 4.2612, |
|
"eval_samples_per_second": 140.804, |
|
"eval_steps_per_second": 4.459, |
|
"step": 3393 |
|
}, |
|
{ |
|
"epoch": 181.33, |
|
"learning_rate": 3.45679012345679e-05, |
|
"loss": 0.0792, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 181.87, |
|
"learning_rate": 3.4506172839506174e-05, |
|
"loss": 0.084, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 181.97, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.723077774047852, |
|
"eval_runtime": 4.2506, |
|
"eval_samples_per_second": 141.157, |
|
"eval_steps_per_second": 4.47, |
|
"step": 3412 |
|
}, |
|
{ |
|
"epoch": 182.4, |
|
"learning_rate": 3.444444444444445e-05, |
|
"loss": 0.08, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 182.93, |
|
"learning_rate": 3.438271604938272e-05, |
|
"loss": 0.0508, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 182.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.685625076293945, |
|
"eval_runtime": 4.2741, |
|
"eval_samples_per_second": 140.38, |
|
"eval_steps_per_second": 4.445, |
|
"step": 3431 |
|
}, |
|
{ |
|
"epoch": 183.47, |
|
"learning_rate": 3.432098765432099e-05, |
|
"loss": 0.0604, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 3.425925925925926e-05, |
|
"loss": 0.0637, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.704137325286865, |
|
"eval_runtime": 4.274, |
|
"eval_samples_per_second": 140.382, |
|
"eval_steps_per_second": 4.445, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 184.53, |
|
"learning_rate": 3.419753086419753e-05, |
|
"loss": 0.06, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 184.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.720521450042725, |
|
"eval_runtime": 4.2381, |
|
"eval_samples_per_second": 141.572, |
|
"eval_steps_per_second": 4.483, |
|
"step": 3468 |
|
}, |
|
{ |
|
"epoch": 185.07, |
|
"learning_rate": 3.41358024691358e-05, |
|
"loss": 0.062, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 185.6, |
|
"learning_rate": 3.4074074074074077e-05, |
|
"loss": 0.0659, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 185.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.725123405456543, |
|
"eval_runtime": 4.2202, |
|
"eval_samples_per_second": 142.172, |
|
"eval_steps_per_second": 4.502, |
|
"step": 3487 |
|
}, |
|
{ |
|
"epoch": 186.13, |
|
"learning_rate": 3.401234567901235e-05, |
|
"loss": 0.0788, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 186.67, |
|
"learning_rate": 3.395061728395062e-05, |
|
"loss": 0.0842, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 186.99, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.721489906311035, |
|
"eval_runtime": 4.232, |
|
"eval_samples_per_second": 141.778, |
|
"eval_steps_per_second": 4.49, |
|
"step": 3506 |
|
}, |
|
{ |
|
"epoch": 187.2, |
|
"learning_rate": 3.388888888888889e-05, |
|
"loss": 0.0773, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 187.73, |
|
"learning_rate": 3.3827160493827165e-05, |
|
"loss": 0.0733, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.706781387329102, |
|
"eval_runtime": 4.2434, |
|
"eval_samples_per_second": 141.395, |
|
"eval_steps_per_second": 4.478, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 188.27, |
|
"learning_rate": 3.376543209876543e-05, |
|
"loss": 0.0722, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 188.8, |
|
"learning_rate": 3.3703703703703706e-05, |
|
"loss": 0.0647, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 188.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.759402751922607, |
|
"eval_runtime": 4.2349, |
|
"eval_samples_per_second": 141.679, |
|
"eval_steps_per_second": 4.486, |
|
"step": 3543 |
|
}, |
|
{ |
|
"epoch": 189.33, |
|
"learning_rate": 3.364197530864198e-05, |
|
"loss": 0.0714, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 189.87, |
|
"learning_rate": 3.3580246913580247e-05, |
|
"loss": 0.0569, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 189.97, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.783107280731201, |
|
"eval_runtime": 4.2157, |
|
"eval_samples_per_second": 142.324, |
|
"eval_steps_per_second": 4.507, |
|
"step": 3562 |
|
}, |
|
{ |
|
"epoch": 190.4, |
|
"learning_rate": 3.351851851851852e-05, |
|
"loss": 0.0673, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 190.93, |
|
"learning_rate": 3.3456790123456794e-05, |
|
"loss": 0.0883, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 190.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.721207618713379, |
|
"eval_runtime": 4.2273, |
|
"eval_samples_per_second": 141.935, |
|
"eval_steps_per_second": 4.495, |
|
"step": 3581 |
|
}, |
|
{ |
|
"epoch": 191.47, |
|
"learning_rate": 3.339506172839506e-05, |
|
"loss": 0.0704, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.0622, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.6877546310424805, |
|
"eval_runtime": 4.2265, |
|
"eval_samples_per_second": 141.962, |
|
"eval_steps_per_second": 4.495, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 192.53, |
|
"learning_rate": 3.327160493827161e-05, |
|
"loss": 0.057, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 192.96, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.665402889251709, |
|
"eval_runtime": 4.2317, |
|
"eval_samples_per_second": 141.788, |
|
"eval_steps_per_second": 4.49, |
|
"step": 3618 |
|
}, |
|
{ |
|
"epoch": 193.07, |
|
"learning_rate": 3.3209876543209876e-05, |
|
"loss": 0.0563, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 193.6, |
|
"learning_rate": 3.314814814814815e-05, |
|
"loss": 0.0654, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 193.97, |
|
"eval_accuracy": 0.25166666666666665, |
|
"eval_loss": 4.635808944702148, |
|
"eval_runtime": 4.2421, |
|
"eval_samples_per_second": 141.438, |
|
"eval_steps_per_second": 4.479, |
|
"step": 3637 |
|
}, |
|
{ |
|
"epoch": 194.13, |
|
"learning_rate": 3.308641975308642e-05, |
|
"loss": 0.0771, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 194.67, |
|
"learning_rate": 3.30246913580247e-05, |
|
"loss": 0.0868, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 194.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.662071704864502, |
|
"eval_runtime": 4.2363, |
|
"eval_samples_per_second": 141.634, |
|
"eval_steps_per_second": 4.485, |
|
"step": 3656 |
|
}, |
|
{ |
|
"epoch": 195.2, |
|
"learning_rate": 3.2962962962962964e-05, |
|
"loss": 0.0592, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 195.73, |
|
"learning_rate": 3.290123456790124e-05, |
|
"loss": 0.0789, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.698493003845215, |
|
"eval_runtime": 4.2392, |
|
"eval_samples_per_second": 141.537, |
|
"eval_steps_per_second": 4.482, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 196.27, |
|
"learning_rate": 3.2839506172839505e-05, |
|
"loss": 0.0683, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 196.8, |
|
"learning_rate": 3.277777777777778e-05, |
|
"loss": 0.0657, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 196.96, |
|
"eval_accuracy": 0.25666666666666665, |
|
"eval_loss": 4.663649559020996, |
|
"eval_runtime": 4.2398, |
|
"eval_samples_per_second": 141.517, |
|
"eval_steps_per_second": 4.481, |
|
"step": 3693 |
|
}, |
|
{ |
|
"epoch": 197.33, |
|
"learning_rate": 3.271604938271605e-05, |
|
"loss": 0.057, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 197.87, |
|
"learning_rate": 3.2654320987654326e-05, |
|
"loss": 0.0648, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 197.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.7698283195495605, |
|
"eval_runtime": 4.2294, |
|
"eval_samples_per_second": 141.864, |
|
"eval_steps_per_second": 4.492, |
|
"step": 3712 |
|
}, |
|
{ |
|
"epoch": 198.4, |
|
"learning_rate": 3.25925925925926e-05, |
|
"loss": 0.0577, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 198.93, |
|
"learning_rate": 3.253086419753087e-05, |
|
"loss": 0.0635, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 198.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.722621440887451, |
|
"eval_runtime": 4.2242, |
|
"eval_samples_per_second": 142.038, |
|
"eval_steps_per_second": 4.498, |
|
"step": 3731 |
|
}, |
|
{ |
|
"epoch": 199.47, |
|
"learning_rate": 3.2469135802469134e-05, |
|
"loss": 0.0563, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.0637, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.748103618621826, |
|
"eval_runtime": 4.2435, |
|
"eval_samples_per_second": 141.393, |
|
"eval_steps_per_second": 4.477, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 200.53, |
|
"learning_rate": 3.234567901234568e-05, |
|
"loss": 0.0665, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 200.96, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.778894901275635, |
|
"eval_runtime": 4.2222, |
|
"eval_samples_per_second": 142.106, |
|
"eval_steps_per_second": 4.5, |
|
"step": 3768 |
|
}, |
|
{ |
|
"epoch": 201.07, |
|
"learning_rate": 3.2283950617283955e-05, |
|
"loss": 0.0649, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 201.6, |
|
"learning_rate": 3.222222222222223e-05, |
|
"loss": 0.0799, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 201.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.701383590698242, |
|
"eval_runtime": 4.2577, |
|
"eval_samples_per_second": 140.92, |
|
"eval_steps_per_second": 4.462, |
|
"step": 3787 |
|
}, |
|
{ |
|
"epoch": 202.13, |
|
"learning_rate": 3.216049382716049e-05, |
|
"loss": 0.049, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 202.67, |
|
"learning_rate": 3.209876543209876e-05, |
|
"loss": 0.064, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 202.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.752817153930664, |
|
"eval_runtime": 4.2702, |
|
"eval_samples_per_second": 140.507, |
|
"eval_steps_per_second": 4.449, |
|
"step": 3806 |
|
}, |
|
{ |
|
"epoch": 203.2, |
|
"learning_rate": 3.203703703703704e-05, |
|
"loss": 0.0554, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 203.73, |
|
"learning_rate": 3.197530864197531e-05, |
|
"loss": 0.0772, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.740113735198975, |
|
"eval_runtime": 4.2544, |
|
"eval_samples_per_second": 141.031, |
|
"eval_steps_per_second": 4.466, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 204.27, |
|
"learning_rate": 3.1913580246913585e-05, |
|
"loss": 0.0625, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 204.8, |
|
"learning_rate": 3.185185185185185e-05, |
|
"loss": 0.0438, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 204.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.76779317855835, |
|
"eval_runtime": 4.2439, |
|
"eval_samples_per_second": 141.379, |
|
"eval_steps_per_second": 4.477, |
|
"step": 3843 |
|
}, |
|
{ |
|
"epoch": 205.33, |
|
"learning_rate": 3.1790123456790125e-05, |
|
"loss": 0.0716, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 205.87, |
|
"learning_rate": 3.172839506172839e-05, |
|
"loss": 0.0766, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 205.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.71795654296875, |
|
"eval_runtime": 4.2289, |
|
"eval_samples_per_second": 141.881, |
|
"eval_steps_per_second": 4.493, |
|
"step": 3862 |
|
}, |
|
{ |
|
"epoch": 206.4, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 0.08, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 206.93, |
|
"learning_rate": 3.160493827160494e-05, |
|
"loss": 0.0687, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 206.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.7058281898498535, |
|
"eval_runtime": 4.2979, |
|
"eval_samples_per_second": 139.602, |
|
"eval_steps_per_second": 4.421, |
|
"step": 3881 |
|
}, |
|
{ |
|
"epoch": 207.47, |
|
"learning_rate": 3.1543209876543214e-05, |
|
"loss": 0.0686, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.0801, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.7583746910095215, |
|
"eval_runtime": 4.3035, |
|
"eval_samples_per_second": 139.423, |
|
"eval_steps_per_second": 4.415, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 208.53, |
|
"learning_rate": 3.1419753086419755e-05, |
|
"loss": 0.0772, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 208.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.73037576675415, |
|
"eval_runtime": 4.265, |
|
"eval_samples_per_second": 140.681, |
|
"eval_steps_per_second": 4.455, |
|
"step": 3918 |
|
}, |
|
{ |
|
"epoch": 209.07, |
|
"learning_rate": 3.135802469135803e-05, |
|
"loss": 0.0504, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 209.6, |
|
"learning_rate": 3.1296296296296295e-05, |
|
"loss": 0.0663, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 209.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.693957805633545, |
|
"eval_runtime": 4.2721, |
|
"eval_samples_per_second": 140.445, |
|
"eval_steps_per_second": 4.447, |
|
"step": 3937 |
|
}, |
|
{ |
|
"epoch": 210.13, |
|
"learning_rate": 3.123456790123457e-05, |
|
"loss": 0.0497, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 210.67, |
|
"learning_rate": 3.117283950617284e-05, |
|
"loss": 0.0529, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 210.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.694019317626953, |
|
"eval_runtime": 4.2603, |
|
"eval_samples_per_second": 140.834, |
|
"eval_steps_per_second": 4.46, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 211.2, |
|
"learning_rate": 3.111111111111111e-05, |
|
"loss": 0.0559, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 211.73, |
|
"learning_rate": 3.1049382716049384e-05, |
|
"loss": 0.0568, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.733299255371094, |
|
"eval_runtime": 4.2654, |
|
"eval_samples_per_second": 140.666, |
|
"eval_steps_per_second": 4.454, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 212.27, |
|
"learning_rate": 3.098765432098766e-05, |
|
"loss": 0.057, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 212.8, |
|
"learning_rate": 3.0925925925925924e-05, |
|
"loss": 0.0697, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 212.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.667250156402588, |
|
"eval_runtime": 4.2885, |
|
"eval_samples_per_second": 139.909, |
|
"eval_steps_per_second": 4.43, |
|
"step": 3993 |
|
}, |
|
{ |
|
"epoch": 213.33, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.0402, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 213.87, |
|
"learning_rate": 3.080246913580247e-05, |
|
"loss": 0.0394, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 213.97, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.673309326171875, |
|
"eval_runtime": 4.2825, |
|
"eval_samples_per_second": 140.105, |
|
"eval_steps_per_second": 4.437, |
|
"step": 4012 |
|
}, |
|
{ |
|
"epoch": 214.4, |
|
"learning_rate": 3.074074074074074e-05, |
|
"loss": 0.0398, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 214.93, |
|
"learning_rate": 3.067901234567901e-05, |
|
"loss": 0.0625, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 214.99, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.738312244415283, |
|
"eval_runtime": 4.3054, |
|
"eval_samples_per_second": 139.359, |
|
"eval_steps_per_second": 4.413, |
|
"step": 4031 |
|
}, |
|
{ |
|
"epoch": 215.47, |
|
"learning_rate": 3.061728395061729e-05, |
|
"loss": 0.0626, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 0.0588, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.767359256744385, |
|
"eval_runtime": 4.3344, |
|
"eval_samples_per_second": 138.426, |
|
"eval_steps_per_second": 4.383, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 216.53, |
|
"learning_rate": 3.0493827160493827e-05, |
|
"loss": 0.0594, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 216.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.687304496765137, |
|
"eval_runtime": 4.285, |
|
"eval_samples_per_second": 140.024, |
|
"eval_steps_per_second": 4.434, |
|
"step": 4068 |
|
}, |
|
{ |
|
"epoch": 217.07, |
|
"learning_rate": 3.0432098765432098e-05, |
|
"loss": 0.0721, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 217.6, |
|
"learning_rate": 3.037037037037037e-05, |
|
"loss": 0.0451, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 217.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.671844959259033, |
|
"eval_runtime": 4.2925, |
|
"eval_samples_per_second": 139.78, |
|
"eval_steps_per_second": 4.426, |
|
"step": 4087 |
|
}, |
|
{ |
|
"epoch": 218.13, |
|
"learning_rate": 3.0308641975308642e-05, |
|
"loss": 0.0445, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 218.67, |
|
"learning_rate": 3.0246913580246916e-05, |
|
"loss": 0.047, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 218.99, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.714609146118164, |
|
"eval_runtime": 4.2883, |
|
"eval_samples_per_second": 139.914, |
|
"eval_steps_per_second": 4.431, |
|
"step": 4106 |
|
}, |
|
{ |
|
"epoch": 219.2, |
|
"learning_rate": 3.018518518518519e-05, |
|
"loss": 0.0648, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 219.73, |
|
"learning_rate": 3.012345679012346e-05, |
|
"loss": 0.0445, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.717392921447754, |
|
"eval_runtime": 4.2776, |
|
"eval_samples_per_second": 140.266, |
|
"eval_steps_per_second": 4.442, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 220.27, |
|
"learning_rate": 3.0061728395061727e-05, |
|
"loss": 0.0438, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 220.8, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0746, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 220.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.670175075531006, |
|
"eval_runtime": 4.2998, |
|
"eval_samples_per_second": 139.542, |
|
"eval_steps_per_second": 4.419, |
|
"step": 4143 |
|
}, |
|
{ |
|
"epoch": 221.33, |
|
"learning_rate": 2.993827160493827e-05, |
|
"loss": 0.0617, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 221.87, |
|
"learning_rate": 2.9876543209876545e-05, |
|
"loss": 0.0697, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 221.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.646183967590332, |
|
"eval_runtime": 4.3381, |
|
"eval_samples_per_second": 138.308, |
|
"eval_steps_per_second": 4.38, |
|
"step": 4162 |
|
}, |
|
{ |
|
"epoch": 222.4, |
|
"learning_rate": 2.981481481481482e-05, |
|
"loss": 0.0866, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 222.93, |
|
"learning_rate": 2.975308641975309e-05, |
|
"loss": 0.0562, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 222.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.695559024810791, |
|
"eval_runtime": 4.3517, |
|
"eval_samples_per_second": 137.876, |
|
"eval_steps_per_second": 4.366, |
|
"step": 4181 |
|
}, |
|
{ |
|
"epoch": 223.47, |
|
"learning_rate": 2.9691358024691356e-05, |
|
"loss": 0.0986, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.047, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.727797985076904, |
|
"eval_runtime": 4.3288, |
|
"eval_samples_per_second": 138.607, |
|
"eval_steps_per_second": 4.389, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 224.53, |
|
"learning_rate": 2.95679012345679e-05, |
|
"loss": 0.0612, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 224.96, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.730659484863281, |
|
"eval_runtime": 4.3287, |
|
"eval_samples_per_second": 138.611, |
|
"eval_steps_per_second": 4.389, |
|
"step": 4218 |
|
}, |
|
{ |
|
"epoch": 225.07, |
|
"learning_rate": 2.9506172839506174e-05, |
|
"loss": 0.048, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 225.6, |
|
"learning_rate": 2.9444444444444448e-05, |
|
"loss": 0.0625, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 225.97, |
|
"eval_accuracy": 0.25666666666666665, |
|
"eval_loss": 4.667015075683594, |
|
"eval_runtime": 4.3594, |
|
"eval_samples_per_second": 137.632, |
|
"eval_steps_per_second": 4.358, |
|
"step": 4237 |
|
}, |
|
{ |
|
"epoch": 226.13, |
|
"learning_rate": 2.9382716049382718e-05, |
|
"loss": 0.0558, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 226.67, |
|
"learning_rate": 2.9320987654320992e-05, |
|
"loss": 0.0739, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 226.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.710987091064453, |
|
"eval_runtime": 4.3562, |
|
"eval_samples_per_second": 137.734, |
|
"eval_steps_per_second": 4.362, |
|
"step": 4256 |
|
}, |
|
{ |
|
"epoch": 227.2, |
|
"learning_rate": 2.925925925925926e-05, |
|
"loss": 0.054, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 227.73, |
|
"learning_rate": 2.919753086419753e-05, |
|
"loss": 0.0637, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_accuracy": 0.22, |
|
"eval_loss": 4.703871726989746, |
|
"eval_runtime": 4.3307, |
|
"eval_samples_per_second": 138.545, |
|
"eval_steps_per_second": 4.387, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 228.27, |
|
"learning_rate": 2.9135802469135803e-05, |
|
"loss": 0.0491, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 228.8, |
|
"learning_rate": 2.9074074074074077e-05, |
|
"loss": 0.0461, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 228.96, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.711916923522949, |
|
"eval_runtime": 4.3352, |
|
"eval_samples_per_second": 138.402, |
|
"eval_steps_per_second": 4.383, |
|
"step": 4293 |
|
}, |
|
{ |
|
"epoch": 229.33, |
|
"learning_rate": 2.9012345679012347e-05, |
|
"loss": 0.0544, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 229.87, |
|
"learning_rate": 2.895061728395062e-05, |
|
"loss": 0.0506, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 229.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.7098541259765625, |
|
"eval_runtime": 4.3688, |
|
"eval_samples_per_second": 137.339, |
|
"eval_steps_per_second": 4.349, |
|
"step": 4312 |
|
}, |
|
{ |
|
"epoch": 230.4, |
|
"learning_rate": 2.8888888888888888e-05, |
|
"loss": 0.0511, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 230.93, |
|
"learning_rate": 2.882716049382716e-05, |
|
"loss": 0.0412, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 230.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.671385288238525, |
|
"eval_runtime": 4.3317, |
|
"eval_samples_per_second": 138.515, |
|
"eval_steps_per_second": 4.386, |
|
"step": 4331 |
|
}, |
|
{ |
|
"epoch": 231.47, |
|
"learning_rate": 2.8765432098765432e-05, |
|
"loss": 0.0681, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"learning_rate": 2.8703703703703706e-05, |
|
"loss": 0.057, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.692080497741699, |
|
"eval_runtime": 4.3407, |
|
"eval_samples_per_second": 138.227, |
|
"eval_steps_per_second": 4.377, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 232.53, |
|
"learning_rate": 2.8641975308641977e-05, |
|
"loss": 0.0402, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 232.96, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.754528045654297, |
|
"eval_runtime": 4.3356, |
|
"eval_samples_per_second": 138.389, |
|
"eval_steps_per_second": 4.382, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 233.07, |
|
"learning_rate": 2.858024691358025e-05, |
|
"loss": 0.0766, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 233.6, |
|
"learning_rate": 2.851851851851852e-05, |
|
"loss": 0.058, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 233.97, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.7573161125183105, |
|
"eval_runtime": 4.353, |
|
"eval_samples_per_second": 137.837, |
|
"eval_steps_per_second": 4.365, |
|
"step": 4387 |
|
}, |
|
{ |
|
"epoch": 234.13, |
|
"learning_rate": 2.8456790123456788e-05, |
|
"loss": 0.0749, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 234.67, |
|
"learning_rate": 2.839506172839506e-05, |
|
"loss": 0.0661, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 234.99, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.6800408363342285, |
|
"eval_runtime": 4.3414, |
|
"eval_samples_per_second": 138.205, |
|
"eval_steps_per_second": 4.377, |
|
"step": 4406 |
|
}, |
|
{ |
|
"epoch": 235.2, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 0.0613, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 235.73, |
|
"learning_rate": 2.8271604938271606e-05, |
|
"loss": 0.0613, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.653302192687988, |
|
"eval_runtime": 4.3257, |
|
"eval_samples_per_second": 138.707, |
|
"eval_steps_per_second": 4.392, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 236.27, |
|
"learning_rate": 2.820987654320988e-05, |
|
"loss": 0.0555, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 236.8, |
|
"learning_rate": 2.814814814814815e-05, |
|
"loss": 0.0462, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 236.96, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.674839496612549, |
|
"eval_runtime": 4.3407, |
|
"eval_samples_per_second": 138.228, |
|
"eval_steps_per_second": 4.377, |
|
"step": 4443 |
|
}, |
|
{ |
|
"epoch": 237.33, |
|
"learning_rate": 2.8086419753086424e-05, |
|
"loss": 0.0742, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 237.87, |
|
"learning_rate": 2.802469135802469e-05, |
|
"loss": 0.0494, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 237.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.687388896942139, |
|
"eval_runtime": 4.3685, |
|
"eval_samples_per_second": 137.346, |
|
"eval_steps_per_second": 4.349, |
|
"step": 4462 |
|
}, |
|
{ |
|
"epoch": 238.4, |
|
"learning_rate": 2.7962962962962965e-05, |
|
"loss": 0.0617, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 238.93, |
|
"learning_rate": 2.7901234567901235e-05, |
|
"loss": 0.0643, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 238.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.729123592376709, |
|
"eval_runtime": 4.3628, |
|
"eval_samples_per_second": 137.527, |
|
"eval_steps_per_second": 4.355, |
|
"step": 4481 |
|
}, |
|
{ |
|
"epoch": 239.47, |
|
"learning_rate": 2.783950617283951e-05, |
|
"loss": 0.0557, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.0422, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.7088117599487305, |
|
"eval_runtime": 4.3986, |
|
"eval_samples_per_second": 136.407, |
|
"eval_steps_per_second": 4.32, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 240.53, |
|
"learning_rate": 2.7716049382716053e-05, |
|
"loss": 0.0376, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 240.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.74221658706665, |
|
"eval_runtime": 4.4146, |
|
"eval_samples_per_second": 135.913, |
|
"eval_steps_per_second": 4.304, |
|
"step": 4518 |
|
}, |
|
{ |
|
"epoch": 241.07, |
|
"learning_rate": 2.765432098765432e-05, |
|
"loss": 0.0343, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 241.6, |
|
"learning_rate": 2.7592592592592594e-05, |
|
"loss": 0.0696, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 241.97, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.801132678985596, |
|
"eval_runtime": 4.3976, |
|
"eval_samples_per_second": 136.44, |
|
"eval_steps_per_second": 4.321, |
|
"step": 4537 |
|
}, |
|
{ |
|
"epoch": 242.13, |
|
"learning_rate": 2.7530864197530864e-05, |
|
"loss": 0.0603, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 242.67, |
|
"learning_rate": 2.7469135802469138e-05, |
|
"loss": 0.0609, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 242.99, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.801338195800781, |
|
"eval_runtime": 4.3836, |
|
"eval_samples_per_second": 136.873, |
|
"eval_steps_per_second": 4.334, |
|
"step": 4556 |
|
}, |
|
{ |
|
"epoch": 243.2, |
|
"learning_rate": 2.7407407407407408e-05, |
|
"loss": 0.0484, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 243.73, |
|
"learning_rate": 2.7345679012345682e-05, |
|
"loss": 0.0637, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.760260105133057, |
|
"eval_runtime": 4.4032, |
|
"eval_samples_per_second": 136.265, |
|
"eval_steps_per_second": 4.315, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 244.27, |
|
"learning_rate": 2.7283950617283956e-05, |
|
"loss": 0.0489, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 244.8, |
|
"learning_rate": 2.7222222222222223e-05, |
|
"loss": 0.0529, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 244.96, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.789524078369141, |
|
"eval_runtime": 4.388, |
|
"eval_samples_per_second": 136.736, |
|
"eval_steps_per_second": 4.33, |
|
"step": 4593 |
|
}, |
|
{ |
|
"epoch": 245.33, |
|
"learning_rate": 2.7160493827160493e-05, |
|
"loss": 0.0617, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 245.87, |
|
"learning_rate": 2.7098765432098767e-05, |
|
"loss": 0.0603, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 245.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.763910293579102, |
|
"eval_runtime": 4.3989, |
|
"eval_samples_per_second": 136.397, |
|
"eval_steps_per_second": 4.319, |
|
"step": 4612 |
|
}, |
|
{ |
|
"epoch": 246.4, |
|
"learning_rate": 2.7037037037037037e-05, |
|
"loss": 0.0558, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 246.93, |
|
"learning_rate": 2.697530864197531e-05, |
|
"loss": 0.0365, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 246.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.728492259979248, |
|
"eval_runtime": 4.3868, |
|
"eval_samples_per_second": 136.774, |
|
"eval_steps_per_second": 4.331, |
|
"step": 4631 |
|
}, |
|
{ |
|
"epoch": 247.47, |
|
"learning_rate": 2.6913580246913585e-05, |
|
"loss": 0.0862, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"learning_rate": 2.6851851851851855e-05, |
|
"loss": 0.0732, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.725191116333008, |
|
"eval_runtime": 4.4162, |
|
"eval_samples_per_second": 135.863, |
|
"eval_steps_per_second": 4.302, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 248.53, |
|
"learning_rate": 2.6790123456790122e-05, |
|
"loss": 0.0709, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 248.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.761960506439209, |
|
"eval_runtime": 4.3965, |
|
"eval_samples_per_second": 136.472, |
|
"eval_steps_per_second": 4.322, |
|
"step": 4668 |
|
}, |
|
{ |
|
"epoch": 249.07, |
|
"learning_rate": 2.6728395061728396e-05, |
|
"loss": 0.0463, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 249.6, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.0485, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 249.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.752857208251953, |
|
"eval_runtime": 4.404, |
|
"eval_samples_per_second": 136.24, |
|
"eval_steps_per_second": 4.314, |
|
"step": 4687 |
|
}, |
|
{ |
|
"epoch": 250.13, |
|
"learning_rate": 2.660493827160494e-05, |
|
"loss": 0.0453, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 250.67, |
|
"learning_rate": 2.654320987654321e-05, |
|
"loss": 0.0449, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 250.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.800561428070068, |
|
"eval_runtime": 4.4195, |
|
"eval_samples_per_second": 135.761, |
|
"eval_steps_per_second": 4.299, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 251.2, |
|
"learning_rate": 2.6481481481481485e-05, |
|
"loss": 0.0629, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 251.73, |
|
"learning_rate": 2.641975308641975e-05, |
|
"loss": 0.0506, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.802790641784668, |
|
"eval_runtime": 4.4472, |
|
"eval_samples_per_second": 134.916, |
|
"eval_steps_per_second": 4.272, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 252.27, |
|
"learning_rate": 2.6358024691358025e-05, |
|
"loss": 0.038, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 252.8, |
|
"learning_rate": 2.6296296296296296e-05, |
|
"loss": 0.0455, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 252.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.777773380279541, |
|
"eval_runtime": 4.4493, |
|
"eval_samples_per_second": 134.853, |
|
"eval_steps_per_second": 4.27, |
|
"step": 4743 |
|
}, |
|
{ |
|
"epoch": 253.33, |
|
"learning_rate": 2.623456790123457e-05, |
|
"loss": 0.0418, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 253.87, |
|
"learning_rate": 2.617283950617284e-05, |
|
"loss": 0.0594, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 253.97, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.743904113769531, |
|
"eval_runtime": 4.4293, |
|
"eval_samples_per_second": 135.461, |
|
"eval_steps_per_second": 4.29, |
|
"step": 4762 |
|
}, |
|
{ |
|
"epoch": 254.4, |
|
"learning_rate": 2.6111111111111114e-05, |
|
"loss": 0.0511, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 254.93, |
|
"learning_rate": 2.6049382716049388e-05, |
|
"loss": 0.0551, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 254.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.806947708129883, |
|
"eval_runtime": 4.4446, |
|
"eval_samples_per_second": 134.995, |
|
"eval_steps_per_second": 4.275, |
|
"step": 4781 |
|
}, |
|
{ |
|
"epoch": 255.47, |
|
"learning_rate": 2.5987654320987655e-05, |
|
"loss": 0.0512, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0435, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.817090034484863, |
|
"eval_runtime": 4.463, |
|
"eval_samples_per_second": 134.44, |
|
"eval_steps_per_second": 4.257, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 256.53, |
|
"learning_rate": 2.58641975308642e-05, |
|
"loss": 0.042, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 256.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.796122074127197, |
|
"eval_runtime": 4.4296, |
|
"eval_samples_per_second": 135.453, |
|
"eval_steps_per_second": 4.289, |
|
"step": 4818 |
|
}, |
|
{ |
|
"epoch": 257.07, |
|
"learning_rate": 2.580246913580247e-05, |
|
"loss": 0.0614, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 257.6, |
|
"learning_rate": 2.5740740740740743e-05, |
|
"loss": 0.0403, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 257.97, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.817234516143799, |
|
"eval_runtime": 4.4455, |
|
"eval_samples_per_second": 134.967, |
|
"eval_steps_per_second": 4.274, |
|
"step": 4837 |
|
}, |
|
{ |
|
"epoch": 258.13, |
|
"learning_rate": 2.5679012345679017e-05, |
|
"loss": 0.0428, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 258.67, |
|
"learning_rate": 2.5617283950617287e-05, |
|
"loss": 0.0524, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 258.99, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.8536577224731445, |
|
"eval_runtime": 4.4559, |
|
"eval_samples_per_second": 134.654, |
|
"eval_steps_per_second": 4.264, |
|
"step": 4856 |
|
}, |
|
{ |
|
"epoch": 259.2, |
|
"learning_rate": 2.5555555555555554e-05, |
|
"loss": 0.0495, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 259.73, |
|
"learning_rate": 2.5493827160493828e-05, |
|
"loss": 0.0461, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.769797325134277, |
|
"eval_runtime": 4.4781, |
|
"eval_samples_per_second": 133.984, |
|
"eval_steps_per_second": 4.243, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 260.27, |
|
"learning_rate": 2.5432098765432098e-05, |
|
"loss": 0.0547, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 260.8, |
|
"learning_rate": 2.5370370370370372e-05, |
|
"loss": 0.05, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 260.96, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.805800437927246, |
|
"eval_runtime": 4.4587, |
|
"eval_samples_per_second": 134.567, |
|
"eval_steps_per_second": 4.261, |
|
"step": 4893 |
|
}, |
|
{ |
|
"epoch": 261.33, |
|
"learning_rate": 2.5308641975308646e-05, |
|
"loss": 0.0463, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 261.87, |
|
"learning_rate": 2.5246913580246916e-05, |
|
"loss": 0.0545, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 261.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.839805603027344, |
|
"eval_runtime": 5.2865, |
|
"eval_samples_per_second": 113.497, |
|
"eval_steps_per_second": 3.594, |
|
"step": 4912 |
|
}, |
|
{ |
|
"epoch": 262.4, |
|
"learning_rate": 2.5185185185185183e-05, |
|
"loss": 0.066, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 262.93, |
|
"learning_rate": 2.5123456790123457e-05, |
|
"loss": 0.0405, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 262.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.82278299331665, |
|
"eval_runtime": 4.4772, |
|
"eval_samples_per_second": 134.011, |
|
"eval_steps_per_second": 4.244, |
|
"step": 4931 |
|
}, |
|
{ |
|
"epoch": 263.47, |
|
"learning_rate": 2.5061728395061727e-05, |
|
"loss": 0.0466, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0615, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.839545726776123, |
|
"eval_runtime": 4.4638, |
|
"eval_samples_per_second": 134.416, |
|
"eval_steps_per_second": 4.257, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 264.53, |
|
"learning_rate": 2.4938271604938275e-05, |
|
"loss": 0.0381, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 264.96, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.823088645935059, |
|
"eval_runtime": 4.4558, |
|
"eval_samples_per_second": 134.657, |
|
"eval_steps_per_second": 4.264, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 265.07, |
|
"learning_rate": 2.4876543209876542e-05, |
|
"loss": 0.0609, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 265.6, |
|
"learning_rate": 2.4814814814814816e-05, |
|
"loss": 0.0464, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 265.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.818026065826416, |
|
"eval_runtime": 4.4544, |
|
"eval_samples_per_second": 134.698, |
|
"eval_steps_per_second": 4.265, |
|
"step": 4987 |
|
}, |
|
{ |
|
"epoch": 266.13, |
|
"learning_rate": 2.475308641975309e-05, |
|
"loss": 0.0454, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 266.67, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.058, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 266.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.87436056137085, |
|
"eval_runtime": 4.4992, |
|
"eval_samples_per_second": 133.357, |
|
"eval_steps_per_second": 4.223, |
|
"step": 5006 |
|
}, |
|
{ |
|
"epoch": 267.2, |
|
"learning_rate": 2.462962962962963e-05, |
|
"loss": 0.044, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 267.73, |
|
"learning_rate": 2.4567901234567904e-05, |
|
"loss": 0.0553, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.886570453643799, |
|
"eval_runtime": 4.4931, |
|
"eval_samples_per_second": 133.538, |
|
"eval_steps_per_second": 4.229, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 268.27, |
|
"learning_rate": 2.4506172839506175e-05, |
|
"loss": 0.0536, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 268.8, |
|
"learning_rate": 2.4444444444444445e-05, |
|
"loss": 0.0505, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 268.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.853390216827393, |
|
"eval_runtime": 4.4705, |
|
"eval_samples_per_second": 134.214, |
|
"eval_steps_per_second": 4.25, |
|
"step": 5043 |
|
}, |
|
{ |
|
"epoch": 269.33, |
|
"learning_rate": 2.438271604938272e-05, |
|
"loss": 0.0269, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 269.87, |
|
"learning_rate": 2.432098765432099e-05, |
|
"loss": 0.049, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 269.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.870242118835449, |
|
"eval_runtime": 4.4681, |
|
"eval_samples_per_second": 134.285, |
|
"eval_steps_per_second": 4.252, |
|
"step": 5062 |
|
}, |
|
{ |
|
"epoch": 270.4, |
|
"learning_rate": 2.425925925925926e-05, |
|
"loss": 0.0464, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 270.93, |
|
"learning_rate": 2.4197530864197533e-05, |
|
"loss": 0.0444, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 270.99, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.871464729309082, |
|
"eval_runtime": 4.4942, |
|
"eval_samples_per_second": 133.505, |
|
"eval_steps_per_second": 4.228, |
|
"step": 5081 |
|
}, |
|
{ |
|
"epoch": 271.47, |
|
"learning_rate": 2.4135802469135804e-05, |
|
"loss": 0.0433, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.0457, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.827383518218994, |
|
"eval_runtime": 4.4878, |
|
"eval_samples_per_second": 133.697, |
|
"eval_steps_per_second": 4.234, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 272.53, |
|
"learning_rate": 2.4012345679012348e-05, |
|
"loss": 0.0546, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 272.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.844120502471924, |
|
"eval_runtime": 4.5115, |
|
"eval_samples_per_second": 132.993, |
|
"eval_steps_per_second": 4.211, |
|
"step": 5118 |
|
}, |
|
{ |
|
"epoch": 273.07, |
|
"learning_rate": 2.3950617283950618e-05, |
|
"loss": 0.0427, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 273.6, |
|
"learning_rate": 2.3888888888888892e-05, |
|
"loss": 0.0378, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 273.97, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.822915077209473, |
|
"eval_runtime": 4.4872, |
|
"eval_samples_per_second": 133.713, |
|
"eval_steps_per_second": 4.234, |
|
"step": 5137 |
|
}, |
|
{ |
|
"epoch": 274.13, |
|
"learning_rate": 2.3827160493827162e-05, |
|
"loss": 0.0603, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 274.67, |
|
"learning_rate": 2.3765432098765433e-05, |
|
"loss": 0.0374, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 274.99, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.805349349975586, |
|
"eval_runtime": 4.5038, |
|
"eval_samples_per_second": 133.22, |
|
"eval_steps_per_second": 4.219, |
|
"step": 5156 |
|
}, |
|
{ |
|
"epoch": 275.2, |
|
"learning_rate": 2.3703703703703707e-05, |
|
"loss": 0.0384, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 275.73, |
|
"learning_rate": 2.3641975308641977e-05, |
|
"loss": 0.047, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.861939907073975, |
|
"eval_runtime": 4.5272, |
|
"eval_samples_per_second": 132.531, |
|
"eval_steps_per_second": 4.197, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 276.27, |
|
"learning_rate": 2.3580246913580247e-05, |
|
"loss": 0.0352, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 276.8, |
|
"learning_rate": 2.351851851851852e-05, |
|
"loss": 0.0526, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 276.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.87931489944458, |
|
"eval_runtime": 4.5397, |
|
"eval_samples_per_second": 132.166, |
|
"eval_steps_per_second": 4.185, |
|
"step": 5193 |
|
}, |
|
{ |
|
"epoch": 277.33, |
|
"learning_rate": 2.345679012345679e-05, |
|
"loss": 0.0406, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 277.87, |
|
"learning_rate": 2.3395061728395062e-05, |
|
"loss": 0.0503, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 277.97, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.9059576988220215, |
|
"eval_runtime": 4.5301, |
|
"eval_samples_per_second": 132.446, |
|
"eval_steps_per_second": 4.194, |
|
"step": 5212 |
|
}, |
|
{ |
|
"epoch": 278.4, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.043, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 278.93, |
|
"learning_rate": 2.3271604938271606e-05, |
|
"loss": 0.0414, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 278.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.86867094039917, |
|
"eval_runtime": 4.4935, |
|
"eval_samples_per_second": 133.527, |
|
"eval_steps_per_second": 4.228, |
|
"step": 5231 |
|
}, |
|
{ |
|
"epoch": 279.47, |
|
"learning_rate": 2.3209876543209877e-05, |
|
"loss": 0.0561, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.0361, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.853731155395508, |
|
"eval_runtime": 4.5103, |
|
"eval_samples_per_second": 133.029, |
|
"eval_steps_per_second": 4.213, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 280.53, |
|
"learning_rate": 2.308641975308642e-05, |
|
"loss": 0.0449, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 280.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.8204240798950195, |
|
"eval_runtime": 4.5205, |
|
"eval_samples_per_second": 132.729, |
|
"eval_steps_per_second": 4.203, |
|
"step": 5268 |
|
}, |
|
{ |
|
"epoch": 281.07, |
|
"learning_rate": 2.302469135802469e-05, |
|
"loss": 0.0527, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 281.6, |
|
"learning_rate": 2.2962962962962965e-05, |
|
"loss": 0.0596, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 281.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.803044319152832, |
|
"eval_runtime": 4.5651, |
|
"eval_samples_per_second": 131.433, |
|
"eval_steps_per_second": 4.162, |
|
"step": 5287 |
|
}, |
|
{ |
|
"epoch": 282.13, |
|
"learning_rate": 2.2901234567901235e-05, |
|
"loss": 0.056, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 282.67, |
|
"learning_rate": 2.2839506172839506e-05, |
|
"loss": 0.0494, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 282.99, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.8059892654418945, |
|
"eval_runtime": 4.5556, |
|
"eval_samples_per_second": 131.706, |
|
"eval_steps_per_second": 4.171, |
|
"step": 5306 |
|
}, |
|
{ |
|
"epoch": 283.2, |
|
"learning_rate": 2.277777777777778e-05, |
|
"loss": 0.0311, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 283.73, |
|
"learning_rate": 2.271604938271605e-05, |
|
"loss": 0.0483, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.7877960205078125, |
|
"eval_runtime": 4.5534, |
|
"eval_samples_per_second": 131.77, |
|
"eval_steps_per_second": 4.173, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 284.27, |
|
"learning_rate": 2.2654320987654324e-05, |
|
"loss": 0.0606, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 284.8, |
|
"learning_rate": 2.2592592592592594e-05, |
|
"loss": 0.0338, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 284.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.825440406799316, |
|
"eval_runtime": 4.5446, |
|
"eval_samples_per_second": 132.024, |
|
"eval_steps_per_second": 4.181, |
|
"step": 5343 |
|
}, |
|
{ |
|
"epoch": 285.33, |
|
"learning_rate": 2.2530864197530865e-05, |
|
"loss": 0.0454, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 285.87, |
|
"learning_rate": 2.246913580246914e-05, |
|
"loss": 0.0319, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 285.97, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.826366424560547, |
|
"eval_runtime": 4.5558, |
|
"eval_samples_per_second": 131.701, |
|
"eval_steps_per_second": 4.171, |
|
"step": 5362 |
|
}, |
|
{ |
|
"epoch": 286.4, |
|
"learning_rate": 2.240740740740741e-05, |
|
"loss": 0.0414, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 286.93, |
|
"learning_rate": 2.234567901234568e-05, |
|
"loss": 0.0454, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 286.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.842591762542725, |
|
"eval_runtime": 4.55, |
|
"eval_samples_per_second": 131.868, |
|
"eval_steps_per_second": 4.176, |
|
"step": 5381 |
|
}, |
|
{ |
|
"epoch": 287.47, |
|
"learning_rate": 2.2283950617283953e-05, |
|
"loss": 0.0426, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.0409, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.819784164428711, |
|
"eval_runtime": 4.5681, |
|
"eval_samples_per_second": 131.345, |
|
"eval_steps_per_second": 4.159, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 288.53, |
|
"learning_rate": 2.2160493827160494e-05, |
|
"loss": 0.0435, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 288.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.8339433670043945, |
|
"eval_runtime": 4.5995, |
|
"eval_samples_per_second": 130.45, |
|
"eval_steps_per_second": 4.131, |
|
"step": 5418 |
|
}, |
|
{ |
|
"epoch": 289.07, |
|
"learning_rate": 2.2098765432098767e-05, |
|
"loss": 0.0452, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 289.6, |
|
"learning_rate": 2.2037037037037038e-05, |
|
"loss": 0.0498, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 289.97, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.838677883148193, |
|
"eval_runtime": 4.6181, |
|
"eval_samples_per_second": 129.923, |
|
"eval_steps_per_second": 4.114, |
|
"step": 5437 |
|
}, |
|
{ |
|
"epoch": 290.13, |
|
"learning_rate": 2.1975308641975308e-05, |
|
"loss": 0.0467, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 290.67, |
|
"learning_rate": 2.1913580246913582e-05, |
|
"loss": 0.0447, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 290.99, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.834191799163818, |
|
"eval_runtime": 4.5905, |
|
"eval_samples_per_second": 130.706, |
|
"eval_steps_per_second": 4.139, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 291.2, |
|
"learning_rate": 2.1851851851851852e-05, |
|
"loss": 0.0441, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 291.73, |
|
"learning_rate": 2.1790123456790123e-05, |
|
"loss": 0.0402, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.849569320678711, |
|
"eval_runtime": 4.5792, |
|
"eval_samples_per_second": 131.028, |
|
"eval_steps_per_second": 4.149, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 292.27, |
|
"learning_rate": 2.1728395061728397e-05, |
|
"loss": 0.0314, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 292.8, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 0.0366, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 292.96, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.867130279541016, |
|
"eval_runtime": 4.5608, |
|
"eval_samples_per_second": 131.555, |
|
"eval_steps_per_second": 4.166, |
|
"step": 5493 |
|
}, |
|
{ |
|
"epoch": 293.33, |
|
"learning_rate": 2.1604938271604937e-05, |
|
"loss": 0.0388, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 293.87, |
|
"learning_rate": 2.154320987654321e-05, |
|
"loss": 0.0369, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 293.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.836596488952637, |
|
"eval_runtime": 4.583, |
|
"eval_samples_per_second": 130.917, |
|
"eval_steps_per_second": 4.146, |
|
"step": 5512 |
|
}, |
|
{ |
|
"epoch": 294.4, |
|
"learning_rate": 2.148148148148148e-05, |
|
"loss": 0.0651, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 294.93, |
|
"learning_rate": 2.1419753086419755e-05, |
|
"loss": 0.0361, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 294.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.799242973327637, |
|
"eval_runtime": 4.6324, |
|
"eval_samples_per_second": 129.522, |
|
"eval_steps_per_second": 4.102, |
|
"step": 5531 |
|
}, |
|
{ |
|
"epoch": 295.47, |
|
"learning_rate": 2.1358024691358026e-05, |
|
"loss": 0.0549, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"learning_rate": 2.1296296296296296e-05, |
|
"loss": 0.0448, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.848645210266113, |
|
"eval_runtime": 4.6425, |
|
"eval_samples_per_second": 129.241, |
|
"eval_steps_per_second": 4.093, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 296.53, |
|
"learning_rate": 2.123456790123457e-05, |
|
"loss": 0.055, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 296.96, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.897942066192627, |
|
"eval_runtime": 4.5933, |
|
"eval_samples_per_second": 130.624, |
|
"eval_steps_per_second": 4.136, |
|
"step": 5568 |
|
}, |
|
{ |
|
"epoch": 297.07, |
|
"learning_rate": 2.117283950617284e-05, |
|
"loss": 0.0427, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 297.6, |
|
"learning_rate": 2.111111111111111e-05, |
|
"loss": 0.0585, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 297.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.866022109985352, |
|
"eval_runtime": 4.6056, |
|
"eval_samples_per_second": 130.276, |
|
"eval_steps_per_second": 4.125, |
|
"step": 5587 |
|
}, |
|
{ |
|
"epoch": 298.13, |
|
"learning_rate": 2.1049382716049385e-05, |
|
"loss": 0.0478, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 298.67, |
|
"learning_rate": 2.0987654320987655e-05, |
|
"loss": 0.0477, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 298.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.8717451095581055, |
|
"eval_runtime": 4.6241, |
|
"eval_samples_per_second": 129.756, |
|
"eval_steps_per_second": 4.109, |
|
"step": 5606 |
|
}, |
|
{ |
|
"epoch": 299.2, |
|
"learning_rate": 2.0925925925925925e-05, |
|
"loss": 0.0515, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 299.73, |
|
"learning_rate": 2.08641975308642e-05, |
|
"loss": 0.0247, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.883845806121826, |
|
"eval_runtime": 4.6098, |
|
"eval_samples_per_second": 130.158, |
|
"eval_steps_per_second": 4.122, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 300.27, |
|
"learning_rate": 2.0802469135802473e-05, |
|
"loss": 0.0438, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 300.8, |
|
"learning_rate": 2.074074074074074e-05, |
|
"loss": 0.047, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 300.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.824845314025879, |
|
"eval_runtime": 4.5984, |
|
"eval_samples_per_second": 130.482, |
|
"eval_steps_per_second": 4.132, |
|
"step": 5643 |
|
}, |
|
{ |
|
"epoch": 301.33, |
|
"learning_rate": 2.0679012345679014e-05, |
|
"loss": 0.0497, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 301.87, |
|
"learning_rate": 2.0617283950617287e-05, |
|
"loss": 0.0608, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 301.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.832959175109863, |
|
"eval_runtime": 4.6004, |
|
"eval_samples_per_second": 130.422, |
|
"eval_steps_per_second": 4.13, |
|
"step": 5662 |
|
}, |
|
{ |
|
"epoch": 302.4, |
|
"learning_rate": 2.0555555555555555e-05, |
|
"loss": 0.0413, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 302.93, |
|
"learning_rate": 2.0493827160493828e-05, |
|
"loss": 0.0417, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 302.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.823630332946777, |
|
"eval_runtime": 4.5991, |
|
"eval_samples_per_second": 130.462, |
|
"eval_steps_per_second": 4.131, |
|
"step": 5681 |
|
}, |
|
{ |
|
"epoch": 303.47, |
|
"learning_rate": 2.0432098765432102e-05, |
|
"loss": 0.0329, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.0494, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.807046413421631, |
|
"eval_runtime": 4.6151, |
|
"eval_samples_per_second": 130.008, |
|
"eval_steps_per_second": 4.117, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 304.53, |
|
"learning_rate": 2.0308641975308643e-05, |
|
"loss": 0.0316, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 304.96, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.821282386779785, |
|
"eval_runtime": 4.6056, |
|
"eval_samples_per_second": 130.275, |
|
"eval_steps_per_second": 4.125, |
|
"step": 5718 |
|
}, |
|
{ |
|
"epoch": 305.07, |
|
"learning_rate": 2.0246913580246917e-05, |
|
"loss": 0.0473, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 305.6, |
|
"learning_rate": 2.0185185185185187e-05, |
|
"loss": 0.0421, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 305.97, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.863409996032715, |
|
"eval_runtime": 4.6418, |
|
"eval_samples_per_second": 129.259, |
|
"eval_steps_per_second": 4.093, |
|
"step": 5737 |
|
}, |
|
{ |
|
"epoch": 306.13, |
|
"learning_rate": 2.0123456790123457e-05, |
|
"loss": 0.0382, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 306.67, |
|
"learning_rate": 2.006172839506173e-05, |
|
"loss": 0.0411, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 306.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.877004623413086, |
|
"eval_runtime": 4.6583, |
|
"eval_samples_per_second": 128.802, |
|
"eval_steps_per_second": 4.079, |
|
"step": 5756 |
|
}, |
|
{ |
|
"epoch": 307.2, |
|
"learning_rate": 2e-05, |
|
"loss": 0.042, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 307.73, |
|
"learning_rate": 1.9938271604938272e-05, |
|
"loss": 0.0404, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.9029860496521, |
|
"eval_runtime": 4.6363, |
|
"eval_samples_per_second": 129.412, |
|
"eval_steps_per_second": 4.098, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 308.27, |
|
"learning_rate": 1.9876543209876546e-05, |
|
"loss": 0.0401, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 308.8, |
|
"learning_rate": 1.9814814814814816e-05, |
|
"loss": 0.0397, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 308.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.943274974822998, |
|
"eval_runtime": 4.6219, |
|
"eval_samples_per_second": 129.817, |
|
"eval_steps_per_second": 4.111, |
|
"step": 5793 |
|
}, |
|
{ |
|
"epoch": 309.33, |
|
"learning_rate": 1.9753086419753087e-05, |
|
"loss": 0.0467, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 309.87, |
|
"learning_rate": 1.969135802469136e-05, |
|
"loss": 0.053, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 309.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.930065631866455, |
|
"eval_runtime": 4.6916, |
|
"eval_samples_per_second": 127.887, |
|
"eval_steps_per_second": 4.05, |
|
"step": 5812 |
|
}, |
|
{ |
|
"epoch": 310.4, |
|
"learning_rate": 1.962962962962963e-05, |
|
"loss": 0.0459, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 310.93, |
|
"learning_rate": 1.95679012345679e-05, |
|
"loss": 0.0303, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 310.99, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.8961286544799805, |
|
"eval_runtime": 4.6651, |
|
"eval_samples_per_second": 128.614, |
|
"eval_steps_per_second": 4.073, |
|
"step": 5831 |
|
}, |
|
{ |
|
"epoch": 311.47, |
|
"learning_rate": 1.950617283950617e-05, |
|
"loss": 0.0314, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 0.0369, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.856044292449951, |
|
"eval_runtime": 4.6488, |
|
"eval_samples_per_second": 129.067, |
|
"eval_steps_per_second": 4.087, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 312.53, |
|
"learning_rate": 1.938271604938272e-05, |
|
"loss": 0.0423, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 312.96, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.917734146118164, |
|
"eval_runtime": 4.6571, |
|
"eval_samples_per_second": 128.835, |
|
"eval_steps_per_second": 4.08, |
|
"step": 5868 |
|
}, |
|
{ |
|
"epoch": 313.07, |
|
"learning_rate": 1.9320987654320986e-05, |
|
"loss": 0.03, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 313.6, |
|
"learning_rate": 1.925925925925926e-05, |
|
"loss": 0.0343, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 313.97, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.892765522003174, |
|
"eval_runtime": 4.6848, |
|
"eval_samples_per_second": 128.074, |
|
"eval_steps_per_second": 4.056, |
|
"step": 5887 |
|
}, |
|
{ |
|
"epoch": 314.13, |
|
"learning_rate": 1.9197530864197534e-05, |
|
"loss": 0.0299, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 314.67, |
|
"learning_rate": 1.91358024691358e-05, |
|
"loss": 0.0216, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 314.99, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.895847320556641, |
|
"eval_runtime": 4.6722, |
|
"eval_samples_per_second": 128.42, |
|
"eval_steps_per_second": 4.067, |
|
"step": 5906 |
|
}, |
|
{ |
|
"epoch": 315.2, |
|
"learning_rate": 1.9074074074074075e-05, |
|
"loss": 0.0604, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 315.73, |
|
"learning_rate": 1.901234567901235e-05, |
|
"loss": 0.0287, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.880258083343506, |
|
"eval_runtime": 4.6757, |
|
"eval_samples_per_second": 128.323, |
|
"eval_steps_per_second": 4.064, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 316.27, |
|
"learning_rate": 1.8950617283950615e-05, |
|
"loss": 0.0269, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 316.8, |
|
"learning_rate": 1.888888888888889e-05, |
|
"loss": 0.0286, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 316.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.86151123046875, |
|
"eval_runtime": 4.6507, |
|
"eval_samples_per_second": 129.012, |
|
"eval_steps_per_second": 4.085, |
|
"step": 5943 |
|
}, |
|
{ |
|
"epoch": 317.33, |
|
"learning_rate": 1.8827160493827163e-05, |
|
"loss": 0.0478, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 317.87, |
|
"learning_rate": 1.8765432098765433e-05, |
|
"loss": 0.0304, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 317.97, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.873566150665283, |
|
"eval_runtime": 4.6681, |
|
"eval_samples_per_second": 128.532, |
|
"eval_steps_per_second": 4.07, |
|
"step": 5962 |
|
}, |
|
{ |
|
"epoch": 318.4, |
|
"learning_rate": 1.8703703703703704e-05, |
|
"loss": 0.0346, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 318.93, |
|
"learning_rate": 1.8641975308641977e-05, |
|
"loss": 0.0486, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 318.99, |
|
"eval_accuracy": 0.22333333333333333, |
|
"eval_loss": 4.882538318634033, |
|
"eval_runtime": 4.6432, |
|
"eval_samples_per_second": 129.222, |
|
"eval_steps_per_second": 4.092, |
|
"step": 5981 |
|
}, |
|
{ |
|
"epoch": 319.47, |
|
"learning_rate": 1.8580246913580248e-05, |
|
"loss": 0.0391, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0404, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.861847400665283, |
|
"eval_runtime": 4.6741, |
|
"eval_samples_per_second": 128.368, |
|
"eval_steps_per_second": 4.065, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 320.53, |
|
"learning_rate": 1.8456790123456792e-05, |
|
"loss": 0.0439, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 320.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.884802341461182, |
|
"eval_runtime": 4.66, |
|
"eval_samples_per_second": 128.756, |
|
"eval_steps_per_second": 4.077, |
|
"step": 6018 |
|
}, |
|
{ |
|
"epoch": 321.07, |
|
"learning_rate": 1.8395061728395062e-05, |
|
"loss": 0.0387, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 321.6, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 0.0428, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 321.97, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.897517681121826, |
|
"eval_runtime": 4.6751, |
|
"eval_samples_per_second": 128.34, |
|
"eval_steps_per_second": 4.064, |
|
"step": 6037 |
|
}, |
|
{ |
|
"epoch": 322.13, |
|
"learning_rate": 1.8271604938271607e-05, |
|
"loss": 0.0261, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 322.67, |
|
"learning_rate": 1.8209876543209877e-05, |
|
"loss": 0.0498, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 322.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.8614420890808105, |
|
"eval_runtime": 4.6819, |
|
"eval_samples_per_second": 128.153, |
|
"eval_steps_per_second": 4.058, |
|
"step": 6056 |
|
}, |
|
{ |
|
"epoch": 323.2, |
|
"learning_rate": 1.814814814814815e-05, |
|
"loss": 0.0403, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 323.73, |
|
"learning_rate": 1.808641975308642e-05, |
|
"loss": 0.0314, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.871830463409424, |
|
"eval_runtime": 4.6693, |
|
"eval_samples_per_second": 128.5, |
|
"eval_steps_per_second": 4.069, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 324.27, |
|
"learning_rate": 1.802469135802469e-05, |
|
"loss": 0.0365, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 324.8, |
|
"learning_rate": 1.7962962962962965e-05, |
|
"loss": 0.0334, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 324.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.902100563049316, |
|
"eval_runtime": 4.7082, |
|
"eval_samples_per_second": 127.436, |
|
"eval_steps_per_second": 4.035, |
|
"step": 6093 |
|
}, |
|
{ |
|
"epoch": 325.33, |
|
"learning_rate": 1.7901234567901236e-05, |
|
"loss": 0.0446, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 325.87, |
|
"learning_rate": 1.7839506172839506e-05, |
|
"loss": 0.0431, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 325.97, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.897326946258545, |
|
"eval_runtime": 4.7241, |
|
"eval_samples_per_second": 127.008, |
|
"eval_steps_per_second": 4.022, |
|
"step": 6112 |
|
}, |
|
{ |
|
"epoch": 326.4, |
|
"learning_rate": 1.777777777777778e-05, |
|
"loss": 0.0316, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 326.93, |
|
"learning_rate": 1.771604938271605e-05, |
|
"loss": 0.0473, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 326.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.867129802703857, |
|
"eval_runtime": 4.6999, |
|
"eval_samples_per_second": 127.662, |
|
"eval_steps_per_second": 4.043, |
|
"step": 6131 |
|
}, |
|
{ |
|
"epoch": 327.47, |
|
"learning_rate": 1.765432098765432e-05, |
|
"loss": 0.0365, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"learning_rate": 1.7592592592592595e-05, |
|
"loss": 0.0348, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.905031204223633, |
|
"eval_runtime": 4.6993, |
|
"eval_samples_per_second": 127.679, |
|
"eval_steps_per_second": 4.043, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 328.53, |
|
"learning_rate": 1.7530864197530865e-05, |
|
"loss": 0.0718, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 328.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.886887073516846, |
|
"eval_runtime": 4.7046, |
|
"eval_samples_per_second": 127.535, |
|
"eval_steps_per_second": 4.039, |
|
"step": 6168 |
|
}, |
|
{ |
|
"epoch": 329.07, |
|
"learning_rate": 1.7469135802469135e-05, |
|
"loss": 0.0418, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 329.6, |
|
"learning_rate": 1.740740740740741e-05, |
|
"loss": 0.0387, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 329.97, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.855226039886475, |
|
"eval_runtime": 4.7051, |
|
"eval_samples_per_second": 127.52, |
|
"eval_steps_per_second": 4.038, |
|
"step": 6187 |
|
}, |
|
{ |
|
"epoch": 330.13, |
|
"learning_rate": 1.734567901234568e-05, |
|
"loss": 0.032, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 330.67, |
|
"learning_rate": 1.728395061728395e-05, |
|
"loss": 0.0335, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 330.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.893190860748291, |
|
"eval_runtime": 4.7105, |
|
"eval_samples_per_second": 127.376, |
|
"eval_steps_per_second": 4.034, |
|
"step": 6206 |
|
}, |
|
{ |
|
"epoch": 331.2, |
|
"learning_rate": 1.7222222222222224e-05, |
|
"loss": 0.0271, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 331.73, |
|
"learning_rate": 1.7160493827160494e-05, |
|
"loss": 0.0355, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.919488906860352, |
|
"eval_runtime": 4.7258, |
|
"eval_samples_per_second": 126.963, |
|
"eval_steps_per_second": 4.02, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 332.27, |
|
"learning_rate": 1.7098765432098765e-05, |
|
"loss": 0.038, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 332.8, |
|
"learning_rate": 1.7037037037037038e-05, |
|
"loss": 0.0407, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 332.96, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.9162702560424805, |
|
"eval_runtime": 4.7131, |
|
"eval_samples_per_second": 127.305, |
|
"eval_steps_per_second": 4.031, |
|
"step": 6243 |
|
}, |
|
{ |
|
"epoch": 333.33, |
|
"learning_rate": 1.697530864197531e-05, |
|
"loss": 0.0434, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 333.87, |
|
"learning_rate": 1.6913580246913582e-05, |
|
"loss": 0.0471, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 333.97, |
|
"eval_accuracy": 0.225, |
|
"eval_loss": 4.885989189147949, |
|
"eval_runtime": 4.7137, |
|
"eval_samples_per_second": 127.289, |
|
"eval_steps_per_second": 4.031, |
|
"step": 6262 |
|
}, |
|
{ |
|
"epoch": 334.4, |
|
"learning_rate": 1.6851851851851853e-05, |
|
"loss": 0.0336, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 334.93, |
|
"learning_rate": 1.6790123456790123e-05, |
|
"loss": 0.0334, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 334.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.894328594207764, |
|
"eval_runtime": 4.6988, |
|
"eval_samples_per_second": 127.693, |
|
"eval_steps_per_second": 4.044, |
|
"step": 6281 |
|
}, |
|
{ |
|
"epoch": 335.47, |
|
"learning_rate": 1.6728395061728397e-05, |
|
"loss": 0.0441, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.0301, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.922252178192139, |
|
"eval_runtime": 4.7453, |
|
"eval_samples_per_second": 126.441, |
|
"eval_steps_per_second": 4.004, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 336.53, |
|
"learning_rate": 1.6604938271604938e-05, |
|
"loss": 0.0281, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 336.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.910050392150879, |
|
"eval_runtime": 4.7284, |
|
"eval_samples_per_second": 126.892, |
|
"eval_steps_per_second": 4.018, |
|
"step": 6318 |
|
}, |
|
{ |
|
"epoch": 337.07, |
|
"learning_rate": 1.654320987654321e-05, |
|
"loss": 0.0365, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 337.6, |
|
"learning_rate": 1.6481481481481482e-05, |
|
"loss": 0.0305, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 337.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.889711856842041, |
|
"eval_runtime": 4.7203, |
|
"eval_samples_per_second": 127.111, |
|
"eval_steps_per_second": 4.025, |
|
"step": 6337 |
|
}, |
|
{ |
|
"epoch": 338.13, |
|
"learning_rate": 1.6419753086419752e-05, |
|
"loss": 0.0542, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 338.67, |
|
"learning_rate": 1.6358024691358026e-05, |
|
"loss": 0.0505, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 338.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.929032802581787, |
|
"eval_runtime": 4.7098, |
|
"eval_samples_per_second": 127.395, |
|
"eval_steps_per_second": 4.034, |
|
"step": 6356 |
|
}, |
|
{ |
|
"epoch": 339.2, |
|
"learning_rate": 1.62962962962963e-05, |
|
"loss": 0.0414, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 339.73, |
|
"learning_rate": 1.6234567901234567e-05, |
|
"loss": 0.024, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.944223880767822, |
|
"eval_runtime": 4.7276, |
|
"eval_samples_per_second": 126.914, |
|
"eval_steps_per_second": 4.019, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 340.27, |
|
"learning_rate": 1.617283950617284e-05, |
|
"loss": 0.0267, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 340.8, |
|
"learning_rate": 1.6111111111111115e-05, |
|
"loss": 0.0504, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 340.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.918275833129883, |
|
"eval_runtime": 4.709, |
|
"eval_samples_per_second": 127.416, |
|
"eval_steps_per_second": 4.035, |
|
"step": 6393 |
|
}, |
|
{ |
|
"epoch": 341.33, |
|
"learning_rate": 1.604938271604938e-05, |
|
"loss": 0.0413, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 341.87, |
|
"learning_rate": 1.5987654320987655e-05, |
|
"loss": 0.0259, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 341.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.883179187774658, |
|
"eval_runtime": 4.734, |
|
"eval_samples_per_second": 126.742, |
|
"eval_steps_per_second": 4.014, |
|
"step": 6412 |
|
}, |
|
{ |
|
"epoch": 342.4, |
|
"learning_rate": 1.5925925925925926e-05, |
|
"loss": 0.0338, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 342.93, |
|
"learning_rate": 1.5864197530864196e-05, |
|
"loss": 0.0313, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 342.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.895809650421143, |
|
"eval_runtime": 4.7766, |
|
"eval_samples_per_second": 125.612, |
|
"eval_steps_per_second": 3.978, |
|
"step": 6431 |
|
}, |
|
{ |
|
"epoch": 343.47, |
|
"learning_rate": 1.580246913580247e-05, |
|
"loss": 0.0213, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"learning_rate": 1.574074074074074e-05, |
|
"loss": 0.0293, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.897879123687744, |
|
"eval_runtime": 4.8088, |
|
"eval_samples_per_second": 124.771, |
|
"eval_steps_per_second": 3.951, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 344.53, |
|
"learning_rate": 1.5679012345679014e-05, |
|
"loss": 0.0427, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 344.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.9055495262146, |
|
"eval_runtime": 4.8414, |
|
"eval_samples_per_second": 123.93, |
|
"eval_steps_per_second": 3.924, |
|
"step": 6468 |
|
}, |
|
{ |
|
"epoch": 345.07, |
|
"learning_rate": 1.5617283950617285e-05, |
|
"loss": 0.0344, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 345.6, |
|
"learning_rate": 1.5555555555555555e-05, |
|
"loss": 0.0399, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 345.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.895743370056152, |
|
"eval_runtime": 4.7399, |
|
"eval_samples_per_second": 126.584, |
|
"eval_steps_per_second": 4.009, |
|
"step": 6487 |
|
}, |
|
{ |
|
"epoch": 346.13, |
|
"learning_rate": 1.549382716049383e-05, |
|
"loss": 0.0253, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 346.67, |
|
"learning_rate": 1.54320987654321e-05, |
|
"loss": 0.0273, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 346.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.8988518714904785, |
|
"eval_runtime": 4.7852, |
|
"eval_samples_per_second": 125.387, |
|
"eval_steps_per_second": 3.971, |
|
"step": 6506 |
|
}, |
|
{ |
|
"epoch": 347.2, |
|
"learning_rate": 1.537037037037037e-05, |
|
"loss": 0.0261, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 347.73, |
|
"learning_rate": 1.5308641975308643e-05, |
|
"loss": 0.0388, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.9087018966674805, |
|
"eval_runtime": 4.7683, |
|
"eval_samples_per_second": 125.83, |
|
"eval_steps_per_second": 3.985, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 348.27, |
|
"learning_rate": 1.5246913580246914e-05, |
|
"loss": 0.0323, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 348.8, |
|
"learning_rate": 1.5185185185185186e-05, |
|
"loss": 0.0306, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 348.96, |
|
"eval_accuracy": 0.22833333333333333, |
|
"eval_loss": 4.926441669464111, |
|
"eval_runtime": 4.7772, |
|
"eval_samples_per_second": 125.598, |
|
"eval_steps_per_second": 3.977, |
|
"step": 6543 |
|
}, |
|
{ |
|
"epoch": 349.33, |
|
"learning_rate": 1.5123456790123458e-05, |
|
"loss": 0.0249, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 349.87, |
|
"learning_rate": 1.506172839506173e-05, |
|
"loss": 0.0411, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 349.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.921908855438232, |
|
"eval_runtime": 4.7531, |
|
"eval_samples_per_second": 126.234, |
|
"eval_steps_per_second": 3.997, |
|
"step": 6562 |
|
}, |
|
{ |
|
"epoch": 350.4, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.031, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 350.93, |
|
"learning_rate": 1.4938271604938272e-05, |
|
"loss": 0.0394, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 350.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.89980936050415, |
|
"eval_runtime": 4.7836, |
|
"eval_samples_per_second": 125.428, |
|
"eval_steps_per_second": 3.972, |
|
"step": 6581 |
|
}, |
|
{ |
|
"epoch": 351.47, |
|
"learning_rate": 1.4876543209876545e-05, |
|
"loss": 0.0465, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.0507, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.930387496948242, |
|
"eval_runtime": 4.8303, |
|
"eval_samples_per_second": 124.216, |
|
"eval_steps_per_second": 3.934, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 352.53, |
|
"learning_rate": 1.4753086419753087e-05, |
|
"loss": 0.0263, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 352.96, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.923248767852783, |
|
"eval_runtime": 4.822, |
|
"eval_samples_per_second": 124.429, |
|
"eval_steps_per_second": 3.94, |
|
"step": 6618 |
|
}, |
|
{ |
|
"epoch": 353.07, |
|
"learning_rate": 1.4691358024691359e-05, |
|
"loss": 0.0355, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 353.6, |
|
"learning_rate": 1.462962962962963e-05, |
|
"loss": 0.0395, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 353.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.92411470413208, |
|
"eval_runtime": 4.7975, |
|
"eval_samples_per_second": 125.066, |
|
"eval_steps_per_second": 3.96, |
|
"step": 6637 |
|
}, |
|
{ |
|
"epoch": 354.13, |
|
"learning_rate": 1.4567901234567902e-05, |
|
"loss": 0.0258, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 354.67, |
|
"learning_rate": 1.4506172839506174e-05, |
|
"loss": 0.0394, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 354.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.926273345947266, |
|
"eval_runtime": 4.7995, |
|
"eval_samples_per_second": 125.013, |
|
"eval_steps_per_second": 3.959, |
|
"step": 6656 |
|
}, |
|
{ |
|
"epoch": 355.2, |
|
"learning_rate": 1.4444444444444444e-05, |
|
"loss": 0.0345, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 355.73, |
|
"learning_rate": 1.4382716049382716e-05, |
|
"loss": 0.0391, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_accuracy": 0.26, |
|
"eval_loss": 4.927285671234131, |
|
"eval_runtime": 4.819, |
|
"eval_samples_per_second": 124.507, |
|
"eval_steps_per_second": 3.943, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 356.27, |
|
"learning_rate": 1.4320987654320988e-05, |
|
"loss": 0.0274, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 356.8, |
|
"learning_rate": 1.425925925925926e-05, |
|
"loss": 0.0647, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 356.96, |
|
"eval_accuracy": 0.2633333333333333, |
|
"eval_loss": 4.903398513793945, |
|
"eval_runtime": 4.8546, |
|
"eval_samples_per_second": 123.593, |
|
"eval_steps_per_second": 3.914, |
|
"step": 6693 |
|
}, |
|
{ |
|
"epoch": 357.33, |
|
"learning_rate": 1.419753086419753e-05, |
|
"loss": 0.0427, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 357.87, |
|
"learning_rate": 1.4135802469135803e-05, |
|
"loss": 0.038, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 357.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.891026496887207, |
|
"eval_runtime": 4.8918, |
|
"eval_samples_per_second": 122.655, |
|
"eval_steps_per_second": 3.884, |
|
"step": 6712 |
|
}, |
|
{ |
|
"epoch": 358.4, |
|
"learning_rate": 1.4074074074074075e-05, |
|
"loss": 0.0254, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 358.93, |
|
"learning_rate": 1.4012345679012345e-05, |
|
"loss": 0.0368, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 358.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.883033275604248, |
|
"eval_runtime": 4.8388, |
|
"eval_samples_per_second": 123.997, |
|
"eval_steps_per_second": 3.927, |
|
"step": 6731 |
|
}, |
|
{ |
|
"epoch": 359.47, |
|
"learning_rate": 1.3950617283950617e-05, |
|
"loss": 0.0278, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.0308, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.886683940887451, |
|
"eval_runtime": 4.8656, |
|
"eval_samples_per_second": 123.316, |
|
"eval_steps_per_second": 3.905, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 360.53, |
|
"learning_rate": 1.382716049382716e-05, |
|
"loss": 0.0346, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 360.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.86568021774292, |
|
"eval_runtime": 4.8606, |
|
"eval_samples_per_second": 123.442, |
|
"eval_steps_per_second": 3.909, |
|
"step": 6768 |
|
}, |
|
{ |
|
"epoch": 361.07, |
|
"learning_rate": 1.3765432098765432e-05, |
|
"loss": 0.0254, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 361.6, |
|
"learning_rate": 1.3703703703703704e-05, |
|
"loss": 0.0279, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 361.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.8677825927734375, |
|
"eval_runtime": 4.8214, |
|
"eval_samples_per_second": 124.446, |
|
"eval_steps_per_second": 3.941, |
|
"step": 6787 |
|
}, |
|
{ |
|
"epoch": 362.13, |
|
"learning_rate": 1.3641975308641978e-05, |
|
"loss": 0.0367, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 362.67, |
|
"learning_rate": 1.3580246913580247e-05, |
|
"loss": 0.0443, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 362.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.872296333312988, |
|
"eval_runtime": 4.8453, |
|
"eval_samples_per_second": 123.832, |
|
"eval_steps_per_second": 3.921, |
|
"step": 6806 |
|
}, |
|
{ |
|
"epoch": 363.2, |
|
"learning_rate": 1.3518518518518519e-05, |
|
"loss": 0.0224, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 363.73, |
|
"learning_rate": 1.3456790123456793e-05, |
|
"loss": 0.027, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.875555515289307, |
|
"eval_runtime": 4.8571, |
|
"eval_samples_per_second": 123.529, |
|
"eval_steps_per_second": 3.912, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 364.27, |
|
"learning_rate": 1.3395061728395061e-05, |
|
"loss": 0.0394, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 364.8, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.0447, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 364.96, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.874227046966553, |
|
"eval_runtime": 4.8623, |
|
"eval_samples_per_second": 123.397, |
|
"eval_steps_per_second": 3.908, |
|
"step": 6843 |
|
}, |
|
{ |
|
"epoch": 365.33, |
|
"learning_rate": 1.3271604938271605e-05, |
|
"loss": 0.0402, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 365.87, |
|
"learning_rate": 1.3209876543209876e-05, |
|
"loss": 0.028, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 365.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.904233455657959, |
|
"eval_runtime": 4.8676, |
|
"eval_samples_per_second": 123.264, |
|
"eval_steps_per_second": 3.903, |
|
"step": 6862 |
|
}, |
|
{ |
|
"epoch": 366.4, |
|
"learning_rate": 1.3148148148148148e-05, |
|
"loss": 0.0306, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 366.93, |
|
"learning_rate": 1.308641975308642e-05, |
|
"loss": 0.0483, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 366.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.908579349517822, |
|
"eval_runtime": 4.8554, |
|
"eval_samples_per_second": 123.573, |
|
"eval_steps_per_second": 3.913, |
|
"step": 6881 |
|
}, |
|
{ |
|
"epoch": 367.47, |
|
"learning_rate": 1.3024691358024694e-05, |
|
"loss": 0.0321, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.034, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.888582229614258, |
|
"eval_runtime": 4.9075, |
|
"eval_samples_per_second": 122.261, |
|
"eval_steps_per_second": 3.872, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 368.53, |
|
"learning_rate": 1.2901234567901235e-05, |
|
"loss": 0.0363, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 368.96, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.877806186676025, |
|
"eval_runtime": 4.9146, |
|
"eval_samples_per_second": 122.085, |
|
"eval_steps_per_second": 3.866, |
|
"step": 6918 |
|
}, |
|
{ |
|
"epoch": 369.07, |
|
"learning_rate": 1.2839506172839508e-05, |
|
"loss": 0.0401, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 369.6, |
|
"learning_rate": 1.2777777777777777e-05, |
|
"loss": 0.0417, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 369.97, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.905084609985352, |
|
"eval_runtime": 4.8859, |
|
"eval_samples_per_second": 122.802, |
|
"eval_steps_per_second": 3.889, |
|
"step": 6937 |
|
}, |
|
{ |
|
"epoch": 370.13, |
|
"learning_rate": 1.2716049382716049e-05, |
|
"loss": 0.0338, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 370.67, |
|
"learning_rate": 1.2654320987654323e-05, |
|
"loss": 0.0326, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 370.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.911184310913086, |
|
"eval_runtime": 4.8779, |
|
"eval_samples_per_second": 123.003, |
|
"eval_steps_per_second": 3.895, |
|
"step": 6956 |
|
}, |
|
{ |
|
"epoch": 371.2, |
|
"learning_rate": 1.2592592592592592e-05, |
|
"loss": 0.0261, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 371.73, |
|
"learning_rate": 1.2530864197530864e-05, |
|
"loss": 0.028, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.911579608917236, |
|
"eval_runtime": 4.906, |
|
"eval_samples_per_second": 122.299, |
|
"eval_steps_per_second": 3.873, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 372.27, |
|
"learning_rate": 1.2469135802469137e-05, |
|
"loss": 0.0196, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 372.8, |
|
"learning_rate": 1.2407407407407408e-05, |
|
"loss": 0.0343, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 372.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.910400390625, |
|
"eval_runtime": 4.8963, |
|
"eval_samples_per_second": 122.541, |
|
"eval_steps_per_second": 3.88, |
|
"step": 6993 |
|
}, |
|
{ |
|
"epoch": 373.33, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.0246, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 373.87, |
|
"learning_rate": 1.2283950617283952e-05, |
|
"loss": 0.0229, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 373.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.940114974975586, |
|
"eval_runtime": 4.8862, |
|
"eval_samples_per_second": 122.794, |
|
"eval_steps_per_second": 3.888, |
|
"step": 7012 |
|
}, |
|
{ |
|
"epoch": 374.4, |
|
"learning_rate": 1.2222222222222222e-05, |
|
"loss": 0.0235, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 374.93, |
|
"learning_rate": 1.2160493827160495e-05, |
|
"loss": 0.0337, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 374.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.934114933013916, |
|
"eval_runtime": 4.9143, |
|
"eval_samples_per_second": 122.092, |
|
"eval_steps_per_second": 3.866, |
|
"step": 7031 |
|
}, |
|
{ |
|
"epoch": 375.47, |
|
"learning_rate": 1.2098765432098767e-05, |
|
"loss": 0.0334, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"learning_rate": 1.2037037037037037e-05, |
|
"loss": 0.0356, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.933629035949707, |
|
"eval_runtime": 4.9706, |
|
"eval_samples_per_second": 120.709, |
|
"eval_steps_per_second": 3.822, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 376.53, |
|
"learning_rate": 1.1975308641975309e-05, |
|
"loss": 0.029, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 376.96, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.913231372833252, |
|
"eval_runtime": 4.888, |
|
"eval_samples_per_second": 122.75, |
|
"eval_steps_per_second": 3.887, |
|
"step": 7068 |
|
}, |
|
{ |
|
"epoch": 377.07, |
|
"learning_rate": 1.1913580246913581e-05, |
|
"loss": 0.0348, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 377.6, |
|
"learning_rate": 1.1851851851851853e-05, |
|
"loss": 0.0272, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 377.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.910186290740967, |
|
"eval_runtime": 4.9298, |
|
"eval_samples_per_second": 121.709, |
|
"eval_steps_per_second": 3.854, |
|
"step": 7087 |
|
}, |
|
{ |
|
"epoch": 378.13, |
|
"learning_rate": 1.1790123456790124e-05, |
|
"loss": 0.0269, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 378.67, |
|
"learning_rate": 1.1728395061728396e-05, |
|
"loss": 0.0256, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 378.99, |
|
"eval_accuracy": 0.23166666666666666, |
|
"eval_loss": 4.925504207611084, |
|
"eval_runtime": 4.948, |
|
"eval_samples_per_second": 121.26, |
|
"eval_steps_per_second": 3.84, |
|
"step": 7106 |
|
}, |
|
{ |
|
"epoch": 379.2, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 0.0315, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 379.73, |
|
"learning_rate": 1.1604938271604938e-05, |
|
"loss": 0.0276, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_accuracy": 0.22666666666666666, |
|
"eval_loss": 4.928166389465332, |
|
"eval_runtime": 4.9278, |
|
"eval_samples_per_second": 121.757, |
|
"eval_steps_per_second": 3.856, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 380.27, |
|
"learning_rate": 1.154320987654321e-05, |
|
"loss": 0.0286, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 380.8, |
|
"learning_rate": 1.1481481481481482e-05, |
|
"loss": 0.026, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 380.96, |
|
"eval_accuracy": 0.22, |
|
"eval_loss": 4.952660083770752, |
|
"eval_runtime": 4.9502, |
|
"eval_samples_per_second": 121.208, |
|
"eval_steps_per_second": 3.838, |
|
"step": 7143 |
|
}, |
|
{ |
|
"epoch": 381.33, |
|
"learning_rate": 1.1419753086419753e-05, |
|
"loss": 0.031, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 381.87, |
|
"learning_rate": 1.1358024691358025e-05, |
|
"loss": 0.0385, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 381.97, |
|
"eval_accuracy": 0.22166666666666668, |
|
"eval_loss": 4.941068649291992, |
|
"eval_runtime": 4.9601, |
|
"eval_samples_per_second": 120.966, |
|
"eval_steps_per_second": 3.831, |
|
"step": 7162 |
|
}, |
|
{ |
|
"epoch": 382.4, |
|
"learning_rate": 1.1296296296296297e-05, |
|
"loss": 0.0269, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 382.93, |
|
"learning_rate": 1.123456790123457e-05, |
|
"loss": 0.026, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 382.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.952951908111572, |
|
"eval_runtime": 4.9497, |
|
"eval_samples_per_second": 121.22, |
|
"eval_steps_per_second": 3.839, |
|
"step": 7181 |
|
}, |
|
{ |
|
"epoch": 383.47, |
|
"learning_rate": 1.117283950617284e-05, |
|
"loss": 0.0262, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.0444, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.9387335777282715, |
|
"eval_runtime": 4.9515, |
|
"eval_samples_per_second": 121.176, |
|
"eval_steps_per_second": 3.837, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 384.53, |
|
"learning_rate": 1.1049382716049384e-05, |
|
"loss": 0.0369, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 384.96, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.904226303100586, |
|
"eval_runtime": 4.9498, |
|
"eval_samples_per_second": 121.217, |
|
"eval_steps_per_second": 3.839, |
|
"step": 7218 |
|
}, |
|
{ |
|
"epoch": 385.07, |
|
"learning_rate": 1.0987654320987654e-05, |
|
"loss": 0.0258, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 385.6, |
|
"learning_rate": 1.0925925925925926e-05, |
|
"loss": 0.0203, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 385.97, |
|
"eval_accuracy": 0.23, |
|
"eval_loss": 4.886034965515137, |
|
"eval_runtime": 4.9753, |
|
"eval_samples_per_second": 120.596, |
|
"eval_steps_per_second": 3.819, |
|
"step": 7237 |
|
}, |
|
{ |
|
"epoch": 386.13, |
|
"learning_rate": 1.0864197530864198e-05, |
|
"loss": 0.0332, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 386.67, |
|
"learning_rate": 1.0802469135802469e-05, |
|
"loss": 0.0238, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 386.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.877529144287109, |
|
"eval_runtime": 4.9631, |
|
"eval_samples_per_second": 120.891, |
|
"eval_steps_per_second": 3.828, |
|
"step": 7256 |
|
}, |
|
{ |
|
"epoch": 387.2, |
|
"learning_rate": 1.074074074074074e-05, |
|
"loss": 0.0266, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 387.73, |
|
"learning_rate": 1.0679012345679013e-05, |
|
"loss": 0.0315, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.864088535308838, |
|
"eval_runtime": 4.9534, |
|
"eval_samples_per_second": 121.128, |
|
"eval_steps_per_second": 3.836, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 388.27, |
|
"learning_rate": 1.0617283950617285e-05, |
|
"loss": 0.0208, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 388.8, |
|
"learning_rate": 1.0555555555555555e-05, |
|
"loss": 0.0349, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 388.96, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.86765718460083, |
|
"eval_runtime": 4.9642, |
|
"eval_samples_per_second": 120.866, |
|
"eval_steps_per_second": 3.827, |
|
"step": 7293 |
|
}, |
|
{ |
|
"epoch": 389.33, |
|
"learning_rate": 1.0493827160493827e-05, |
|
"loss": 0.0336, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 389.87, |
|
"learning_rate": 1.04320987654321e-05, |
|
"loss": 0.038, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 389.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.868815898895264, |
|
"eval_runtime": 4.9771, |
|
"eval_samples_per_second": 120.552, |
|
"eval_steps_per_second": 3.817, |
|
"step": 7312 |
|
}, |
|
{ |
|
"epoch": 390.4, |
|
"learning_rate": 1.037037037037037e-05, |
|
"loss": 0.0188, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 390.93, |
|
"learning_rate": 1.0308641975308644e-05, |
|
"loss": 0.0301, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 390.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.8932037353515625, |
|
"eval_runtime": 4.9575, |
|
"eval_samples_per_second": 121.03, |
|
"eval_steps_per_second": 3.833, |
|
"step": 7331 |
|
}, |
|
{ |
|
"epoch": 391.47, |
|
"learning_rate": 1.0246913580246914e-05, |
|
"loss": 0.0418, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"learning_rate": 1.0185185185185185e-05, |
|
"loss": 0.0363, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.902304172515869, |
|
"eval_runtime": 4.9867, |
|
"eval_samples_per_second": 120.321, |
|
"eval_steps_per_second": 3.81, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 392.53, |
|
"learning_rate": 1.0123456790123458e-05, |
|
"loss": 0.0329, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 392.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.882464408874512, |
|
"eval_runtime": 4.9782, |
|
"eval_samples_per_second": 120.525, |
|
"eval_steps_per_second": 3.817, |
|
"step": 7368 |
|
}, |
|
{ |
|
"epoch": 393.07, |
|
"learning_rate": 1.0061728395061729e-05, |
|
"loss": 0.0255, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 393.6, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0174, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 393.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.87109375, |
|
"eval_runtime": 4.9821, |
|
"eval_samples_per_second": 120.431, |
|
"eval_steps_per_second": 3.814, |
|
"step": 7387 |
|
}, |
|
{ |
|
"epoch": 394.13, |
|
"learning_rate": 9.938271604938273e-06, |
|
"loss": 0.0256, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 394.67, |
|
"learning_rate": 9.876543209876543e-06, |
|
"loss": 0.0284, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 394.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.876201152801514, |
|
"eval_runtime": 5.0563, |
|
"eval_samples_per_second": 118.663, |
|
"eval_steps_per_second": 3.758, |
|
"step": 7406 |
|
}, |
|
{ |
|
"epoch": 395.2, |
|
"learning_rate": 9.814814814814815e-06, |
|
"loss": 0.0458, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 395.73, |
|
"learning_rate": 9.753086419753086e-06, |
|
"loss": 0.0178, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.868426322937012, |
|
"eval_runtime": 5.0279, |
|
"eval_samples_per_second": 119.333, |
|
"eval_steps_per_second": 3.779, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 396.27, |
|
"learning_rate": 9.69135802469136e-06, |
|
"loss": 0.0154, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 396.8, |
|
"learning_rate": 9.62962962962963e-06, |
|
"loss": 0.0359, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 396.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.865981578826904, |
|
"eval_runtime": 5.0157, |
|
"eval_samples_per_second": 119.625, |
|
"eval_steps_per_second": 3.788, |
|
"step": 7443 |
|
}, |
|
{ |
|
"epoch": 397.33, |
|
"learning_rate": 9.5679012345679e-06, |
|
"loss": 0.0295, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 397.87, |
|
"learning_rate": 9.506172839506174e-06, |
|
"loss": 0.029, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 397.97, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.879904270172119, |
|
"eval_runtime": 5.0461, |
|
"eval_samples_per_second": 118.904, |
|
"eval_steps_per_second": 3.765, |
|
"step": 7462 |
|
}, |
|
{ |
|
"epoch": 398.4, |
|
"learning_rate": 9.444444444444445e-06, |
|
"loss": 0.0368, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 398.93, |
|
"learning_rate": 9.382716049382717e-06, |
|
"loss": 0.0227, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 398.99, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 4.884500503540039, |
|
"eval_runtime": 5.007, |
|
"eval_samples_per_second": 119.833, |
|
"eval_steps_per_second": 3.795, |
|
"step": 7481 |
|
}, |
|
{ |
|
"epoch": 399.47, |
|
"learning_rate": 9.320987654320989e-06, |
|
"loss": 0.0338, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.0135, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.889830112457275, |
|
"eval_runtime": 5.0086, |
|
"eval_samples_per_second": 119.793, |
|
"eval_steps_per_second": 3.793, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 400.53, |
|
"learning_rate": 9.197530864197531e-06, |
|
"loss": 0.0297, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 400.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.896719455718994, |
|
"eval_runtime": 5.0281, |
|
"eval_samples_per_second": 119.329, |
|
"eval_steps_per_second": 3.779, |
|
"step": 7518 |
|
}, |
|
{ |
|
"epoch": 401.07, |
|
"learning_rate": 9.135802469135803e-06, |
|
"loss": 0.0364, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 401.6, |
|
"learning_rate": 9.074074074074075e-06, |
|
"loss": 0.0263, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 401.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.888412952423096, |
|
"eval_runtime": 5.0155, |
|
"eval_samples_per_second": 119.63, |
|
"eval_steps_per_second": 3.788, |
|
"step": 7537 |
|
}, |
|
{ |
|
"epoch": 402.13, |
|
"learning_rate": 9.012345679012346e-06, |
|
"loss": 0.035, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 402.67, |
|
"learning_rate": 8.950617283950618e-06, |
|
"loss": 0.0386, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 402.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.871886730194092, |
|
"eval_runtime": 4.9851, |
|
"eval_samples_per_second": 120.358, |
|
"eval_steps_per_second": 3.811, |
|
"step": 7556 |
|
}, |
|
{ |
|
"epoch": 403.2, |
|
"learning_rate": 8.88888888888889e-06, |
|
"loss": 0.0339, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 403.73, |
|
"learning_rate": 8.82716049382716e-06, |
|
"loss": 0.0298, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.860945701599121, |
|
"eval_runtime": 5.0553, |
|
"eval_samples_per_second": 118.688, |
|
"eval_steps_per_second": 3.758, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 404.27, |
|
"learning_rate": 8.765432098765432e-06, |
|
"loss": 0.0318, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 404.8, |
|
"learning_rate": 8.703703703703705e-06, |
|
"loss": 0.0232, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 404.96, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.860249996185303, |
|
"eval_runtime": 5.0445, |
|
"eval_samples_per_second": 118.942, |
|
"eval_steps_per_second": 3.767, |
|
"step": 7593 |
|
}, |
|
{ |
|
"epoch": 405.33, |
|
"learning_rate": 8.641975308641975e-06, |
|
"loss": 0.0367, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 405.87, |
|
"learning_rate": 8.580246913580247e-06, |
|
"loss": 0.0232, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 405.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.866739749908447, |
|
"eval_runtime": 5.0222, |
|
"eval_samples_per_second": 119.47, |
|
"eval_steps_per_second": 3.783, |
|
"step": 7612 |
|
}, |
|
{ |
|
"epoch": 406.4, |
|
"learning_rate": 8.518518518518519e-06, |
|
"loss": 0.0223, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 406.93, |
|
"learning_rate": 8.456790123456791e-06, |
|
"loss": 0.032, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 406.99, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.8684258460998535, |
|
"eval_runtime": 5.0294, |
|
"eval_samples_per_second": 119.298, |
|
"eval_steps_per_second": 3.778, |
|
"step": 7631 |
|
}, |
|
{ |
|
"epoch": 407.47, |
|
"learning_rate": 8.395061728395062e-06, |
|
"loss": 0.0272, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 0.0306, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.875509262084961, |
|
"eval_runtime": 5.0495, |
|
"eval_samples_per_second": 118.823, |
|
"eval_steps_per_second": 3.763, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 408.53, |
|
"learning_rate": 8.271604938271606e-06, |
|
"loss": 0.0299, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 408.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.868679046630859, |
|
"eval_runtime": 5.094, |
|
"eval_samples_per_second": 117.785, |
|
"eval_steps_per_second": 3.73, |
|
"step": 7668 |
|
}, |
|
{ |
|
"epoch": 409.07, |
|
"learning_rate": 8.209876543209876e-06, |
|
"loss": 0.0402, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 409.6, |
|
"learning_rate": 8.14814814814815e-06, |
|
"loss": 0.0307, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 409.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.872376441955566, |
|
"eval_runtime": 5.0699, |
|
"eval_samples_per_second": 118.345, |
|
"eval_steps_per_second": 3.748, |
|
"step": 7687 |
|
}, |
|
{ |
|
"epoch": 410.13, |
|
"learning_rate": 8.08641975308642e-06, |
|
"loss": 0.0315, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 410.67, |
|
"learning_rate": 8.02469135802469e-06, |
|
"loss": 0.0304, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 410.99, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 4.879815101623535, |
|
"eval_runtime": 5.0824, |
|
"eval_samples_per_second": 118.054, |
|
"eval_steps_per_second": 3.738, |
|
"step": 7706 |
|
}, |
|
{ |
|
"epoch": 411.2, |
|
"learning_rate": 7.962962962962963e-06, |
|
"loss": 0.039, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 411.73, |
|
"learning_rate": 7.901234567901235e-06, |
|
"loss": 0.0293, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.890056133270264, |
|
"eval_runtime": 5.1118, |
|
"eval_samples_per_second": 117.376, |
|
"eval_steps_per_second": 3.717, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 412.27, |
|
"learning_rate": 7.839506172839507e-06, |
|
"loss": 0.0276, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 412.8, |
|
"learning_rate": 7.777777777777777e-06, |
|
"loss": 0.0273, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 412.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.902527332305908, |
|
"eval_runtime": 5.0871, |
|
"eval_samples_per_second": 117.946, |
|
"eval_steps_per_second": 3.735, |
|
"step": 7743 |
|
}, |
|
{ |
|
"epoch": 413.33, |
|
"learning_rate": 7.71604938271605e-06, |
|
"loss": 0.0334, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 413.87, |
|
"learning_rate": 7.654320987654322e-06, |
|
"loss": 0.0184, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 413.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.886964321136475, |
|
"eval_runtime": 5.0738, |
|
"eval_samples_per_second": 118.254, |
|
"eval_steps_per_second": 3.745, |
|
"step": 7762 |
|
}, |
|
{ |
|
"epoch": 414.4, |
|
"learning_rate": 7.592592592592593e-06, |
|
"loss": 0.0227, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 414.93, |
|
"learning_rate": 7.530864197530865e-06, |
|
"loss": 0.0377, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 414.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.890088081359863, |
|
"eval_runtime": 5.0985, |
|
"eval_samples_per_second": 117.682, |
|
"eval_steps_per_second": 3.727, |
|
"step": 7781 |
|
}, |
|
{ |
|
"epoch": 415.47, |
|
"learning_rate": 7.469135802469136e-06, |
|
"loss": 0.0284, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0278, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.889472484588623, |
|
"eval_runtime": 5.0982, |
|
"eval_samples_per_second": 117.689, |
|
"eval_steps_per_second": 3.727, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 416.53, |
|
"learning_rate": 7.3456790123456796e-06, |
|
"loss": 0.0345, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 416.96, |
|
"eval_accuracy": 0.25333333333333335, |
|
"eval_loss": 4.904553413391113, |
|
"eval_runtime": 5.0835, |
|
"eval_samples_per_second": 118.028, |
|
"eval_steps_per_second": 3.738, |
|
"step": 7818 |
|
}, |
|
{ |
|
"epoch": 417.07, |
|
"learning_rate": 7.283950617283951e-06, |
|
"loss": 0.0376, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 417.6, |
|
"learning_rate": 7.222222222222222e-06, |
|
"loss": 0.0301, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 417.97, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.900204181671143, |
|
"eval_runtime": 5.1232, |
|
"eval_samples_per_second": 117.114, |
|
"eval_steps_per_second": 3.709, |
|
"step": 7837 |
|
}, |
|
{ |
|
"epoch": 418.13, |
|
"learning_rate": 7.160493827160494e-06, |
|
"loss": 0.0204, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 418.67, |
|
"learning_rate": 7.098765432098765e-06, |
|
"loss": 0.0159, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 418.99, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.89817476272583, |
|
"eval_runtime": 5.1276, |
|
"eval_samples_per_second": 117.015, |
|
"eval_steps_per_second": 3.705, |
|
"step": 7856 |
|
}, |
|
{ |
|
"epoch": 419.2, |
|
"learning_rate": 7.0370370370370375e-06, |
|
"loss": 0.0217, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 419.73, |
|
"learning_rate": 6.975308641975309e-06, |
|
"loss": 0.0203, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_accuracy": 0.24833333333333332, |
|
"eval_loss": 4.900780200958252, |
|
"eval_runtime": 5.1291, |
|
"eval_samples_per_second": 116.98, |
|
"eval_steps_per_second": 3.704, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 420.27, |
|
"learning_rate": 6.91358024691358e-06, |
|
"loss": 0.0295, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 420.8, |
|
"learning_rate": 6.851851851851852e-06, |
|
"loss": 0.0182, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 420.96, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.9113335609436035, |
|
"eval_runtime": 5.1351, |
|
"eval_samples_per_second": 116.842, |
|
"eval_steps_per_second": 3.7, |
|
"step": 7893 |
|
}, |
|
{ |
|
"epoch": 421.33, |
|
"learning_rate": 6.790123456790123e-06, |
|
"loss": 0.0321, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 421.87, |
|
"learning_rate": 6.728395061728396e-06, |
|
"loss": 0.0258, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 421.97, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 4.918017387390137, |
|
"eval_runtime": 5.1337, |
|
"eval_samples_per_second": 116.875, |
|
"eval_steps_per_second": 3.701, |
|
"step": 7912 |
|
}, |
|
{ |
|
"epoch": 422.4, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0277, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 422.93, |
|
"learning_rate": 6.604938271604938e-06, |
|
"loss": 0.0266, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 422.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.913443565368652, |
|
"eval_runtime": 5.1296, |
|
"eval_samples_per_second": 116.969, |
|
"eval_steps_per_second": 3.704, |
|
"step": 7931 |
|
}, |
|
{ |
|
"epoch": 423.47, |
|
"learning_rate": 6.54320987654321e-06, |
|
"loss": 0.037, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"learning_rate": 6.481481481481481e-06, |
|
"loss": 0.0304, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.900519847869873, |
|
"eval_runtime": 5.1632, |
|
"eval_samples_per_second": 116.207, |
|
"eval_steps_per_second": 3.68, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 424.53, |
|
"learning_rate": 6.419753086419754e-06, |
|
"loss": 0.0247, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 424.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.893669605255127, |
|
"eval_runtime": 5.2111, |
|
"eval_samples_per_second": 115.139, |
|
"eval_steps_per_second": 3.646, |
|
"step": 7968 |
|
}, |
|
{ |
|
"epoch": 425.07, |
|
"learning_rate": 6.3580246913580246e-06, |
|
"loss": 0.0335, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 425.6, |
|
"learning_rate": 6.296296296296296e-06, |
|
"loss": 0.0493, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 425.97, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.883533954620361, |
|
"eval_runtime": 5.1746, |
|
"eval_samples_per_second": 115.951, |
|
"eval_steps_per_second": 3.672, |
|
"step": 7987 |
|
}, |
|
{ |
|
"epoch": 426.13, |
|
"learning_rate": 6.234567901234569e-06, |
|
"loss": 0.0157, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 426.67, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.0286, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 426.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.896754741668701, |
|
"eval_runtime": 5.1903, |
|
"eval_samples_per_second": 115.6, |
|
"eval_steps_per_second": 3.661, |
|
"step": 8006 |
|
}, |
|
{ |
|
"epoch": 427.2, |
|
"learning_rate": 6.111111111111111e-06, |
|
"loss": 0.0249, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 427.73, |
|
"learning_rate": 6.049382716049383e-06, |
|
"loss": 0.0228, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.906573295593262, |
|
"eval_runtime": 5.2526, |
|
"eval_samples_per_second": 114.23, |
|
"eval_steps_per_second": 3.617, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 428.27, |
|
"learning_rate": 5.9876543209876546e-06, |
|
"loss": 0.0324, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 428.8, |
|
"learning_rate": 5.925925925925927e-06, |
|
"loss": 0.0362, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 428.96, |
|
"eval_accuracy": 0.245, |
|
"eval_loss": 4.903099536895752, |
|
"eval_runtime": 5.2207, |
|
"eval_samples_per_second": 114.927, |
|
"eval_steps_per_second": 3.639, |
|
"step": 8043 |
|
}, |
|
{ |
|
"epoch": 429.33, |
|
"learning_rate": 5.864197530864198e-06, |
|
"loss": 0.0213, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 429.87, |
|
"learning_rate": 5.802469135802469e-06, |
|
"loss": 0.0244, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 429.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.899692535400391, |
|
"eval_runtime": 5.2551, |
|
"eval_samples_per_second": 114.174, |
|
"eval_steps_per_second": 3.616, |
|
"step": 8062 |
|
}, |
|
{ |
|
"epoch": 430.4, |
|
"learning_rate": 5.740740740740741e-06, |
|
"loss": 0.0405, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 430.93, |
|
"learning_rate": 5.6790123456790125e-06, |
|
"loss": 0.0204, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 430.99, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.905935287475586, |
|
"eval_runtime": 5.2136, |
|
"eval_samples_per_second": 115.084, |
|
"eval_steps_per_second": 3.644, |
|
"step": 8081 |
|
}, |
|
{ |
|
"epoch": 431.47, |
|
"learning_rate": 5.617283950617285e-06, |
|
"loss": 0.0175, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.0344, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.905205726623535, |
|
"eval_runtime": 5.2045, |
|
"eval_samples_per_second": 115.286, |
|
"eval_steps_per_second": 3.651, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 432.53, |
|
"learning_rate": 5.493827160493827e-06, |
|
"loss": 0.0252, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 432.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.897459983825684, |
|
"eval_runtime": 5.1933, |
|
"eval_samples_per_second": 115.533, |
|
"eval_steps_per_second": 3.659, |
|
"step": 8118 |
|
}, |
|
{ |
|
"epoch": 433.07, |
|
"learning_rate": 5.432098765432099e-06, |
|
"loss": 0.0185, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 433.6, |
|
"learning_rate": 5.37037037037037e-06, |
|
"loss": 0.0242, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 433.97, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.896070957183838, |
|
"eval_runtime": 5.2428, |
|
"eval_samples_per_second": 114.443, |
|
"eval_steps_per_second": 3.624, |
|
"step": 8137 |
|
}, |
|
{ |
|
"epoch": 434.13, |
|
"learning_rate": 5.3086419753086425e-06, |
|
"loss": 0.0372, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 434.67, |
|
"learning_rate": 5.246913580246914e-06, |
|
"loss": 0.0135, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 434.99, |
|
"eval_accuracy": 0.24666666666666667, |
|
"eval_loss": 4.908581733703613, |
|
"eval_runtime": 5.248, |
|
"eval_samples_per_second": 114.329, |
|
"eval_steps_per_second": 3.62, |
|
"step": 8156 |
|
}, |
|
{ |
|
"epoch": 435.2, |
|
"learning_rate": 5.185185185185185e-06, |
|
"loss": 0.0322, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 435.73, |
|
"learning_rate": 5.123456790123457e-06, |
|
"loss": 0.0296, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.9134602546691895, |
|
"eval_runtime": 5.2357, |
|
"eval_samples_per_second": 114.598, |
|
"eval_steps_per_second": 3.629, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 436.27, |
|
"learning_rate": 5.061728395061729e-06, |
|
"loss": 0.0164, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 436.8, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0432, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 436.96, |
|
"eval_accuracy": 0.24333333333333335, |
|
"eval_loss": 4.907933712005615, |
|
"eval_runtime": 5.268, |
|
"eval_samples_per_second": 113.896, |
|
"eval_steps_per_second": 3.607, |
|
"step": 8193 |
|
}, |
|
{ |
|
"epoch": 437.33, |
|
"learning_rate": 4.938271604938272e-06, |
|
"loss": 0.017, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 437.87, |
|
"learning_rate": 4.876543209876543e-06, |
|
"loss": 0.0242, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 437.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.898138046264648, |
|
"eval_runtime": 5.2464, |
|
"eval_samples_per_second": 114.365, |
|
"eval_steps_per_second": 3.622, |
|
"step": 8212 |
|
}, |
|
{ |
|
"epoch": 438.4, |
|
"learning_rate": 4.814814814814815e-06, |
|
"loss": 0.026, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 438.93, |
|
"learning_rate": 4.753086419753087e-06, |
|
"loss": 0.0227, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 438.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.8857102394104, |
|
"eval_runtime": 5.264, |
|
"eval_samples_per_second": 113.981, |
|
"eval_steps_per_second": 3.609, |
|
"step": 8231 |
|
}, |
|
{ |
|
"epoch": 439.47, |
|
"learning_rate": 4.691358024691358e-06, |
|
"loss": 0.0309, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.021, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 440.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.887371063232422, |
|
"eval_runtime": 5.2856, |
|
"eval_samples_per_second": 113.516, |
|
"eval_steps_per_second": 3.595, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 440.53, |
|
"learning_rate": 4.567901234567902e-06, |
|
"loss": 0.0244, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 440.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.884664058685303, |
|
"eval_runtime": 5.2546, |
|
"eval_samples_per_second": 114.187, |
|
"eval_steps_per_second": 3.616, |
|
"step": 8268 |
|
}, |
|
{ |
|
"epoch": 441.07, |
|
"learning_rate": 4.506172839506173e-06, |
|
"loss": 0.0484, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 441.6, |
|
"learning_rate": 4.444444444444445e-06, |
|
"loss": 0.0234, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 441.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.896438121795654, |
|
"eval_runtime": 5.2769, |
|
"eval_samples_per_second": 113.703, |
|
"eval_steps_per_second": 3.601, |
|
"step": 8287 |
|
}, |
|
{ |
|
"epoch": 442.13, |
|
"learning_rate": 4.382716049382716e-06, |
|
"loss": 0.0293, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 442.67, |
|
"learning_rate": 4.3209876543209875e-06, |
|
"loss": 0.0278, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 442.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.916093349456787, |
|
"eval_runtime": 5.2536, |
|
"eval_samples_per_second": 114.207, |
|
"eval_steps_per_second": 3.617, |
|
"step": 8306 |
|
}, |
|
{ |
|
"epoch": 443.2, |
|
"learning_rate": 4.2592592592592596e-06, |
|
"loss": 0.0224, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 443.73, |
|
"learning_rate": 4.197530864197531e-06, |
|
"loss": 0.0322, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 444.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.921158790588379, |
|
"eval_runtime": 5.2757, |
|
"eval_samples_per_second": 113.729, |
|
"eval_steps_per_second": 3.601, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 444.27, |
|
"learning_rate": 4.135802469135803e-06, |
|
"loss": 0.0201, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 444.8, |
|
"learning_rate": 4.074074074074075e-06, |
|
"loss": 0.038, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 444.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.925096035003662, |
|
"eval_runtime": 5.3078, |
|
"eval_samples_per_second": 113.041, |
|
"eval_steps_per_second": 3.58, |
|
"step": 8343 |
|
}, |
|
{ |
|
"epoch": 445.33, |
|
"learning_rate": 4.012345679012345e-06, |
|
"loss": 0.035, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 445.87, |
|
"learning_rate": 3.9506172839506175e-06, |
|
"loss": 0.0327, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 445.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.933958530426025, |
|
"eval_runtime": 5.2639, |
|
"eval_samples_per_second": 113.985, |
|
"eval_steps_per_second": 3.61, |
|
"step": 8362 |
|
}, |
|
{ |
|
"epoch": 446.4, |
|
"learning_rate": 3.888888888888889e-06, |
|
"loss": 0.0226, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 446.93, |
|
"learning_rate": 3.827160493827161e-06, |
|
"loss": 0.0256, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 446.99, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.924610614776611, |
|
"eval_runtime": 5.2666, |
|
"eval_samples_per_second": 113.926, |
|
"eval_steps_per_second": 3.608, |
|
"step": 8381 |
|
}, |
|
{ |
|
"epoch": 447.47, |
|
"learning_rate": 3.7654320987654325e-06, |
|
"loss": 0.027, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.0327, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 448.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.929351329803467, |
|
"eval_runtime": 5.2953, |
|
"eval_samples_per_second": 113.308, |
|
"eval_steps_per_second": 3.588, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 448.53, |
|
"learning_rate": 3.6419753086419754e-06, |
|
"loss": 0.0246, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 448.96, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.931128025054932, |
|
"eval_runtime": 5.3144, |
|
"eval_samples_per_second": 112.901, |
|
"eval_steps_per_second": 3.575, |
|
"step": 8418 |
|
}, |
|
{ |
|
"epoch": 449.07, |
|
"learning_rate": 3.580246913580247e-06, |
|
"loss": 0.027, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 449.6, |
|
"learning_rate": 3.5185185185185187e-06, |
|
"loss": 0.0239, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 449.97, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.92203950881958, |
|
"eval_runtime": 5.2899, |
|
"eval_samples_per_second": 113.424, |
|
"eval_steps_per_second": 3.592, |
|
"step": 8437 |
|
}, |
|
{ |
|
"epoch": 450.13, |
|
"learning_rate": 3.45679012345679e-06, |
|
"loss": 0.0313, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 450.67, |
|
"learning_rate": 3.3950617283950617e-06, |
|
"loss": 0.0219, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 450.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.920533180236816, |
|
"eval_runtime": 5.4385, |
|
"eval_samples_per_second": 110.325, |
|
"eval_steps_per_second": 3.494, |
|
"step": 8456 |
|
}, |
|
{ |
|
"epoch": 451.2, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0214, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 451.73, |
|
"learning_rate": 3.271604938271605e-06, |
|
"loss": 0.0287, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 452.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.924895763397217, |
|
"eval_runtime": 5.3449, |
|
"eval_samples_per_second": 112.256, |
|
"eval_steps_per_second": 3.555, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 452.27, |
|
"learning_rate": 3.209876543209877e-06, |
|
"loss": 0.0226, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 452.8, |
|
"learning_rate": 3.148148148148148e-06, |
|
"loss": 0.0244, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 452.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.927518844604492, |
|
"eval_runtime": 5.3234, |
|
"eval_samples_per_second": 112.711, |
|
"eval_steps_per_second": 3.569, |
|
"step": 8493 |
|
}, |
|
{ |
|
"epoch": 453.33, |
|
"learning_rate": 3.0864197530864196e-06, |
|
"loss": 0.0215, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 453.87, |
|
"learning_rate": 3.0246913580246917e-06, |
|
"loss": 0.0222, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 453.97, |
|
"eval_accuracy": 0.24166666666666667, |
|
"eval_loss": 4.932238578796387, |
|
"eval_runtime": 5.3268, |
|
"eval_samples_per_second": 112.637, |
|
"eval_steps_per_second": 3.567, |
|
"step": 8512 |
|
}, |
|
{ |
|
"epoch": 454.4, |
|
"learning_rate": 2.9629629629629633e-06, |
|
"loss": 0.0269, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 454.93, |
|
"learning_rate": 2.9012345679012346e-06, |
|
"loss": 0.0277, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 454.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.931756019592285, |
|
"eval_runtime": 5.3343, |
|
"eval_samples_per_second": 112.48, |
|
"eval_steps_per_second": 3.562, |
|
"step": 8531 |
|
}, |
|
{ |
|
"epoch": 455.47, |
|
"learning_rate": 2.8395061728395062e-06, |
|
"loss": 0.0355, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 0.0315, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 456.0, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.929112911224365, |
|
"eval_runtime": 5.4018, |
|
"eval_samples_per_second": 111.075, |
|
"eval_steps_per_second": 3.517, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 456.53, |
|
"learning_rate": 2.7160493827160496e-06, |
|
"loss": 0.021, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 456.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.929343223571777, |
|
"eval_runtime": 5.3767, |
|
"eval_samples_per_second": 111.594, |
|
"eval_steps_per_second": 3.534, |
|
"step": 8568 |
|
}, |
|
{ |
|
"epoch": 457.07, |
|
"learning_rate": 2.6543209876543212e-06, |
|
"loss": 0.0232, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 457.6, |
|
"learning_rate": 2.5925925925925925e-06, |
|
"loss": 0.0288, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 457.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.923261642456055, |
|
"eval_runtime": 5.3734, |
|
"eval_samples_per_second": 111.661, |
|
"eval_steps_per_second": 3.536, |
|
"step": 8587 |
|
}, |
|
{ |
|
"epoch": 458.13, |
|
"learning_rate": 2.5308641975308646e-06, |
|
"loss": 0.0381, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 458.67, |
|
"learning_rate": 2.469135802469136e-06, |
|
"loss": 0.0229, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 458.99, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.923562049865723, |
|
"eval_runtime": 5.3963, |
|
"eval_samples_per_second": 111.186, |
|
"eval_steps_per_second": 3.521, |
|
"step": 8606 |
|
}, |
|
{ |
|
"epoch": 459.2, |
|
"learning_rate": 2.4074074074074075e-06, |
|
"loss": 0.0308, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 459.73, |
|
"learning_rate": 2.345679012345679e-06, |
|
"loss": 0.0257, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 460.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.922508716583252, |
|
"eval_runtime": 5.4351, |
|
"eval_samples_per_second": 110.394, |
|
"eval_steps_per_second": 3.496, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 460.27, |
|
"learning_rate": 2.283950617283951e-06, |
|
"loss": 0.0193, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 460.8, |
|
"learning_rate": 2.2222222222222225e-06, |
|
"loss": 0.0291, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 460.96, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.922194004058838, |
|
"eval_runtime": 5.3746, |
|
"eval_samples_per_second": 111.636, |
|
"eval_steps_per_second": 3.535, |
|
"step": 8643 |
|
}, |
|
{ |
|
"epoch": 461.33, |
|
"learning_rate": 2.1604938271604937e-06, |
|
"loss": 0.0292, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 461.87, |
|
"learning_rate": 2.0987654320987654e-06, |
|
"loss": 0.0325, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 461.97, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.921637535095215, |
|
"eval_runtime": 5.38, |
|
"eval_samples_per_second": 111.524, |
|
"eval_steps_per_second": 3.532, |
|
"step": 8662 |
|
}, |
|
{ |
|
"epoch": 462.4, |
|
"learning_rate": 2.0370370370370375e-06, |
|
"loss": 0.0292, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 462.93, |
|
"learning_rate": 1.9753086419753087e-06, |
|
"loss": 0.0268, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 462.99, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.92024564743042, |
|
"eval_runtime": 5.3601, |
|
"eval_samples_per_second": 111.939, |
|
"eval_steps_per_second": 3.545, |
|
"step": 8681 |
|
}, |
|
{ |
|
"epoch": 463.47, |
|
"learning_rate": 1.9135802469135804e-06, |
|
"loss": 0.0268, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.0156, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 464.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.91749906539917, |
|
"eval_runtime": 5.3813, |
|
"eval_samples_per_second": 111.497, |
|
"eval_steps_per_second": 3.531, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 464.53, |
|
"learning_rate": 1.7901234567901235e-06, |
|
"loss": 0.0196, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 464.96, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.914690017700195, |
|
"eval_runtime": 5.3651, |
|
"eval_samples_per_second": 111.834, |
|
"eval_steps_per_second": 3.541, |
|
"step": 8718 |
|
}, |
|
{ |
|
"epoch": 465.07, |
|
"learning_rate": 1.728395061728395e-06, |
|
"loss": 0.0199, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 465.6, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 0.0448, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 465.97, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.910024642944336, |
|
"eval_runtime": 5.359, |
|
"eval_samples_per_second": 111.962, |
|
"eval_steps_per_second": 3.545, |
|
"step": 8737 |
|
}, |
|
{ |
|
"epoch": 466.13, |
|
"learning_rate": 1.6049382716049385e-06, |
|
"loss": 0.021, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 466.67, |
|
"learning_rate": 1.5432098765432098e-06, |
|
"loss": 0.0232, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 466.99, |
|
"eval_accuracy": 0.23333333333333334, |
|
"eval_loss": 4.908828258514404, |
|
"eval_runtime": 5.3987, |
|
"eval_samples_per_second": 111.138, |
|
"eval_steps_per_second": 3.519, |
|
"step": 8756 |
|
}, |
|
{ |
|
"epoch": 467.2, |
|
"learning_rate": 1.4814814814814817e-06, |
|
"loss": 0.0224, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 467.73, |
|
"learning_rate": 1.4197530864197531e-06, |
|
"loss": 0.0274, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 468.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.909602165222168, |
|
"eval_runtime": 5.4071, |
|
"eval_samples_per_second": 110.966, |
|
"eval_steps_per_second": 3.514, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 468.27, |
|
"learning_rate": 1.3580246913580248e-06, |
|
"loss": 0.0344, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 468.8, |
|
"learning_rate": 1.2962962962962962e-06, |
|
"loss": 0.029, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 468.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.910549640655518, |
|
"eval_runtime": 5.3868, |
|
"eval_samples_per_second": 111.383, |
|
"eval_steps_per_second": 3.527, |
|
"step": 8793 |
|
}, |
|
{ |
|
"epoch": 469.33, |
|
"learning_rate": 1.234567901234568e-06, |
|
"loss": 0.0265, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 469.87, |
|
"learning_rate": 1.1728395061728396e-06, |
|
"loss": 0.0337, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 469.97, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.912467956542969, |
|
"eval_runtime": 5.3985, |
|
"eval_samples_per_second": 111.143, |
|
"eval_steps_per_second": 3.52, |
|
"step": 8812 |
|
}, |
|
{ |
|
"epoch": 470.4, |
|
"learning_rate": 1.1111111111111112e-06, |
|
"loss": 0.0298, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 470.93, |
|
"learning_rate": 1.0493827160493827e-06, |
|
"loss": 0.0178, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 470.99, |
|
"eval_accuracy": 0.235, |
|
"eval_loss": 4.91201639175415, |
|
"eval_runtime": 5.399, |
|
"eval_samples_per_second": 111.132, |
|
"eval_steps_per_second": 3.519, |
|
"step": 8831 |
|
}, |
|
{ |
|
"epoch": 471.47, |
|
"learning_rate": 9.876543209876544e-07, |
|
"loss": 0.0179, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"learning_rate": 9.259259259259259e-07, |
|
"loss": 0.0286, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 472.0, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.912468910217285, |
|
"eval_runtime": 5.4913, |
|
"eval_samples_per_second": 109.263, |
|
"eval_steps_per_second": 3.46, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 472.53, |
|
"learning_rate": 8.641975308641975e-07, |
|
"loss": 0.0159, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 472.96, |
|
"eval_accuracy": 0.23666666666666666, |
|
"eval_loss": 4.910217761993408, |
|
"eval_runtime": 5.4334, |
|
"eval_samples_per_second": 110.428, |
|
"eval_steps_per_second": 3.497, |
|
"step": 8868 |
|
}, |
|
{ |
|
"epoch": 473.07, |
|
"learning_rate": 8.024691358024693e-07, |
|
"loss": 0.0247, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 473.6, |
|
"learning_rate": 7.407407407407408e-07, |
|
"loss": 0.0318, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 473.97, |
|
"eval_accuracy": 0.23833333333333334, |
|
"eval_loss": 4.9116291999816895, |
|
"eval_runtime": 5.4254, |
|
"eval_samples_per_second": 110.59, |
|
"eval_steps_per_second": 3.502, |
|
"step": 8887 |
|
}, |
|
{ |
|
"epoch": 474.13, |
|
"learning_rate": 6.790123456790124e-07, |
|
"loss": 0.0311, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 474.67, |
|
"learning_rate": 6.17283950617284e-07, |
|
"loss": 0.0302, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 474.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.911314487457275, |
|
"eval_runtime": 5.4263, |
|
"eval_samples_per_second": 110.573, |
|
"eval_steps_per_second": 3.501, |
|
"step": 8906 |
|
}, |
|
{ |
|
"epoch": 475.2, |
|
"learning_rate": 5.555555555555556e-07, |
|
"loss": 0.0305, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 475.73, |
|
"learning_rate": 4.938271604938272e-07, |
|
"loss": 0.0184, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 476.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.911987781524658, |
|
"eval_runtime": 5.4618, |
|
"eval_samples_per_second": 109.854, |
|
"eval_steps_per_second": 3.479, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 476.27, |
|
"learning_rate": 4.3209876543209875e-07, |
|
"loss": 0.0415, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 476.8, |
|
"learning_rate": 3.703703703703704e-07, |
|
"loss": 0.025, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 476.96, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.912769794464111, |
|
"eval_runtime": 5.4549, |
|
"eval_samples_per_second": 109.993, |
|
"eval_steps_per_second": 3.483, |
|
"step": 8943 |
|
}, |
|
{ |
|
"epoch": 477.33, |
|
"learning_rate": 3.08641975308642e-07, |
|
"loss": 0.013, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 477.87, |
|
"learning_rate": 2.469135802469136e-07, |
|
"loss": 0.027, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 477.97, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.912613868713379, |
|
"eval_runtime": 5.4968, |
|
"eval_samples_per_second": 109.154, |
|
"eval_steps_per_second": 3.457, |
|
"step": 8962 |
|
}, |
|
{ |
|
"epoch": 478.4, |
|
"learning_rate": 1.851851851851852e-07, |
|
"loss": 0.0156, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 478.93, |
|
"learning_rate": 1.234567901234568e-07, |
|
"loss": 0.0298, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 478.99, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.913006782531738, |
|
"eval_runtime": 5.4672, |
|
"eval_samples_per_second": 109.745, |
|
"eval_steps_per_second": 3.475, |
|
"step": 8981 |
|
}, |
|
{ |
|
"epoch": 479.47, |
|
"learning_rate": 6.17283950617284e-08, |
|
"loss": 0.0213, |
|
"step": 8990 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.0349, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"eval_accuracy": 0.24, |
|
"eval_loss": 4.9129838943481445, |
|
"eval_runtime": 5.4668, |
|
"eval_samples_per_second": 109.753, |
|
"eval_steps_per_second": 3.476, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 480.0, |
|
"step": 9000, |
|
"total_flos": 6.6744785965028475e+19, |
|
"train_loss": 0.3827814753833744, |
|
"train_runtime": 27227.0796, |
|
"train_samples_per_second": 44.019, |
|
"train_steps_per_second": 0.331 |
|
} |
|
], |
|
"max_steps": 9000, |
|
"num_train_epochs": 500, |
|
"total_flos": 6.6744785965028475e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|