|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 6378, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 8.77742946708464e-07, |
|
"loss": 1.2983, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.1316614420062697e-06, |
|
"loss": 1.2266, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.3855799373040753e-06, |
|
"loss": 0.9665, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.639498432601881e-06, |
|
"loss": 0.654, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 5.8934169278996865e-06, |
|
"loss": 0.4329, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.147335423197492e-06, |
|
"loss": 0.3411, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 8.4012539184953e-06, |
|
"loss": 0.2794, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 9.655172413793105e-06, |
|
"loss": 0.2419, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.1808, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.2163009404388715e-05, |
|
"loss": 0.1434, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.341692789968652e-05, |
|
"loss": 0.1201, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4670846394984329e-05, |
|
"loss": 0.1144, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.5924764890282133e-05, |
|
"loss": 0.1168, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.717868338557994e-05, |
|
"loss": 0.095, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8432601880877744e-05, |
|
"loss": 0.1066, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.968652037617555e-05, |
|
"loss": 0.111, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.0940438871473355e-05, |
|
"loss": 0.1068, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.2194357366771163e-05, |
|
"loss": 0.1166, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.3448275862068967e-05, |
|
"loss": 0.0985, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.4702194357366774e-05, |
|
"loss": 0.0961, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.595611285266458e-05, |
|
"loss": 0.0819, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.7210031347962385e-05, |
|
"loss": 0.0932, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8463949843260192e-05, |
|
"loss": 0.1094, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.9717868338557996e-05, |
|
"loss": 0.0926, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.097178683385581e-05, |
|
"loss": 0.0935, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.222570532915361e-05, |
|
"loss": 0.0869, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.3479623824451415e-05, |
|
"loss": 0.091, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.4733542319749215e-05, |
|
"loss": 0.0863, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.598746081504703e-05, |
|
"loss": 0.0969, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.724137931034483e-05, |
|
"loss": 0.085, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.849529780564264e-05, |
|
"loss": 0.0966, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.974921630094044e-05, |
|
"loss": 0.0787, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.1003134796238245e-05, |
|
"loss": 0.0905, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.225705329153606e-05, |
|
"loss": 0.0986, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.351097178683386e-05, |
|
"loss": 0.0952, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.476489028213166e-05, |
|
"loss": 0.1079, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.601880877742947e-05, |
|
"loss": 0.0874, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.727272727272728e-05, |
|
"loss": 0.0776, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.852664576802508e-05, |
|
"loss": 0.0948, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.978056426332288e-05, |
|
"loss": 0.086, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5.10344827586207e-05, |
|
"loss": 0.0974, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.2288401253918504e-05, |
|
"loss": 0.1048, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 5.3542319749216304e-05, |
|
"loss": 0.0918, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5.4796238244514105e-05, |
|
"loss": 0.0922, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 5.605015673981192e-05, |
|
"loss": 0.093, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.7304075235109726e-05, |
|
"loss": 0.0775, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5.855799373040753e-05, |
|
"loss": 0.0976, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 5.9811912225705334e-05, |
|
"loss": 0.0968, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 6.106583072100315e-05, |
|
"loss": 0.0755, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.231974921630095e-05, |
|
"loss": 0.0871, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.357366771159875e-05, |
|
"loss": 0.081, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 6.482758620689655e-05, |
|
"loss": 0.0822, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.608150470219436e-05, |
|
"loss": 0.0815, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 6.733542319749216e-05, |
|
"loss": 0.1007, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.858934169278998e-05, |
|
"loss": 0.0772, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 6.984326018808778e-05, |
|
"loss": 0.0765, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.109717868338559e-05, |
|
"loss": 0.0892, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 7.23510971786834e-05, |
|
"loss": 0.0774, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.36050156739812e-05, |
|
"loss": 0.0789, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 7.485893416927901e-05, |
|
"loss": 0.0886, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.611285266457681e-05, |
|
"loss": 0.0759, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 7.736677115987461e-05, |
|
"loss": 0.0836, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.862068965517242e-05, |
|
"loss": 0.0855, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 7.987460815047022e-05, |
|
"loss": 0.0882, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.987456445993032e-05, |
|
"loss": 0.0661, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 7.973519163763066e-05, |
|
"loss": 0.0918, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.959581881533101e-05, |
|
"loss": 0.0689, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.945644599303136e-05, |
|
"loss": 0.0854, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 7.931707317073171e-05, |
|
"loss": 0.0754, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.917770034843206e-05, |
|
"loss": 0.0902, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 7.90383275261324e-05, |
|
"loss": 0.097, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.889895470383276e-05, |
|
"loss": 0.0805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 7.87595818815331e-05, |
|
"loss": 0.0871, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.862020905923346e-05, |
|
"loss": 0.0762, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 7.84808362369338e-05, |
|
"loss": 0.077, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.834146341463415e-05, |
|
"loss": 0.0832, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 7.82020905923345e-05, |
|
"loss": 0.0758, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.806271777003485e-05, |
|
"loss": 0.0722, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 7.79233449477352e-05, |
|
"loss": 0.085, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.778397212543555e-05, |
|
"loss": 0.0823, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 7.76445993031359e-05, |
|
"loss": 0.0854, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.750522648083624e-05, |
|
"loss": 0.0753, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 7.736585365853659e-05, |
|
"loss": 0.0869, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.722648083623694e-05, |
|
"loss": 0.071, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.708710801393729e-05, |
|
"loss": 0.083, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 7.694773519163764e-05, |
|
"loss": 0.0862, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.680836236933799e-05, |
|
"loss": 0.0798, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 7.666898954703834e-05, |
|
"loss": 0.0745, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.652961672473867e-05, |
|
"loss": 0.0716, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 7.639024390243902e-05, |
|
"loss": 0.0756, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.625087108013937e-05, |
|
"loss": 0.0764, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 7.611149825783972e-05, |
|
"loss": 0.0717, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.597212543554007e-05, |
|
"loss": 0.0862, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 7.583275261324042e-05, |
|
"loss": 0.0727, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.569337979094077e-05, |
|
"loss": 0.0776, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 7.555400696864112e-05, |
|
"loss": 0.0711, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.541463414634147e-05, |
|
"loss": 0.0844, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.527526132404182e-05, |
|
"loss": 0.0749, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.513588850174217e-05, |
|
"loss": 0.0901, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 7.499651567944252e-05, |
|
"loss": 0.0867, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.485714285714287e-05, |
|
"loss": 0.0806, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.471777003484322e-05, |
|
"loss": 0.0759, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.457839721254357e-05, |
|
"loss": 0.0758, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.443902439024392e-05, |
|
"loss": 0.0764, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 7.429965156794425e-05, |
|
"loss": 0.0731, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.41602787456446e-05, |
|
"loss": 0.0936, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 7.402090592334495e-05, |
|
"loss": 0.079, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.38815331010453e-05, |
|
"loss": 0.0739, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 7.374216027874565e-05, |
|
"loss": 0.0679, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.3602787456446e-05, |
|
"loss": 0.0763, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 7.346341463414635e-05, |
|
"loss": 0.0821, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.33240418118467e-05, |
|
"loss": 0.0831, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.318466898954704e-05, |
|
"loss": 0.0721, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.304529616724739e-05, |
|
"loss": 0.061, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 7.290592334494774e-05, |
|
"loss": 0.0723, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.276655052264809e-05, |
|
"loss": 0.0759, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 7.262717770034844e-05, |
|
"loss": 0.0745, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.248780487804878e-05, |
|
"loss": 0.073, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.234843205574913e-05, |
|
"loss": 0.085, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 7.220905923344948e-05, |
|
"loss": 0.0754, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.206968641114983e-05, |
|
"loss": 0.0784, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.193031358885018e-05, |
|
"loss": 0.0741, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.179094076655053e-05, |
|
"loss": 0.0813, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.165156794425088e-05, |
|
"loss": 0.0779, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.151219512195123e-05, |
|
"loss": 0.0829, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.137282229965158e-05, |
|
"loss": 0.0776, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.123344947735193e-05, |
|
"loss": 0.0711, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.109407665505227e-05, |
|
"loss": 0.0801, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.095470383275262e-05, |
|
"loss": 0.0742, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.081533101045297e-05, |
|
"loss": 0.0772, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.067595818815332e-05, |
|
"loss": 0.0777, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.053658536585367e-05, |
|
"loss": 0.0772, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.039721254355402e-05, |
|
"loss": 0.0644, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.025783972125437e-05, |
|
"loss": 0.0716, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.011846689895471e-05, |
|
"loss": 0.0669, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.997909407665505e-05, |
|
"loss": 0.0757, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.98397212543554e-05, |
|
"loss": 0.083, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.970034843205575e-05, |
|
"loss": 0.0784, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 6.95609756097561e-05, |
|
"loss": 0.0675, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.942160278745645e-05, |
|
"loss": 0.0622, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.92822299651568e-05, |
|
"loss": 0.0714, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.914285714285715e-05, |
|
"loss": 0.0737, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.90034843205575e-05, |
|
"loss": 0.0706, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.886411149825785e-05, |
|
"loss": 0.0749, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.87247386759582e-05, |
|
"loss": 0.0644, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.858536585365855e-05, |
|
"loss": 0.0774, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.84459930313589e-05, |
|
"loss": 0.079, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.830662020905925e-05, |
|
"loss": 0.0896, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.81672473867596e-05, |
|
"loss": 0.0886, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.802787456445995e-05, |
|
"loss": 0.0731, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.788850174216028e-05, |
|
"loss": 0.0687, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.774912891986063e-05, |
|
"loss": 0.0705, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.760975609756098e-05, |
|
"loss": 0.0762, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 6.747038327526133e-05, |
|
"loss": 0.0674, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.733101045296168e-05, |
|
"loss": 0.0877, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.719163763066203e-05, |
|
"loss": 0.0728, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.705226480836238e-05, |
|
"loss": 0.0625, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.691289198606273e-05, |
|
"loss": 0.0773, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.677351916376307e-05, |
|
"loss": 0.0721, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.663414634146341e-05, |
|
"loss": 0.0921, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.649477351916376e-05, |
|
"loss": 0.0798, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 6.635540069686411e-05, |
|
"loss": 0.0702, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.621602787456446e-05, |
|
"loss": 0.0724, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.607665505226481e-05, |
|
"loss": 0.0751, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.593728222996516e-05, |
|
"loss": 0.0705, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.579790940766551e-05, |
|
"loss": 0.0758, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.565853658536585e-05, |
|
"loss": 0.0637, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.55191637630662e-05, |
|
"loss": 0.069, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.537979094076655e-05, |
|
"loss": 0.0844, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.52404181184669e-05, |
|
"loss": 0.0705, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.510104529616725e-05, |
|
"loss": 0.0727, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.49616724738676e-05, |
|
"loss": 0.08, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 6.482229965156795e-05, |
|
"loss": 0.0821, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.46829268292683e-05, |
|
"loss": 0.0841, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 6.454355400696865e-05, |
|
"loss": 0.069, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 6.4404181184669e-05, |
|
"loss": 0.0604, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 6.426480836236935e-05, |
|
"loss": 0.0915, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.41254355400697e-05, |
|
"loss": 0.0744, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 6.398606271777004e-05, |
|
"loss": 0.0603, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.38466898954704e-05, |
|
"loss": 0.0601, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 6.370731707317074e-05, |
|
"loss": 0.07, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.356794425087109e-05, |
|
"loss": 0.0838, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.342857142857143e-05, |
|
"loss": 0.0712, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.328919860627178e-05, |
|
"loss": 0.0635, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.314982578397213e-05, |
|
"loss": 0.0764, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 6.301045296167248e-05, |
|
"loss": 0.0797, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.287108013937283e-05, |
|
"loss": 0.0761, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.273170731707318e-05, |
|
"loss": 0.0664, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.259233449477353e-05, |
|
"loss": 0.0617, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.245296167247386e-05, |
|
"loss": 0.0581, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.231358885017421e-05, |
|
"loss": 0.0637, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 6.217421602787456e-05, |
|
"loss": 0.0805, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.203484320557491e-05, |
|
"loss": 0.0857, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 6.189547038327526e-05, |
|
"loss": 0.0662, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.175609756097561e-05, |
|
"loss": 0.0682, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.161672473867596e-05, |
|
"loss": 0.0728, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.147735191637631e-05, |
|
"loss": 0.0679, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 6.133797909407666e-05, |
|
"loss": 0.0804, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.119860627177701e-05, |
|
"loss": 0.0797, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.105923344947736e-05, |
|
"loss": 0.0797, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.09198606271777e-05, |
|
"loss": 0.0853, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.078048780487805e-05, |
|
"loss": 0.0648, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 6.06411149825784e-05, |
|
"loss": 0.0687, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.050174216027875e-05, |
|
"loss": 0.0744, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6.03623693379791e-05, |
|
"loss": 0.077, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.0222996515679443e-05, |
|
"loss": 0.0731, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.008362369337979e-05, |
|
"loss": 0.0666, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.994425087108014e-05, |
|
"loss": 0.0796, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.980487804878049e-05, |
|
"loss": 0.066, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.966550522648084e-05, |
|
"loss": 0.0723, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 5.952613240418119e-05, |
|
"loss": 0.0904, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.938675958188154e-05, |
|
"loss": 0.0746, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.975273572806923, |
|
"eval_f1": 0.763480445233661, |
|
"eval_loss": 0.06570570170879364, |
|
"eval_precision": 0.7414822915790359, |
|
"eval_recall": 0.786823781467595, |
|
"eval_runtime": 9.3826, |
|
"eval_samples_per_second": 1509.169, |
|
"eval_steps_per_second": 188.646, |
|
"step": 2126 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.924738675958189e-05, |
|
"loss": 0.0887, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.9108013937282234e-05, |
|
"loss": 0.0663, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 5.896864111498258e-05, |
|
"loss": 0.0568, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.882926829268293e-05, |
|
"loss": 0.0628, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 5.868989547038328e-05, |
|
"loss": 0.069, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.855052264808363e-05, |
|
"loss": 0.0542, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.841114982578398e-05, |
|
"loss": 0.0443, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 5.827177700348433e-05, |
|
"loss": 0.0647, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.813240418118467e-05, |
|
"loss": 0.0651, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5.799303135888502e-05, |
|
"loss": 0.0628, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.785365853658537e-05, |
|
"loss": 0.0541, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5.7714285714285716e-05, |
|
"loss": 0.0544, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.7574912891986066e-05, |
|
"loss": 0.0531, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.7435540069686416e-05, |
|
"loss": 0.0514, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.7296167247386765e-05, |
|
"loss": 0.0708, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5.7156794425087115e-05, |
|
"loss": 0.0546, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5.701742160278746e-05, |
|
"loss": 0.0528, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 5.687804878048781e-05, |
|
"loss": 0.0523, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.673867595818816e-05, |
|
"loss": 0.0589, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 5.659930313588851e-05, |
|
"loss": 0.064, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.6459930313588856e-05, |
|
"loss": 0.0543, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 5.6320557491289206e-05, |
|
"loss": 0.0647, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.6181184668989555e-05, |
|
"loss": 0.0593, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.6041811846689905e-05, |
|
"loss": 0.0573, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5.590243902439025e-05, |
|
"loss": 0.0594, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5.57630662020906e-05, |
|
"loss": 0.0624, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5.562369337979095e-05, |
|
"loss": 0.0592, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5.54843205574913e-05, |
|
"loss": 0.0561, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 5.5344947735191646e-05, |
|
"loss": 0.05, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5.520557491289199e-05, |
|
"loss": 0.0598, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 5.506620209059234e-05, |
|
"loss": 0.0661, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5.492682926829269e-05, |
|
"loss": 0.0556, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5.478745644599303e-05, |
|
"loss": 0.0486, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5.464808362369338e-05, |
|
"loss": 0.0561, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 5.450871080139373e-05, |
|
"loss": 0.0557, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5.436933797909408e-05, |
|
"loss": 0.0621, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 5.422996515679443e-05, |
|
"loss": 0.0558, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5.409059233449478e-05, |
|
"loss": 0.0566, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 5.395121951219513e-05, |
|
"loss": 0.0558, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5.381184668989547e-05, |
|
"loss": 0.0566, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5.367247386759582e-05, |
|
"loss": 0.0636, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5.353310104529617e-05, |
|
"loss": 0.0649, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5.339372822299652e-05, |
|
"loss": 0.0556, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 5.325435540069687e-05, |
|
"loss": 0.0495, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5.311498257839722e-05, |
|
"loss": 0.0656, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 5.297560975609757e-05, |
|
"loss": 0.0628, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.283623693379792e-05, |
|
"loss": 0.0536, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5.2696864111498255e-05, |
|
"loss": 0.0608, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.2557491289198605e-05, |
|
"loss": 0.0518, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 5.2418118466898955e-05, |
|
"loss": 0.0553, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.2278745644599304e-05, |
|
"loss": 0.0563, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 5.2139372822299654e-05, |
|
"loss": 0.0582, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.2000000000000004e-05, |
|
"loss": 0.0541, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.186062717770035e-05, |
|
"loss": 0.0645, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.17212543554007e-05, |
|
"loss": 0.0512, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5.1581881533101046e-05, |
|
"loss": 0.0539, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.1442508710801395e-05, |
|
"loss": 0.0554, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.1303135888501745e-05, |
|
"loss": 0.0637, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 5.1163763066202095e-05, |
|
"loss": 0.0523, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.1024390243902444e-05, |
|
"loss": 0.0634, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 5.0885017421602794e-05, |
|
"loss": 0.0693, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.0745644599303143e-05, |
|
"loss": 0.0637, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 5.060627177700349e-05, |
|
"loss": 0.0423, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.0466898954703836e-05, |
|
"loss": 0.0451, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5.0327526132404186e-05, |
|
"loss": 0.0669, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.0188153310104535e-05, |
|
"loss": 0.0602, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5.0048780487804885e-05, |
|
"loss": 0.0586, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.9909407665505234e-05, |
|
"loss": 0.0654, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.9770034843205584e-05, |
|
"loss": 0.0567, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.9630662020905934e-05, |
|
"loss": 0.058, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.949128919860627e-05, |
|
"loss": 0.0514, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.935191637630662e-05, |
|
"loss": 0.0544, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.921254355400697e-05, |
|
"loss": 0.0523, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.907317073170732e-05, |
|
"loss": 0.0521, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.893379790940767e-05, |
|
"loss": 0.0657, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.879442508710802e-05, |
|
"loss": 0.0611, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.865505226480837e-05, |
|
"loss": 0.0536, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.851567944250872e-05, |
|
"loss": 0.0504, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.837630662020906e-05, |
|
"loss": 0.0531, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.823693379790941e-05, |
|
"loss": 0.0676, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.809756097560976e-05, |
|
"loss": 0.0533, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.795818815331011e-05, |
|
"loss": 0.0457, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.781881533101046e-05, |
|
"loss": 0.063, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.767944250871081e-05, |
|
"loss": 0.0507, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.754006968641116e-05, |
|
"loss": 0.0658, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.740069686411151e-05, |
|
"loss": 0.0578, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.726132404181185e-05, |
|
"loss": 0.0601, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.71219512195122e-05, |
|
"loss": 0.0569, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.698257839721254e-05, |
|
"loss": 0.066, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.684320557491289e-05, |
|
"loss": 0.0556, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.670383275261324e-05, |
|
"loss": 0.0578, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.656445993031359e-05, |
|
"loss": 0.0572, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.642508710801394e-05, |
|
"loss": 0.0628, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.628571428571429e-05, |
|
"loss": 0.0564, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.6146341463414634e-05, |
|
"loss": 0.0457, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.600696864111498e-05, |
|
"loss": 0.0538, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.586759581881533e-05, |
|
"loss": 0.0597, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.572822299651568e-05, |
|
"loss": 0.0506, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 4.558885017421603e-05, |
|
"loss": 0.0539, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.544947735191638e-05, |
|
"loss": 0.056, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.531010452961673e-05, |
|
"loss": 0.0416, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.5170731707317074e-05, |
|
"loss": 0.0569, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.5031358885017424e-05, |
|
"loss": 0.0668, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.4891986062717773e-05, |
|
"loss": 0.0627, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.475261324041812e-05, |
|
"loss": 0.0613, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.461324041811847e-05, |
|
"loss": 0.0556, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.447386759581882e-05, |
|
"loss": 0.0473, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.433449477351917e-05, |
|
"loss": 0.0493, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.419512195121952e-05, |
|
"loss": 0.0475, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.405574912891986e-05, |
|
"loss": 0.0532, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.391637630662021e-05, |
|
"loss": 0.0551, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.377700348432056e-05, |
|
"loss": 0.0557, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.3637630662020907e-05, |
|
"loss": 0.0562, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.3498257839721256e-05, |
|
"loss": 0.0575, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.3358885017421606e-05, |
|
"loss": 0.0571, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.3219512195121955e-05, |
|
"loss": 0.0544, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.3080139372822305e-05, |
|
"loss": 0.0526, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 4.294076655052265e-05, |
|
"loss": 0.0545, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.2801393728223e-05, |
|
"loss": 0.059, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.266202090592335e-05, |
|
"loss": 0.0527, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.25226480836237e-05, |
|
"loss": 0.0646, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 4.2383275261324046e-05, |
|
"loss": 0.0547, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.2243902439024396e-05, |
|
"loss": 0.0725, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.2104529616724746e-05, |
|
"loss": 0.0542, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.1965156794425095e-05, |
|
"loss": 0.0594, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.182578397212544e-05, |
|
"loss": 0.0594, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.168641114982579e-05, |
|
"loss": 0.0599, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.154703832752614e-05, |
|
"loss": 0.0526, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.140766550522649e-05, |
|
"loss": 0.0634, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.1268292682926837e-05, |
|
"loss": 0.0509, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.1128919860627186e-05, |
|
"loss": 0.0652, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.0989547038327536e-05, |
|
"loss": 0.0543, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.085017421602787e-05, |
|
"loss": 0.0582, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.071080139372822e-05, |
|
"loss": 0.0591, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.057142857142857e-05, |
|
"loss": 0.0629, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.043205574912892e-05, |
|
"loss": 0.054, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.029268292682927e-05, |
|
"loss": 0.0525, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.015331010452962e-05, |
|
"loss": 0.0571, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.001393728222997e-05, |
|
"loss": 0.0512, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.987456445993032e-05, |
|
"loss": 0.0597, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.973519163763067e-05, |
|
"loss": 0.0662, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.959581881533102e-05, |
|
"loss": 0.0578, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.945644599303136e-05, |
|
"loss": 0.0546, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.931707317073171e-05, |
|
"loss": 0.0685, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.917770034843206e-05, |
|
"loss": 0.0612, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.903832752613241e-05, |
|
"loss": 0.0572, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.889895470383275e-05, |
|
"loss": 0.0495, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.87595818815331e-05, |
|
"loss": 0.0538, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.862020905923345e-05, |
|
"loss": 0.0629, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 3.84808362369338e-05, |
|
"loss": 0.0565, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.8341463414634145e-05, |
|
"loss": 0.0628, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.8202090592334494e-05, |
|
"loss": 0.0621, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.8062717770034844e-05, |
|
"loss": 0.0611, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.7923344947735194e-05, |
|
"loss": 0.0585, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.778397212543554e-05, |
|
"loss": 0.0689, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.764459930313589e-05, |
|
"loss": 0.0624, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.750522648083624e-05, |
|
"loss": 0.0624, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.736585365853659e-05, |
|
"loss": 0.0546, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.7226480836236935e-05, |
|
"loss": 0.0564, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.7087108013937285e-05, |
|
"loss": 0.0575, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.6947735191637634e-05, |
|
"loss": 0.0605, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.680836236933798e-05, |
|
"loss": 0.0608, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.666898954703833e-05, |
|
"loss": 0.0486, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.6529616724738676e-05, |
|
"loss": 0.0583, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.6390243902439026e-05, |
|
"loss": 0.0596, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.6250871080139376e-05, |
|
"loss": 0.0617, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.6111498257839725e-05, |
|
"loss": 0.0656, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.5972125435540075e-05, |
|
"loss": 0.0562, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.5832752613240425e-05, |
|
"loss": 0.0562, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.569337979094077e-05, |
|
"loss": 0.0562, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.555400696864112e-05, |
|
"loss": 0.0655, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.541463414634147e-05, |
|
"loss": 0.0572, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.5275261324041816e-05, |
|
"loss": 0.0548, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.513588850174216e-05, |
|
"loss": 0.0471, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.499651567944251e-05, |
|
"loss": 0.0545, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.485714285714286e-05, |
|
"loss": 0.0489, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.471777003484321e-05, |
|
"loss": 0.0627, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.457839721254356e-05, |
|
"loss": 0.0641, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.443902439024391e-05, |
|
"loss": 0.0538, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.429965156794426e-05, |
|
"loss": 0.0527, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.4160278745644606e-05, |
|
"loss": 0.0578, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.402090592334495e-05, |
|
"loss": 0.0588, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.38815331010453e-05, |
|
"loss": 0.0575, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.374216027874565e-05, |
|
"loss": 0.0562, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.3602787456446e-05, |
|
"loss": 0.0614, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.346341463414634e-05, |
|
"loss": 0.0539, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.332404181184669e-05, |
|
"loss": 0.0634, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.318466898954704e-05, |
|
"loss": 0.058, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.304529616724739e-05, |
|
"loss": 0.0525, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.290592334494774e-05, |
|
"loss": 0.0678, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.276655052264809e-05, |
|
"loss": 0.0558, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.262717770034844e-05, |
|
"loss": 0.0641, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.248780487804879e-05, |
|
"loss": 0.0637, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.234843205574913e-05, |
|
"loss": 0.0548, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.220905923344948e-05, |
|
"loss": 0.054, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.206968641114983e-05, |
|
"loss": 0.0529, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.193031358885017e-05, |
|
"loss": 0.0605, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.179094076655052e-05, |
|
"loss": 0.0469, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 3.165156794425087e-05, |
|
"loss": 0.0504, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.151219512195122e-05, |
|
"loss": 0.0587, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.137282229965157e-05, |
|
"loss": 0.0491, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.123344947735192e-05, |
|
"loss": 0.0603, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.1094076655052264e-05, |
|
"loss": 0.0505, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.0954703832752614e-05, |
|
"loss": 0.0613, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.0815331010452964e-05, |
|
"loss": 0.0484, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.067595818815331e-05, |
|
"loss": 0.0507, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.053658536585366e-05, |
|
"loss": 0.0636, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.0397212543554012e-05, |
|
"loss": 0.0628, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.0257839721254355e-05, |
|
"loss": 0.0582, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.0118466898954705e-05, |
|
"loss": 0.0682, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.9979094076655055e-05, |
|
"loss": 0.0553, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.9839721254355404e-05, |
|
"loss": 0.0603, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.970034843205575e-05, |
|
"loss": 0.0485, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9763942324979324, |
|
"eval_f1": 0.7764963823722867, |
|
"eval_loss": 0.0650782585144043, |
|
"eval_precision": 0.7630785141773679, |
|
"eval_recall": 0.7903945723977861, |
|
"eval_runtime": 9.7722, |
|
"eval_samples_per_second": 1449.015, |
|
"eval_steps_per_second": 181.127, |
|
"step": 4252 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.95609756097561e-05, |
|
"loss": 0.036, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.942160278745645e-05, |
|
"loss": 0.047, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 2.92822299651568e-05, |
|
"loss": 0.0401, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.9142857142857146e-05, |
|
"loss": 0.0395, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.9003484320557492e-05, |
|
"loss": 0.0369, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.886411149825784e-05, |
|
"loss": 0.0369, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.872473867595819e-05, |
|
"loss": 0.0435, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.8585365853658537e-05, |
|
"loss": 0.0417, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.8445993031358887e-05, |
|
"loss": 0.0366, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.8306620209059237e-05, |
|
"loss": 0.0448, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.8167247386759586e-05, |
|
"loss": 0.0352, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.8027874564459932e-05, |
|
"loss": 0.0424, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.7888501742160282e-05, |
|
"loss": 0.0412, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.774912891986063e-05, |
|
"loss": 0.0423, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.7609756097560974e-05, |
|
"loss": 0.0499, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.7470383275261324e-05, |
|
"loss": 0.0483, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.7331010452961674e-05, |
|
"loss": 0.0349, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.7191637630662023e-05, |
|
"loss": 0.0495, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.705226480836237e-05, |
|
"loss": 0.0428, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.691289198606272e-05, |
|
"loss": 0.0367, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.677351916376307e-05, |
|
"loss": 0.0421, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.663414634146342e-05, |
|
"loss": 0.0428, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.6494773519163765e-05, |
|
"loss": 0.0391, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.6355400696864114e-05, |
|
"loss": 0.048, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.6216027874564464e-05, |
|
"loss": 0.0451, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.6076655052264814e-05, |
|
"loss": 0.0491, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.5937282229965156e-05, |
|
"loss": 0.041, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.5797909407665506e-05, |
|
"loss": 0.0432, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.5658536585365856e-05, |
|
"loss": 0.0487, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.5519163763066205e-05, |
|
"loss": 0.0454, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.537979094076655e-05, |
|
"loss": 0.04, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.52404181184669e-05, |
|
"loss": 0.0453, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.510104529616725e-05, |
|
"loss": 0.0393, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.49616724738676e-05, |
|
"loss": 0.0463, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.4822299651567943e-05, |
|
"loss": 0.041, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.4682926829268293e-05, |
|
"loss": 0.0393, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.4543554006968642e-05, |
|
"loss": 0.0412, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.4404181184668992e-05, |
|
"loss": 0.0365, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.426480836236934e-05, |
|
"loss": 0.037, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.4125435540069688e-05, |
|
"loss": 0.0388, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.3986062717770038e-05, |
|
"loss": 0.0442, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.3846689895470387e-05, |
|
"loss": 0.0435, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.3707317073170733e-05, |
|
"loss": 0.0483, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.3567944250871083e-05, |
|
"loss": 0.0389, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.3428571428571433e-05, |
|
"loss": 0.0387, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.3289198606271776e-05, |
|
"loss": 0.0378, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.3149825783972125e-05, |
|
"loss": 0.0424, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.3010452961672475e-05, |
|
"loss": 0.0337, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2871080139372824e-05, |
|
"loss": 0.0469, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.273170731707317e-05, |
|
"loss": 0.0428, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.259233449477352e-05, |
|
"loss": 0.0397, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.245296167247387e-05, |
|
"loss": 0.0442, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.231358885017422e-05, |
|
"loss": 0.0351, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.2174216027874566e-05, |
|
"loss": 0.0466, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.2034843205574915e-05, |
|
"loss": 0.0435, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.1895470383275265e-05, |
|
"loss": 0.0455, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.1756097560975615e-05, |
|
"loss": 0.0385, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.1616724738675958e-05, |
|
"loss": 0.041, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.1477351916376307e-05, |
|
"loss": 0.0425, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.1337979094076657e-05, |
|
"loss": 0.0427, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.1198606271777006e-05, |
|
"loss": 0.0401, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.1059233449477353e-05, |
|
"loss": 0.0427, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.0919860627177702e-05, |
|
"loss": 0.0373, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.0780487804878052e-05, |
|
"loss": 0.0403, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.06411149825784e-05, |
|
"loss": 0.037, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.0501742160278744e-05, |
|
"loss": 0.0435, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.0362369337979094e-05, |
|
"loss": 0.0349, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.0222996515679444e-05, |
|
"loss": 0.0418, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.0083623693379793e-05, |
|
"loss": 0.0492, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.9944250871080143e-05, |
|
"loss": 0.0405, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.980487804878049e-05, |
|
"loss": 0.0515, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.966550522648084e-05, |
|
"loss": 0.0413, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.9526132404181185e-05, |
|
"loss": 0.0357, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.9386759581881535e-05, |
|
"loss": 0.0363, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.9247386759581884e-05, |
|
"loss": 0.0401, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.910801393728223e-05, |
|
"loss": 0.0364, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.896864111498258e-05, |
|
"loss": 0.0429, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.8829268292682926e-05, |
|
"loss": 0.0499, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.8689895470383276e-05, |
|
"loss": 0.0425, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.8550522648083626e-05, |
|
"loss": 0.0389, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.8411149825783975e-05, |
|
"loss": 0.04, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.827177700348432e-05, |
|
"loss": 0.0413, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.813240418118467e-05, |
|
"loss": 0.0345, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.7993031358885017e-05, |
|
"loss": 0.0431, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.7853658536585367e-05, |
|
"loss": 0.0444, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.7714285714285717e-05, |
|
"loss": 0.0381, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.7574912891986066e-05, |
|
"loss": 0.0337, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 1.7435540069686412e-05, |
|
"loss": 0.0412, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.7296167247386762e-05, |
|
"loss": 0.0377, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.7156794425087108e-05, |
|
"loss": 0.0359, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.7017421602787458e-05, |
|
"loss": 0.0317, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.6878048780487804e-05, |
|
"loss": 0.0407, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.6738675958188154e-05, |
|
"loss": 0.0441, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.6599303135888503e-05, |
|
"loss": 0.0439, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.6459930313588853e-05, |
|
"loss": 0.0426, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 1.63205574912892e-05, |
|
"loss": 0.035, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.618118466898955e-05, |
|
"loss": 0.0357, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.6041811846689895e-05, |
|
"loss": 0.0416, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.5902439024390245e-05, |
|
"loss": 0.0358, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.5763066202090594e-05, |
|
"loss": 0.0389, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.5623693379790944e-05, |
|
"loss": 0.0366, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.548432055749129e-05, |
|
"loss": 0.0352, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.534494773519164e-05, |
|
"loss": 0.0364, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5205574912891988e-05, |
|
"loss": 0.0408, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.5066202090592337e-05, |
|
"loss": 0.0426, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4926829268292684e-05, |
|
"loss": 0.0322, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.4787456445993033e-05, |
|
"loss": 0.0418, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4648083623693381e-05, |
|
"loss": 0.0438, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 1.4508710801393729e-05, |
|
"loss": 0.0326, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.4369337979094079e-05, |
|
"loss": 0.0424, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.4229965156794425e-05, |
|
"loss": 0.0478, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.4090592334494775e-05, |
|
"loss": 0.0398, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.3951219512195122e-05, |
|
"loss": 0.0462, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3811846689895472e-05, |
|
"loss": 0.0416, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3672473867595818e-05, |
|
"loss": 0.0468, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.3533101045296168e-05, |
|
"loss": 0.0408, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.3393728222996516e-05, |
|
"loss": 0.0394, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.3254355400696866e-05, |
|
"loss": 0.0363, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.3114982578397213e-05, |
|
"loss": 0.0369, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 1.2975609756097563e-05, |
|
"loss": 0.0458, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.283623693379791e-05, |
|
"loss": 0.0415, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.2696864111498259e-05, |
|
"loss": 0.0407, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.2557491289198607e-05, |
|
"loss": 0.0406, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 1.2418118466898957e-05, |
|
"loss": 0.0427, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.2278745644599304e-05, |
|
"loss": 0.0405, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 1.2139372822299652e-05, |
|
"loss": 0.0429, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.0337, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.186062717770035e-05, |
|
"loss": 0.0276, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.1721254355400698e-05, |
|
"loss": 0.0341, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.1581881533101047e-05, |
|
"loss": 0.0426, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.1442508710801394e-05, |
|
"loss": 0.0358, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.1303135888501743e-05, |
|
"loss": 0.0515, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.1163763066202091e-05, |
|
"loss": 0.0491, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.1024390243902441e-05, |
|
"loss": 0.046, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 1.0885017421602789e-05, |
|
"loss": 0.0372, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.0745644599303138e-05, |
|
"loss": 0.0364, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 1.0606271777003485e-05, |
|
"loss": 0.0441, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.0466898954703834e-05, |
|
"loss": 0.0431, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.0327526132404182e-05, |
|
"loss": 0.0351, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.018815331010453e-05, |
|
"loss": 0.0389, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 1.0048780487804878e-05, |
|
"loss": 0.0359, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.909407665505228e-06, |
|
"loss": 0.0343, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 9.770034843205576e-06, |
|
"loss": 0.0454, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 9.630662020905924e-06, |
|
"loss": 0.0407, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 9.491289198606273e-06, |
|
"loss": 0.0387, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.351916376306621e-06, |
|
"loss": 0.0355, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 9.212543554006969e-06, |
|
"loss": 0.0371, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.073170731707319e-06, |
|
"loss": 0.0516, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.933797909407667e-06, |
|
"loss": 0.0467, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 8.794425087108015e-06, |
|
"loss": 0.0438, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 8.655052264808364e-06, |
|
"loss": 0.039, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 8.515679442508712e-06, |
|
"loss": 0.0376, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.37630662020906e-06, |
|
"loss": 0.0346, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 8.23693379790941e-06, |
|
"loss": 0.0446, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 8.097560975609758e-06, |
|
"loss": 0.0375, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 7.958188153310104e-06, |
|
"loss": 0.0334, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 7.818815331010453e-06, |
|
"loss": 0.039, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 7.679442508710801e-06, |
|
"loss": 0.0464, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 7.54006968641115e-06, |
|
"loss": 0.0402, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 7.400696864111498e-06, |
|
"loss": 0.0441, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.261324041811847e-06, |
|
"loss": 0.0418, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.121951219512196e-06, |
|
"loss": 0.0405, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.982578397212544e-06, |
|
"loss": 0.0365, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 6.843205574912892e-06, |
|
"loss": 0.047, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.703832752613241e-06, |
|
"loss": 0.0391, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.564459930313589e-06, |
|
"loss": 0.0359, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 6.425087108013938e-06, |
|
"loss": 0.0468, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.285714285714286e-06, |
|
"loss": 0.045, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 6.1463414634146346e-06, |
|
"loss": 0.0398, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.006968641114983e-06, |
|
"loss": 0.0477, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 5.867595818815331e-06, |
|
"loss": 0.0405, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.72822299651568e-06, |
|
"loss": 0.0407, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5.588850174216028e-06, |
|
"loss": 0.0434, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.449477351916377e-06, |
|
"loss": 0.0391, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.3101045296167255e-06, |
|
"loss": 0.0359, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.1707317073170735e-06, |
|
"loss": 0.0455, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.031358885017422e-06, |
|
"loss": 0.0425, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.891986062717771e-06, |
|
"loss": 0.0372, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.752613240418119e-06, |
|
"loss": 0.0408, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.613240418118467e-06, |
|
"loss": 0.0373, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 4.473867595818816e-06, |
|
"loss": 0.0361, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.334494773519164e-06, |
|
"loss": 0.0331, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.195121951219512e-06, |
|
"loss": 0.0397, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.055749128919861e-06, |
|
"loss": 0.0391, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.916376306620209e-06, |
|
"loss": 0.0272, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.777003484320558e-06, |
|
"loss": 0.0446, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.6376306620209062e-06, |
|
"loss": 0.0364, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.4982578397212546e-06, |
|
"loss": 0.0401, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.358885017421603e-06, |
|
"loss": 0.033, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.2195121951219517e-06, |
|
"loss": 0.0523, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.0801393728223e-06, |
|
"loss": 0.0438, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.9407665505226484e-06, |
|
"loss": 0.0379, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.8013937282229968e-06, |
|
"loss": 0.0375, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.6620209059233455e-06, |
|
"loss": 0.0445, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.522648083623694e-06, |
|
"loss": 0.038, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.383275261324042e-06, |
|
"loss": 0.0418, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.2439024390243906e-06, |
|
"loss": 0.0408, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.104529616724739e-06, |
|
"loss": 0.0414, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.9651567944250873e-06, |
|
"loss": 0.0429, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.8257839721254357e-06, |
|
"loss": 0.0365, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.686411149825784e-06, |
|
"loss": 0.0366, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.5470383275261324e-06, |
|
"loss": 0.0431, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.407665505226481e-06, |
|
"loss": 0.0355, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.2682926829268293e-06, |
|
"loss": 0.0409, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.1289198606271779e-06, |
|
"loss": 0.0332, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 9.895470383275262e-07, |
|
"loss": 0.0345, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 8.501742160278746e-07, |
|
"loss": 0.036, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.10801393728223e-07, |
|
"loss": 0.0466, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 5.714285714285715e-07, |
|
"loss": 0.0482, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.320557491289199e-07, |
|
"loss": 0.0406, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.926829268292683e-07, |
|
"loss": 0.0369, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.5331010452961674e-07, |
|
"loss": 0.044, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9766189637193916, |
|
"eval_f1": 0.780249736194161, |
|
"eval_loss": 0.07066146284341812, |
|
"eval_precision": 0.7687575810084907, |
|
"eval_recall": 0.7920906980896268, |
|
"eval_runtime": 9.5779, |
|
"eval_samples_per_second": 1478.396, |
|
"eval_steps_per_second": 184.799, |
|
"step": 6378 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 6378, |
|
"total_flos": 6259843525582944.0, |
|
"train_loss": 0.06725984670200778, |
|
"train_runtime": 305.6957, |
|
"train_samples_per_second": 1001.172, |
|
"train_steps_per_second": 20.864 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 6378, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 6259843525582944.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|