diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,21950 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 18257, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00019999999837529454, + "loss": 1.1032, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999995938236562, + "loss": 1.0664, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999983752949535, + "loss": 1.0333, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999963444148814, + "loss": 1.1211, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999935011850908, + "loss": 1.1523, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999989845607891, + "loss": 1.0734, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999853776862518, + "loss": 1.1928, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999800974238022, + "loss": 1.1473, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999740048248324, + "loss": 1.1761, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999670998942912, + "loss": 1.036, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999593826377883, + "loss": 1.1627, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999950853061592, + "loss": 1.086, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999941511172632, + "loss": 1.0676, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999313569784967, + "loss": 1.1365, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019999203904874353, + "loss": 1.1211, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001999908611708357, + "loss": 1.043, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019998960206508287, + "loss": 1.0945, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019998826173250802, + "loss": 1.129, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 0.00019998684017419997, + "loss": 1.1267, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998533739131342, + "loss": 1.1279, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998375338506927, + "loss": 1.1077, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019998208815675427, + "loss": 1.0842, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999803417077211, + "loss": 1.1657, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999785140393886, + "loss": 1.0209, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999766051532414, + "loss": 1.1343, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019997461505083023, + "loss": 1.1039, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999725437337717, + "loss": 1.1007, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999703912037485, + "loss": 1.1289, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019996815746250925, + "loss": 1.1169, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019996584251186846, + "loss": 1.1366, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019996344635370684, + "loss": 1.0876, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019996096898997078, + "loss": 1.1273, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019995841042267284, + "loss": 1.1308, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999557706538914, + "loss": 1.1608, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019995304968577093, + "loss": 1.2318, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019995024752052186, + "loss": 1.1642, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019994736416042048, + "loss": 1.1404, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999443996078091, + "loss": 1.0635, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019994135386509603, + "loss": 1.1754, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019993822693475542, + "loss": 1.1087, + "step": 195 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019993501881932746, + "loss": 1.0764, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999317295214183, + "loss": 1.0868, + "step": 205 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019992835904369998, + "loss": 1.0977, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019992490738891057, + "loss": 1.1653, + "step": 215 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019992137455985396, + "loss": 1.1657, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999177605594001, + "loss": 1.1186, + "step": 225 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001999140653904848, + "loss": 1.1123, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019991028905610986, + "loss": 1.1111, + "step": 235 + }, + { + "epoch": 0.01, + "learning_rate": 0.000199906431559343, + "loss": 1.1037, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019990249290331784, + "loss": 1.1314, + "step": 245 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019989847309123397, + "loss": 1.0606, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019989437212635693, + "loss": 1.0934, + "step": 255 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019989019001201815, + "loss": 1.0947, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019988592675161492, + "loss": 1.0364, + "step": 265 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019988158234861058, + "loss": 1.1504, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998771568065343, + "loss": 1.2071, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998726501289811, + "loss": 1.1213, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019986806231961208, + "loss": 1.1015, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019986339338215416, + "loss": 1.0587, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019985864332040012, + "loss": 1.0896, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019985381213820876, + "loss": 1.2225, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998488998395046, + "loss": 1.1455, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998439064282782, + "loss": 1.1164, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019983883190858602, + "loss": 1.1763, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998336762845503, + "loss": 1.1246, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998284395603592, + "loss": 1.091, + "step": 325 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998231217402668, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019981772282859315, + "loss": 1.1208, + "step": 335 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199812242829724, + "loss": 1.1494, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019980668174811103, + "loss": 1.0512, + "step": 345 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001998010395882718, + "loss": 1.0742, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019979531635478978, + "loss": 1.1415, + "step": 355 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997895120523142, + "loss": 1.0945, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997836266855602, + "loss": 1.1632, + "step": 365 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019977766025930878, + "loss": 1.0635, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997716127784068, + "loss": 1.0715, + "step": 375 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997654842477669, + "loss": 1.0331, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997592746723677, + "loss": 1.0083, + "step": 385 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997529840572535, + "loss": 1.0541, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019974661240753448, + "loss": 1.1628, + "step": 395 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019974015972838678, + "loss": 1.1128, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019973362602505212, + "loss": 1.0925, + "step": 405 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019972701130283822, + "loss": 1.1262, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019972031556711862, + "loss": 1.1006, + "step": 415 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997135388233325, + "loss": 1.0861, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001997066810769851, + "loss": 1.1397, + "step": 425 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019969974233364723, + "loss": 1.1535, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019969272259895565, + "loss": 1.1099, + "step": 435 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019968562187861277, + "loss": 1.1154, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 0.000199678440178387, + "loss": 1.0709, + "step": 445 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019967117750411233, + "loss": 1.1128, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 0.00019966383386168867, + "loss": 1.134, + "step": 455 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019965640925708158, + "loss": 1.0992, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019964890369632255, + "loss": 1.0872, + "step": 465 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019964131718550873, + "loss": 1.0695, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 0.000199633649730803, + "loss": 1.1137, + "step": 475 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019962590133843397, + "loss": 1.1437, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001996180720146962, + "loss": 1.0785, + "step": 485 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019961016176594978, + "loss": 1.0316, + "step": 490 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019960217059862063, + "loss": 1.1335, + "step": 495 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019959409851920043, + "loss": 1.0676, + "step": 500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019958594553424658, + "loss": 1.0861, + "step": 505 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001995777116503821, + "loss": 1.1432, + "step": 510 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019956939687429582, + "loss": 1.0515, + "step": 515 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019956100121274232, + "loss": 1.1532, + "step": 520 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001995525246725418, + "loss": 1.1023, + "step": 525 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001995439672605802, + "loss": 1.0932, + "step": 530 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001995353289838092, + "loss": 1.1421, + "step": 535 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019952660984924608, + "loss": 1.2585, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019951780986397392, + "loss": 1.1386, + "step": 545 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019950892903514129, + "loss": 1.152, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019949996736996265, + "loss": 1.0659, + "step": 555 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019949092487571798, + "loss": 1.0348, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019948180155975303, + "loss": 1.1131, + "step": 565 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019947259742947911, + "loss": 1.1788, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019946331249237327, + "loss": 1.064, + "step": 575 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019945394675597807, + "loss": 1.202, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019944450022790184, + "loss": 1.124, + "step": 585 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019943497291581845, + "loss": 1.1475, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001994253648274675, + "loss": 1.0344, + "step": 595 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019941567597065414, + "loss": 1.1871, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019940590635324915, + "loss": 1.0982, + "step": 605 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019939605598318888, + "loss": 1.0975, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001993861248684753, + "loss": 1.1043, + "step": 615 + }, + { + "epoch": 0.03, + "learning_rate": 0.000199376113017176, + "loss": 1.131, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019936602043742408, + "loss": 1.0508, + "step": 625 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019935584713741826, + "loss": 1.175, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019934559312542296, + "loss": 1.1795, + "step": 635 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019933525840976795, + "loss": 1.1411, + "step": 640 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001993248429988487, + "loss": 1.1145, + "step": 645 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019931434690112626, + "loss": 1.1791, + "step": 650 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019930377012512707, + "loss": 1.1538, + "step": 655 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019929311267944324, + "loss": 1.0565, + "step": 660 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992823745727324, + "loss": 1.1262, + "step": 665 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019927155581371767, + "loss": 1.088, + "step": 670 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019926065641118763, + "loss": 1.1132, + "step": 675 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992496763739965, + "loss": 1.1825, + "step": 680 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019923861571106397, + "loss": 1.2013, + "step": 685 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992274744313751, + "loss": 1.1262, + "step": 690 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001992162525439806, + "loss": 1.0849, + "step": 695 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019920495005799663, + "loss": 1.1018, + "step": 700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001991935669826048, + "loss": 1.1481, + "step": 705 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019918210332705213, + "loss": 1.1132, + "step": 710 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019917055910065117, + "loss": 1.1347, + "step": 715 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019915893431277987, + "loss": 1.0726, + "step": 720 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001991472289728817, + "loss": 1.1456, + "step": 725 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019913544309046555, + "loss": 1.1092, + "step": 730 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019912357667510564, + "loss": 1.0654, + "step": 735 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019911162973644165, + "loss": 1.1094, + "step": 740 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001990996022841788, + "loss": 1.1564, + "step": 745 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019908749432808766, + "loss": 1.1808, + "step": 750 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019907530587800413, + "loss": 1.0844, + "step": 755 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019906303694382948, + "loss": 1.0718, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019905068753553045, + "loss": 1.1844, + "step": 765 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001990382576631391, + "loss": 1.0922, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019902574733675284, + "loss": 1.1269, + "step": 775 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019901315656653445, + "loss": 1.1092, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001990004853627122, + "loss": 1.1223, + "step": 785 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019898773373557948, + "loss": 1.0795, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001989749016954951, + "loss": 1.0622, + "step": 795 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019896198925288322, + "loss": 1.1183, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001989489964182333, + "loss": 1.0611, + "step": 805 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019893592320210013, + "loss": 1.1477, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019892276961510377, + "loss": 1.1472, + "step": 815 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019890953566792957, + "loss": 1.1411, + "step": 820 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001988962213713281, + "loss": 1.0948, + "step": 825 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001988828267361153, + "loss": 1.1414, + "step": 830 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001988693517731724, + "loss": 1.1161, + "step": 835 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019885579649344578, + "loss": 1.1123, + "step": 840 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019884216090794711, + "loss": 1.1128, + "step": 845 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019882844502775324, + "loss": 1.1177, + "step": 850 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019881464886400637, + "loss": 1.0724, + "step": 855 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019880077242791385, + "loss": 1.1243, + "step": 860 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019878681573074817, + "loss": 1.0985, + "step": 865 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019877277878384712, + "loss": 1.1704, + "step": 870 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019875866159861375, + "loss": 1.2201, + "step": 875 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019874446418651612, + "loss": 1.0622, + "step": 880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001987301865590875, + "loss": 1.1618, + "step": 885 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001987158287279264, + "loss": 1.1283, + "step": 890 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019870139070469643, + "loss": 1.1129, + "step": 895 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019868687250112636, + "loss": 1.1868, + "step": 900 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019867227412901007, + "loss": 1.119, + "step": 905 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001986575956002066, + "loss": 1.1817, + "step": 910 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019864283692664015, + "loss": 1.1666, + "step": 915 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001986279981202999, + "loss": 1.0744, + "step": 920 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019861307919324023, + "loss": 1.1244, + "step": 925 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001985980801575805, + "loss": 1.0783, + "step": 930 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001985830010255053, + "loss": 1.1358, + "step": 935 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019856784180926417, + "loss": 1.1039, + "step": 940 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001985526025211718, + "loss": 1.1694, + "step": 945 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019853728317360773, + "loss": 1.2141, + "step": 950 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019852188377901677, + "loss": 1.1452, + "step": 955 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019850640434990869, + "loss": 1.0506, + "step": 960 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019849084489885813, + "loss": 1.0857, + "step": 965 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019847520543850498, + "loss": 1.1396, + "step": 970 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019845948598155391, + "loss": 1.1202, + "step": 975 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019844368654077467, + "loss": 1.112, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 0.000198427807129002, + "loss": 1.0507, + "step": 985 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001984118477591356, + "loss": 1.1932, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001983958084441401, + "loss": 1.0773, + "step": 995 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019837968919704502, + "loss": 1.1981, + "step": 1000 + }, + { + "epoch": 0.06, + "learning_rate": 0.000198363490030945, + "loss": 1.0932, + "step": 1005 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001983472109589993, + "loss": 1.0799, + "step": 1010 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019833085199443245, + "loss": 1.0515, + "step": 1015 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019831441315053359, + "loss": 1.0079, + "step": 1020 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019829789444065696, + "loss": 1.1045, + "step": 1025 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019828129587822144, + "loss": 1.1379, + "step": 1030 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019826461747671098, + "loss": 1.0673, + "step": 1035 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019824785924967432, + "loss": 1.1513, + "step": 1040 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019823102121072506, + "loss": 1.0987, + "step": 1045 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019821410337354168, + "loss": 1.0806, + "step": 1050 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001981971057518673, + "loss": 1.075, + "step": 1055 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019818002835951006, + "loss": 1.2132, + "step": 1060 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019816287121034287, + "loss": 1.1387, + "step": 1065 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001981456343183033, + "loss": 1.0743, + "step": 1070 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019812831769739383, + "loss": 1.1505, + "step": 1075 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019811092136168162, + "loss": 1.1212, + "step": 1080 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019809344532529865, + "loss": 1.0111, + "step": 1085 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001980758896024417, + "loss": 1.0533, + "step": 1090 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019805825420737215, + "loss": 1.1439, + "step": 1095 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019804053915441615, + "loss": 1.0587, + "step": 1100 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019802274445796458, + "loss": 1.1231, + "step": 1105 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019800487013247302, + "loss": 1.0135, + "step": 1110 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019798691619246173, + "loss": 1.05, + "step": 1115 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019796888265251558, + "loss": 1.0607, + "step": 1120 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019795076952728425, + "loss": 1.1267, + "step": 1125 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019793257683148194, + "loss": 1.1437, + "step": 1130 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001979143045798876, + "loss": 1.1138, + "step": 1135 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019789595278734464, + "loss": 1.1417, + "step": 1140 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019787752146876127, + "loss": 1.0941, + "step": 1145 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019785901063911016, + "loss": 1.0948, + "step": 1150 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001978404203134287, + "loss": 1.0978, + "step": 1155 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019782175050681865, + "loss": 1.1043, + "step": 1160 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019780300123444663, + "loss": 1.128, + "step": 1165 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001977841725115436, + "loss": 1.1261, + "step": 1170 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019776526435340516, + "loss": 1.0692, + "step": 1175 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019774627677539137, + "loss": 1.0835, + "step": 1180 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019772720979292685, + "loss": 1.1175, + "step": 1185 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019770806342150066, + "loss": 1.1298, + "step": 1190 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001976888376766665, + "loss": 1.1589, + "step": 1195 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001976695325740425, + "loss": 1.1263, + "step": 1200 + }, + { + "epoch": 0.07, + "learning_rate": 0.000197650148129311, + "loss": 1.227, + "step": 1205 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019763068435821917, + "loss": 1.1143, + "step": 1210 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019761114127657842, + "loss": 1.1257, + "step": 1215 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019759151890026463, + "loss": 1.1668, + "step": 1220 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001975718172452181, + "loss": 1.0921, + "step": 1225 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019755203632744353, + "loss": 1.1107, + "step": 1230 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019753217616300995, + "loss": 1.0978, + "step": 1235 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019751223676805083, + "loss": 1.118, + "step": 1240 + }, + { + "epoch": 0.07, + "learning_rate": 0.000197492218158764, + "loss": 1.1295, + "step": 1245 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001974721203514117, + "loss": 1.1201, + "step": 1250 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019745194336232038, + "loss": 1.0858, + "step": 1255 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001974316872078808, + "loss": 1.1888, + "step": 1260 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019741135190454818, + "loss": 1.13, + "step": 1265 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019739093746884195, + "loss": 1.092, + "step": 1270 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019737044391734585, + "loss": 1.2147, + "step": 1275 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001973498712667078, + "loss": 1.2387, + "step": 1280 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001973292195336401, + "loss": 1.0601, + "step": 1285 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019730848873491925, + "loss": 1.0656, + "step": 1290 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019728767888738602, + "loss": 1.1525, + "step": 1295 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019726679000794524, + "loss": 1.1181, + "step": 1300 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001972458221135661, + "loss": 1.043, + "step": 1305 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001972247752212819, + "loss": 1.1091, + "step": 1310 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001972036493481902, + "loss": 1.057, + "step": 1315 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019718244451145256, + "loss": 1.0931, + "step": 1320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001971611607282949, + "loss": 1.14, + "step": 1325 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019713979801600707, + "loss": 1.0144, + "step": 1330 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019711835639194317, + "loss": 1.1032, + "step": 1335 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019709683587352134, + "loss": 1.1165, + "step": 1340 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019707523647822386, + "loss": 1.1322, + "step": 1345 + }, + { + "epoch": 0.07, + "learning_rate": 0.000197053558223597, + "loss": 1.1617, + "step": 1350 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019703180112725123, + "loss": 1.1173, + "step": 1355 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019700996520686089, + "loss": 1.1341, + "step": 1360 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019698805048016453, + "loss": 1.0231, + "step": 1365 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019696605696496464, + "loss": 1.1765, + "step": 1370 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019694398467912767, + "loss": 1.0667, + "step": 1375 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001969218336405841, + "loss": 1.182, + "step": 1380 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019689960386732837, + "loss": 1.1106, + "step": 1385 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019687729537741893, + "loss": 1.1577, + "step": 1390 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019685490818897817, + "loss": 1.0872, + "step": 1395 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001968324423201923, + "loss": 1.0934, + "step": 1400 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019680989778931155, + "loss": 1.0749, + "step": 1405 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019678727461465014, + "loss": 1.1183, + "step": 1410 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001967645728145859, + "loss": 1.1155, + "step": 1415 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001967417924075608, + "loss": 1.0706, + "step": 1420 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019671893341208053, + "loss": 1.1589, + "step": 1425 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019669599584671467, + "loss": 1.0504, + "step": 1430 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019667297973009664, + "loss": 1.1075, + "step": 1435 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019664988508092356, + "loss": 1.1005, + "step": 1440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001966267119179565, + "loss": 1.1036, + "step": 1445 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019660346026002026, + "loss": 1.0551, + "step": 1450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019658013012600342, + "loss": 1.0745, + "step": 1455 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019655672153485817, + "loss": 1.0538, + "step": 1460 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001965332345056006, + "loss": 1.1155, + "step": 1465 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019650966905731043, + "loss": 1.1895, + "step": 1470 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019648602520913112, + "loss": 1.1041, + "step": 1475 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019646230298026984, + "loss": 1.0371, + "step": 1480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001964385023899974, + "loss": 1.1189, + "step": 1485 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019641462345764825, + "loss": 1.1471, + "step": 1490 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019639066620262053, + "loss": 1.1409, + "step": 1495 + }, + { + "epoch": 0.08, + "learning_rate": 0.000196366630644376, + "loss": 1.0641, + "step": 1500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001963425168024399, + "loss": 1.1547, + "step": 1505 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001963183246964013, + "loss": 1.0744, + "step": 1510 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019629405434591269, + "loss": 1.1013, + "step": 1515 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019626970577069013, + "loss": 1.1103, + "step": 1520 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019624527899051323, + "loss": 1.1574, + "step": 1525 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019622077402522522, + "loss": 1.0905, + "step": 1530 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019619619089473276, + "loss": 1.0994, + "step": 1535 + }, + { + "epoch": 0.08, + "learning_rate": 0.000196171529619006, + "loss": 1.1846, + "step": 1540 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001961467902180786, + "loss": 1.1078, + "step": 1545 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019612197271204763, + "loss": 1.1833, + "step": 1550 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019609707712107372, + "loss": 1.1436, + "step": 1555 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001960721034653808, + "loss": 1.0994, + "step": 1560 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019604705176525642, + "loss": 1.1771, + "step": 1565 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019602192204105123, + "loss": 1.2116, + "step": 1570 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019599671431317958, + "loss": 1.0557, + "step": 1575 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019597142860211894, + "loss": 1.1689, + "step": 1580 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001959460649284102, + "loss": 1.0702, + "step": 1585 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001959206233126577, + "loss": 1.1284, + "step": 1590 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019589510377552902, + "loss": 1.1074, + "step": 1595 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019586950633775492, + "loss": 1.0759, + "step": 1600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001958438310201296, + "loss": 1.1689, + "step": 1605 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019581807784351047, + "loss": 1.0666, + "step": 1610 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001957922468288182, + "loss": 1.2082, + "step": 1615 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001957663379970367, + "loss": 1.0637, + "step": 1620 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019574035136921304, + "loss": 1.1707, + "step": 1625 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019571428696645756, + "loss": 1.166, + "step": 1630 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019568814480994373, + "loss": 1.1066, + "step": 1635 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019566192492090817, + "loss": 1.1559, + "step": 1640 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019563562732065071, + "loss": 1.1194, + "step": 1645 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019560925203053432, + "loss": 1.1467, + "step": 1650 + }, + { + "epoch": 0.09, + "learning_rate": 0.000195582799071985, + "loss": 1.1215, + "step": 1655 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019555626846649184, + "loss": 1.1997, + "step": 1660 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019552966023560713, + "loss": 1.2071, + "step": 1665 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019550297440094604, + "loss": 1.0758, + "step": 1670 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019547621098418697, + "loss": 1.1419, + "step": 1675 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019544937000707122, + "loss": 1.0925, + "step": 1680 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001954224514914031, + "loss": 1.0968, + "step": 1685 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019539545545904996, + "loss": 1.1292, + "step": 1690 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019536838193194217, + "loss": 1.0844, + "step": 1695 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019534123093207286, + "loss": 1.0658, + "step": 1700 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019531400248149828, + "loss": 1.0474, + "step": 1705 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019528669660233755, + "loss": 1.0425, + "step": 1710 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019525931331677265, + "loss": 1.1484, + "step": 1715 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019523185264704848, + "loss": 1.0311, + "step": 1720 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019520431461547276, + "loss": 1.0861, + "step": 1725 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019517669924441612, + "loss": 1.1524, + "step": 1730 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019514900655631187, + "loss": 1.0902, + "step": 1735 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019512123657365637, + "loss": 1.0677, + "step": 1740 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019509338931900861, + "loss": 1.1203, + "step": 1745 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019506546481499042, + "loss": 1.098, + "step": 1750 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019503746308428629, + "loss": 1.0712, + "step": 1755 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001950093841496435, + "loss": 1.0832, + "step": 1760 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019498122803387208, + "loss": 1.0931, + "step": 1765 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019495299475984467, + "loss": 1.0802, + "step": 1770 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019492468435049673, + "loss": 1.1396, + "step": 1775 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001948962968288262, + "loss": 1.1725, + "step": 1780 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019486783221789384, + "loss": 1.0901, + "step": 1785 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019483929054082296, + "loss": 1.0767, + "step": 1790 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019481067182079934, + "loss": 1.1499, + "step": 1795 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001947819760810716, + "loss": 1.0896, + "step": 1800 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019475320334495075, + "loss": 1.0615, + "step": 1805 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001947243536358104, + "loss": 1.0938, + "step": 1810 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019469542697708674, + "loss": 1.1678, + "step": 1815 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001946664233922784, + "loss": 1.0664, + "step": 1820 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019463734290494644, + "loss": 1.1349, + "step": 1825 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019460818553871458, + "loss": 1.1067, + "step": 1830 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019457895131726888, + "loss": 1.1333, + "step": 1835 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019454964026435775, + "loss": 1.0924, + "step": 1840 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019452025240379215, + "loss": 1.0811, + "step": 1845 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019449078775944537, + "loss": 1.1043, + "step": 1850 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001944612463552531, + "loss": 1.1101, + "step": 1855 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019443162821521342, + "loss": 1.218, + "step": 1860 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019440193336338674, + "loss": 1.1898, + "step": 1865 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019437216182389565, + "loss": 1.0866, + "step": 1870 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019434231362092515, + "loss": 1.0558, + "step": 1875 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019431238877872257, + "loss": 1.1855, + "step": 1880 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019428238732159737, + "loss": 1.099, + "step": 1885 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001942523092739214, + "loss": 1.165, + "step": 1890 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019422215466012856, + "loss": 1.1275, + "step": 1895 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019419192350471508, + "loss": 1.1306, + "step": 1900 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019416161583223928, + "loss": 1.1622, + "step": 1905 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019413123166732172, + "loss": 1.1309, + "step": 1910 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019410077103464501, + "loss": 1.1474, + "step": 1915 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019407023395895398, + "loss": 1.132, + "step": 1920 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001940396204650555, + "loss": 1.0644, + "step": 1925 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001940089305778185, + "loss": 1.142, + "step": 1930 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019397816432217398, + "loss": 1.0239, + "step": 1935 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019394732172311503, + "loss": 1.0686, + "step": 1940 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019391640280569668, + "loss": 1.1765, + "step": 1945 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019388540759503604, + "loss": 1.0821, + "step": 1950 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019385433611631214, + "loss": 1.1488, + "step": 1955 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019382318839476594, + "loss": 1.1158, + "step": 1960 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019379196445570032, + "loss": 1.099, + "step": 1965 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001937606643244803, + "loss": 1.1725, + "step": 1970 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019372928802653252, + "loss": 1.1067, + "step": 1975 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019369783558734565, + "loss": 1.1181, + "step": 1980 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001936663070324701, + "loss": 1.1236, + "step": 1985 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019363470238751825, + "loss": 1.0417, + "step": 1990 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001936030216781641, + "loss": 1.1227, + "step": 1995 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019357126493014368, + "loss": 1.1774, + "step": 2000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019353943216925465, + "loss": 1.1386, + "step": 2005 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019350752342135638, + "loss": 1.1066, + "step": 2010 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019347553871237001, + "loss": 1.1344, + "step": 2015 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001934434780682785, + "loss": 1.1089, + "step": 2020 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001934113415151264, + "loss": 1.1433, + "step": 2025 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019337912907901978, + "loss": 1.1276, + "step": 2030 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019334684078612662, + "loss": 1.1633, + "step": 2035 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019331447666267644, + "loss": 1.1863, + "step": 2040 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019328203673496023, + "loss": 1.1657, + "step": 2045 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001932495210293307, + "loss": 1.1807, + "step": 2050 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019321692957220208, + "loss": 1.1703, + "step": 2055 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001931842623900501, + "loss": 1.1008, + "step": 2060 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019315151950941202, + "loss": 1.1054, + "step": 2065 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019311870095688665, + "loss": 1.0917, + "step": 2070 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001930858067591342, + "loss": 1.1134, + "step": 2075 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019305283694287634, + "loss": 1.1339, + "step": 2080 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001930197915348962, + "loss": 1.1417, + "step": 2085 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019298667056203832, + "loss": 1.0491, + "step": 2090 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019295347405120868, + "loss": 1.0736, + "step": 2095 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019292020202937442, + "loss": 1.1395, + "step": 2100 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019288685452356424, + "loss": 1.0608, + "step": 2105 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019285343156086803, + "loss": 1.1576, + "step": 2110 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001928199331684371, + "loss": 1.1131, + "step": 2115 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019278635937348393, + "loss": 1.0297, + "step": 2120 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019275271020328225, + "loss": 1.1325, + "step": 2125 + }, + { + "epoch": 0.12, + "learning_rate": 0.000192718985685167, + "loss": 1.1304, + "step": 2130 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001926851858465345, + "loss": 1.08, + "step": 2135 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019265131071484213, + "loss": 1.0682, + "step": 2140 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019261736031760837, + "loss": 1.1576, + "step": 2145 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019258333468241296, + "loss": 1.128, + "step": 2150 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019254923383689665, + "loss": 1.05, + "step": 2155 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019251505780876148, + "loss": 1.1599, + "step": 2160 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019248080662577036, + "loss": 1.1629, + "step": 2165 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001924464803157473, + "loss": 1.1082, + "step": 2170 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001924120789065774, + "loss": 1.132, + "step": 2175 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001923776024262068, + "loss": 1.0797, + "step": 2180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001923430509026425, + "loss": 1.1171, + "step": 2185 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019230842436395248, + "loss": 1.1678, + "step": 2190 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019227372283826583, + "loss": 1.1186, + "step": 2195 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019223894635377232, + "loss": 1.2379, + "step": 2200 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019220409493872275, + "loss": 1.1325, + "step": 2205 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001921691686214288, + "loss": 1.0757, + "step": 2210 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019213416743026285, + "loss": 1.2045, + "step": 2215 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019209909139365834, + "loss": 1.1018, + "step": 2220 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001920639405401093, + "loss": 1.123, + "step": 2225 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001920287148981707, + "loss": 1.1013, + "step": 2230 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001919934144964581, + "loss": 1.1402, + "step": 2235 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019195803936364791, + "loss": 1.1148, + "step": 2240 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019192258952847717, + "loss": 1.1465, + "step": 2245 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019188706501974373, + "loss": 1.1607, + "step": 2250 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019185146586630594, + "loss": 1.1231, + "step": 2255 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019181579209708288, + "loss": 1.1591, + "step": 2260 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001917800437410543, + "loss": 1.1014, + "step": 2265 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019174422082726043, + "loss": 1.236, + "step": 2270 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001917083233848021, + "loss": 1.1514, + "step": 2275 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019167235144284072, + "loss": 1.0706, + "step": 2280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001916363050305982, + "loss": 1.0455, + "step": 2285 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019160018417735693, + "loss": 1.1022, + "step": 2290 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019156398891245974, + "loss": 1.1167, + "step": 2295 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019152771926531, + "loss": 1.1118, + "step": 2300 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019149137526537142, + "loss": 1.0846, + "step": 2305 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001914549569421682, + "loss": 1.0916, + "step": 2310 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001914184643252848, + "loss": 1.1376, + "step": 2315 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019138189744436613, + "loss": 1.1085, + "step": 2320 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019134525632911735, + "loss": 1.1026, + "step": 2325 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019130854100930404, + "loss": 1.0874, + "step": 2330 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019127175151475195, + "loss": 1.1252, + "step": 2335 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019123488787534714, + "loss": 1.14, + "step": 2340 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019119795012103588, + "loss": 1.1472, + "step": 2345 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019116093828182464, + "loss": 1.1675, + "step": 2350 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019112385238778012, + "loss": 1.1193, + "step": 2355 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019108669246902915, + "loss": 1.139, + "step": 2360 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019104945855575862, + "loss": 1.1458, + "step": 2365 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019101215067821565, + "loss": 1.1479, + "step": 2370 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019097476886670737, + "loss": 1.107, + "step": 2375 + }, + { + "epoch": 0.13, + "learning_rate": 0.000190937313151601, + "loss": 1.09, + "step": 2380 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019089978356332386, + "loss": 1.2123, + "step": 2385 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019086218013236313, + "loss": 1.166, + "step": 2390 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019082450288926615, + "loss": 1.1012, + "step": 2395 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019078675186464002, + "loss": 1.0627, + "step": 2400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001907489270891519, + "loss": 1.1408, + "step": 2405 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001907110285935289, + "loss": 1.0716, + "step": 2410 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019067305640855793, + "loss": 1.0738, + "step": 2415 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001906350105650858, + "loss": 1.1093, + "step": 2420 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019059689109401915, + "loss": 1.1295, + "step": 2425 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019055869802632447, + "loss": 1.1377, + "step": 2430 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019052043139302795, + "loss": 1.1059, + "step": 2435 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019048209122521562, + "loss": 1.107, + "step": 2440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001904436775540333, + "loss": 1.1829, + "step": 2445 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001904051904106863, + "loss": 1.1255, + "step": 2450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001903666298264398, + "loss": 1.1854, + "step": 2455 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019032799583261866, + "loss": 1.1515, + "step": 2460 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019028928846060721, + "loss": 1.0586, + "step": 2465 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019025050774184953, + "loss": 1.1042, + "step": 2470 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019021165370784922, + "loss": 1.066, + "step": 2475 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019017272639016954, + "loss": 1.1116, + "step": 2480 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019013372582043312, + "loss": 1.1261, + "step": 2485 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019009465203032223, + "loss": 1.1608, + "step": 2490 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019005550505157848, + "loss": 1.1959, + "step": 2495 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019001628491600314, + "loss": 1.1328, + "step": 2500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018997699165545676, + "loss": 1.133, + "step": 2505 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018993762530185925, + "loss": 1.1307, + "step": 2510 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018989818588719003, + "loss": 1.0921, + "step": 2515 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018985867344348782, + "loss": 1.1079, + "step": 2520 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018981908800285065, + "loss": 1.1946, + "step": 2525 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018977942959743583, + "loss": 1.0337, + "step": 2530 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018973969825946007, + "loss": 1.1972, + "step": 2535 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018969989402119913, + "loss": 1.0439, + "step": 2540 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018966001691498807, + "loss": 1.1402, + "step": 2545 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018962006697322123, + "loss": 1.1336, + "step": 2550 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001895800442283521, + "loss": 1.1067, + "step": 2555 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001895399487128932, + "loss": 1.1201, + "step": 2560 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001894997804594162, + "loss": 1.1701, + "step": 2565 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018945953950055187, + "loss": 1.1359, + "step": 2570 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018941922586899015, + "loss": 1.1092, + "step": 2575 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018937883959747992, + "loss": 1.0772, + "step": 2580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018933838071882904, + "loss": 1.1807, + "step": 2585 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018929784926590435, + "loss": 1.1507, + "step": 2590 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018925724527163177, + "loss": 1.0999, + "step": 2595 + }, + { + "epoch": 0.14, + "learning_rate": 0.000189216568768996, + "loss": 1.1505, + "step": 2600 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018917581979104072, + "loss": 1.1033, + "step": 2605 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018913499837086852, + "loss": 1.0842, + "step": 2610 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018909410454164073, + "loss": 1.0672, + "step": 2615 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001890531383365776, + "loss": 1.1222, + "step": 2620 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001890120997889581, + "loss": 1.1143, + "step": 2625 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018897098893212, + "loss": 1.1198, + "step": 2630 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018892980579945985, + "loss": 1.2037, + "step": 2635 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001888885504244329, + "loss": 1.1384, + "step": 2640 + }, + { + "epoch": 0.14, + "learning_rate": 0.00018884722284055307, + "loss": 1.1693, + "step": 2645 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018880582308139284, + "loss": 1.1019, + "step": 2650 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018876435118058348, + "loss": 1.0825, + "step": 2655 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018872280717181477, + "loss": 1.0906, + "step": 2660 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018868119108883516, + "loss": 1.1341, + "step": 2665 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018863950296545148, + "loss": 1.0723, + "step": 2670 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018859774283552932, + "loss": 1.1234, + "step": 2675 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001885559107329926, + "loss": 1.1248, + "step": 2680 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018851400669182367, + "loss": 1.0689, + "step": 2685 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018847203074606335, + "loss": 1.1267, + "step": 2690 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018842998292981108, + "loss": 1.1193, + "step": 2695 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018838786327722443, + "loss": 1.0327, + "step": 2700 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001883456718225194, + "loss": 1.1111, + "step": 2705 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018830340859997036, + "loss": 1.1441, + "step": 2710 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018826107364390993, + "loss": 1.1243, + "step": 2715 + }, + { + "epoch": 0.15, + "learning_rate": 0.000188218666988729, + "loss": 1.0446, + "step": 2720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001881761886688768, + "loss": 1.1997, + "step": 2725 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018813363871886065, + "loss": 1.1008, + "step": 2730 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018809101717324618, + "loss": 1.1542, + "step": 2735 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018804832406665707, + "loss": 1.2327, + "step": 2740 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018800555943377514, + "loss": 1.1403, + "step": 2745 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001879627233093404, + "loss": 1.1483, + "step": 2750 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018791981572815087, + "loss": 1.122, + "step": 2755 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018787683672506265, + "loss": 1.0624, + "step": 2760 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001878337863349899, + "loss": 1.0879, + "step": 2765 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018779066459290468, + "loss": 1.1375, + "step": 2770 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018774747153383703, + "loss": 1.0634, + "step": 2775 + }, + { + "epoch": 0.15, + "learning_rate": 0.000187704207192875, + "loss": 1.0975, + "step": 2780 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018766087160516446, + "loss": 1.1125, + "step": 2785 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018761746480590915, + "loss": 1.0661, + "step": 2790 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001875739868303708, + "loss": 1.1257, + "step": 2795 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018753043771386877, + "loss": 1.1633, + "step": 2800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018748681749178035, + "loss": 1.1008, + "step": 2805 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001874431261995406, + "loss": 1.1241, + "step": 2810 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018739936387264207, + "loss": 1.1435, + "step": 2815 + }, + { + "epoch": 0.15, + "learning_rate": 0.00018735553054663538, + "loss": 1.1364, + "step": 2820 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001873116262571286, + "loss": 1.0481, + "step": 2825 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018726765103978745, + "loss": 1.072, + "step": 2830 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018722360493033538, + "loss": 1.1007, + "step": 2835 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001871794879645533, + "loss": 1.094, + "step": 2840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001871353001782798, + "loss": 1.1331, + "step": 2845 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018709104160741097, + "loss": 1.0766, + "step": 2850 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018704671228790036, + "loss": 1.1216, + "step": 2855 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018700231225575892, + "loss": 1.1281, + "step": 2860 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018695784154705523, + "loss": 1.1036, + "step": 2865 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001869133001979152, + "loss": 1.0473, + "step": 2870 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018686868824452208, + "loss": 1.0983, + "step": 2875 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018682400572311652, + "loss": 1.1571, + "step": 2880 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018677925266999645, + "loss": 1.2094, + "step": 2885 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001867344291215172, + "loss": 1.157, + "step": 2890 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018668953511409125, + "loss": 1.1718, + "step": 2895 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018664457068418835, + "loss": 1.1758, + "step": 2900 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018659953586833557, + "loss": 1.1578, + "step": 2905 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018655443070311696, + "loss": 1.1511, + "step": 2910 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018650925522517388, + "loss": 1.1209, + "step": 2915 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018646400947120473, + "loss": 1.1243, + "step": 2920 + }, + { + "epoch": 0.16, + "learning_rate": 0.000186418693477965, + "loss": 1.1156, + "step": 2925 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018637330728226727, + "loss": 1.1021, + "step": 2930 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001863278509209812, + "loss": 1.1547, + "step": 2935 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001862823244310333, + "loss": 1.1437, + "step": 2940 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018623672784940723, + "loss": 1.1695, + "step": 2945 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018619106121314343, + "loss": 1.1239, + "step": 2950 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018614532455933934, + "loss": 1.1569, + "step": 2955 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018609951792514923, + "loss": 1.0326, + "step": 2960 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001860536413477842, + "loss": 1.1536, + "step": 2965 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001860076948645123, + "loss": 1.0572, + "step": 2970 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018596167851265823, + "loss": 1.1142, + "step": 2975 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018591559232960354, + "loss": 1.1039, + "step": 2980 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001858694363527864, + "loss": 1.1157, + "step": 2985 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018582321061970177, + "loss": 1.1308, + "step": 2990 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018577691516790125, + "loss": 1.1011, + "step": 2995 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001857305500349931, + "loss": 1.1176, + "step": 3000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018568411525864218, + "loss": 1.1115, + "step": 3005 + }, + { + "epoch": 0.16, + "learning_rate": 0.00018563761087656979, + "loss": 1.0932, + "step": 3010 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018559103692655405, + "loss": 1.1332, + "step": 3015 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018554439344642932, + "loss": 1.1191, + "step": 3020 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001854976804740866, + "loss": 1.1141, + "step": 3025 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018545089804747325, + "loss": 1.1157, + "step": 3030 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018540404620459315, + "loss": 1.0612, + "step": 3035 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001853571249835065, + "loss": 1.1196, + "step": 3040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001853101344223299, + "loss": 1.1427, + "step": 3045 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018526307455923622, + "loss": 1.107, + "step": 3050 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018521594543245465, + "loss": 1.0782, + "step": 3055 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001851687470802708, + "loss": 1.1291, + "step": 3060 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018512147954102622, + "loss": 1.1437, + "step": 3065 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018507414285311893, + "loss": 1.1083, + "step": 3070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001850267370550029, + "loss": 1.1624, + "step": 3075 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018497926218518846, + "loss": 1.1455, + "step": 3080 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001849317182822419, + "loss": 1.1393, + "step": 3085 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018488410538478566, + "loss": 1.1823, + "step": 3090 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001848364235314982, + "loss": 1.1268, + "step": 3095 + }, + { + "epoch": 0.17, + "learning_rate": 0.000184788672761114, + "loss": 1.0425, + "step": 3100 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018474085311242356, + "loss": 1.0912, + "step": 3105 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018469296462427323, + "loss": 1.1366, + "step": 3110 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018464500733556536, + "loss": 1.1343, + "step": 3115 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018459698128525827, + "loss": 1.1269, + "step": 3120 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018454888651236602, + "loss": 1.085, + "step": 3125 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018450072305595844, + "loss": 1.2009, + "step": 3130 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018445249095516128, + "loss": 1.0964, + "step": 3135 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018440419024915607, + "loss": 1.1326, + "step": 3140 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018435582097717993, + "loss": 1.1127, + "step": 3145 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018430738317852592, + "loss": 1.1855, + "step": 3150 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018425887689254246, + "loss": 1.1376, + "step": 3155 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018421030215863383, + "loss": 1.0792, + "step": 3160 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018416165901625984, + "loss": 1.1013, + "step": 3165 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001841129475049359, + "loss": 1.1642, + "step": 3170 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001840641676642329, + "loss": 1.0473, + "step": 3175 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001840153195337773, + "loss": 1.1308, + "step": 3180 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018396640315325102, + "loss": 1.0742, + "step": 3185 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001839174185623914, + "loss": 1.1699, + "step": 3190 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018386836580099112, + "loss": 1.0544, + "step": 3195 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018381924490889841, + "loss": 1.2204, + "step": 3200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001837700559260167, + "loss": 1.1262, + "step": 3205 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001837207988923049, + "loss": 1.1881, + "step": 3210 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018367147384777704, + "loss": 1.1342, + "step": 3215 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018362208083250246, + "loss": 1.1371, + "step": 3220 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018357261988660568, + "loss": 1.1405, + "step": 3225 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018352309105026647, + "loss": 1.1261, + "step": 3230 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018347349436371966, + "loss": 1.106, + "step": 3235 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001834238298672553, + "loss": 1.1044, + "step": 3240 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018337409760121846, + "loss": 1.1062, + "step": 3245 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018332429760600926, + "loss": 1.1224, + "step": 3250 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018327442992208289, + "loss": 1.1594, + "step": 3255 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001832244945899495, + "loss": 1.0388, + "step": 3260 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001831744916501742, + "loss": 1.0519, + "step": 3265 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018312442114337696, + "loss": 1.1716, + "step": 3270 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018307428311023272, + "loss": 1.1017, + "step": 3275 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018302407759147128, + "loss": 1.1298, + "step": 3280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001829738046278772, + "loss": 1.1033, + "step": 3285 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018292346426028984, + "loss": 1.2006, + "step": 3290 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001828730565296033, + "loss": 1.1079, + "step": 3295 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018282258147676655, + "loss": 1.1674, + "step": 3300 + }, + { + "epoch": 0.18, + "learning_rate": 0.000182772039142783, + "loss": 1.0647, + "step": 3305 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018272142956871098, + "loss": 1.1953, + "step": 3310 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018267075279566317, + "loss": 1.1535, + "step": 3315 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001826200088648071, + "loss": 1.0722, + "step": 3320 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001825691978173647, + "loss": 1.1739, + "step": 3325 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018251831969461245, + "loss": 1.1158, + "step": 3330 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018246737453788135, + "loss": 1.1133, + "step": 3335 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018241636238855682, + "loss": 1.1238, + "step": 3340 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018236528328807874, + "loss": 1.0619, + "step": 3345 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018231413727794134, + "loss": 1.0751, + "step": 3350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001822629243996932, + "loss": 1.1201, + "step": 3355 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018221164469493727, + "loss": 1.1791, + "step": 3360 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018216029820533074, + "loss": 1.1038, + "step": 3365 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001821088849725851, + "loss": 1.0982, + "step": 3370 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018205740503846596, + "loss": 1.1717, + "step": 3375 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018200585844479325, + "loss": 1.0518, + "step": 3380 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018195424523344094, + "loss": 1.0946, + "step": 3385 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018190256544633717, + "loss": 1.1263, + "step": 3390 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001818508191254642, + "loss": 1.1184, + "step": 3395 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018179900631285823, + "loss": 1.1946, + "step": 3400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018174712705060957, + "loss": 1.0804, + "step": 3405 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018169518138086245, + "loss": 1.1199, + "step": 3410 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001816431693458151, + "loss": 1.1402, + "step": 3415 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018159109098771963, + "loss": 1.0885, + "step": 3420 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018153894634888204, + "loss": 1.1906, + "step": 3425 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018148673547166216, + "loss": 1.1324, + "step": 3430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018143445839847368, + "loss": 1.1636, + "step": 3435 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018138211517178395, + "loss": 1.1203, + "step": 3440 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018132970583411418, + "loss": 1.133, + "step": 3445 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018127723042803923, + "loss": 1.1441, + "step": 3450 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001812246889961876, + "loss": 1.1346, + "step": 3455 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001811720815812415, + "loss": 1.0715, + "step": 3460 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018111940822593668, + "loss": 1.1466, + "step": 3465 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018106666897306254, + "loss": 1.0836, + "step": 3470 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018101386386546188, + "loss": 1.2009, + "step": 3475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001809609929460311, + "loss": 1.079, + "step": 3480 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018090805625772002, + "loss": 1.0886, + "step": 3485 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018085505384353193, + "loss": 1.0999, + "step": 3490 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001808019857465235, + "loss": 1.0753, + "step": 3495 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018074885200980472, + "loss": 1.0931, + "step": 3500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018069565267653888, + "loss": 1.1192, + "step": 3505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001806423877899426, + "loss": 1.1133, + "step": 3510 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018058905739328583, + "loss": 1.0897, + "step": 3515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001805356615298916, + "loss": 1.1205, + "step": 3520 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018048220024313618, + "loss": 1.0214, + "step": 3525 + }, + { + "epoch": 0.19, + "learning_rate": 0.000180428673576449, + "loss": 1.1051, + "step": 3530 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018037508157331267, + "loss": 1.0827, + "step": 3535 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018032142427726268, + "loss": 1.1091, + "step": 3540 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018026770173188778, + "loss": 1.1486, + "step": 3545 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018021391398082953, + "loss": 1.1414, + "step": 3550 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018016006106778265, + "loss": 1.0697, + "step": 3555 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001801061430364946, + "loss": 1.1202, + "step": 3560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001800521599307659, + "loss": 1.0858, + "step": 3565 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017999811179444984, + "loss": 1.0955, + "step": 3570 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001799439986714526, + "loss": 1.1031, + "step": 3575 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017988982060573313, + "loss": 1.0493, + "step": 3580 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017983557764130308, + "loss": 1.1149, + "step": 3585 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001797812698222269, + "loss": 1.1113, + "step": 3590 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017972689719262174, + "loss": 1.0224, + "step": 3595 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017967245979665723, + "loss": 1.0629, + "step": 3600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017961795767855584, + "loss": 1.0922, + "step": 3605 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017956339088259248, + "loss": 1.0676, + "step": 3610 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017950875945309459, + "loss": 1.1144, + "step": 3615 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017945406343444225, + "loss": 1.0925, + "step": 3620 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017939930287106778, + "loss": 1.1248, + "step": 3625 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017934447780745623, + "loss": 1.1524, + "step": 3630 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001792895882881448, + "loss": 1.1551, + "step": 3635 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017923463435772316, + "loss": 1.1304, + "step": 3640 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017917961606083327, + "loss": 1.1746, + "step": 3645 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017912453344216936, + "loss": 1.1166, + "step": 3650 + }, + { + "epoch": 0.2, + "learning_rate": 0.000179069386546478, + "loss": 1.1669, + "step": 3655 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001790141754185579, + "loss": 1.079, + "step": 3660 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017895890010325991, + "loss": 1.1902, + "step": 3665 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001789035606454872, + "loss": 1.1527, + "step": 3670 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017884815709019484, + "loss": 1.1386, + "step": 3675 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001787926894823901, + "loss": 1.1751, + "step": 3680 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001787371578671322, + "loss": 1.0432, + "step": 3685 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017868156228953243, + "loss": 1.0398, + "step": 3690 + }, + { + "epoch": 0.2, + "learning_rate": 0.000178625902794754, + "loss": 1.1049, + "step": 3695 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017857017942801207, + "loss": 1.0748, + "step": 3700 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001785143922345736, + "loss": 1.1651, + "step": 3705 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001784585412597575, + "loss": 1.1152, + "step": 3710 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001784026265489345, + "loss": 1.0759, + "step": 3715 + }, + { + "epoch": 0.2, + "learning_rate": 0.000178346648147527, + "loss": 1.1675, + "step": 3720 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017829060610100925, + "loss": 1.1433, + "step": 3725 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001782345004549072, + "loss": 1.097, + "step": 3730 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017817833125479836, + "loss": 1.1473, + "step": 3735 + }, + { + "epoch": 0.2, + "learning_rate": 0.00017812209854631194, + "loss": 1.1478, + "step": 3740 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017806580237512867, + "loss": 1.0655, + "step": 3745 + }, + { + "epoch": 0.21, + "learning_rate": 0.000178009442786981, + "loss": 1.1638, + "step": 3750 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001779530198276528, + "loss": 1.1192, + "step": 3755 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001778965335429793, + "loss": 1.1106, + "step": 3760 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001778399839788474, + "loss": 1.1277, + "step": 3765 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017778337118119517, + "loss": 1.0939, + "step": 3770 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001777266951960123, + "loss": 1.1052, + "step": 3775 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017766995606933956, + "loss": 1.0946, + "step": 3780 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001776131538472692, + "loss": 1.075, + "step": 3785 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017755628857594465, + "loss": 1.0893, + "step": 3790 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001774993603015606, + "loss": 1.1242, + "step": 3795 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001774423690703628, + "loss": 1.0987, + "step": 3800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001773853149286483, + "loss": 1.1195, + "step": 3805 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017732819792276515, + "loss": 1.2353, + "step": 3810 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017727101809911257, + "loss": 1.11, + "step": 3815 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017721377550414065, + "loss": 1.0743, + "step": 3820 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017715647018435064, + "loss": 1.0919, + "step": 3825 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017709910218629462, + "loss": 1.1631, + "step": 3830 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017704167155657564, + "loss": 1.1065, + "step": 3835 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017698417834184762, + "loss": 1.1687, + "step": 3840 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017692662258881537, + "loss": 1.1229, + "step": 3845 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017686900434423452, + "loss": 1.1302, + "step": 3850 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017681132365491122, + "loss": 1.1169, + "step": 3855 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017675358056770267, + "loss": 1.1913, + "step": 3860 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001766957751295166, + "loss": 1.1608, + "step": 3865 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017663790738731142, + "loss": 1.0848, + "step": 3870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001765799773880961, + "loss": 1.1747, + "step": 3875 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017652198517893026, + "loss": 1.1902, + "step": 3880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017646393080692404, + "loss": 1.0914, + "step": 3885 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017640581431923807, + "loss": 1.2147, + "step": 3890 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017634763576308344, + "loss": 1.1117, + "step": 3895 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017628939518572157, + "loss": 1.1725, + "step": 3900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017623109263446442, + "loss": 1.1705, + "step": 3905 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001761727281566743, + "loss": 1.1261, + "step": 3910 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001761143017997636, + "loss": 1.1658, + "step": 3915 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001760558136111952, + "loss": 1.1473, + "step": 3920 + }, + { + "epoch": 0.21, + "learning_rate": 0.00017599726363848214, + "loss": 1.121, + "step": 3925 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017593865192918766, + "loss": 1.1463, + "step": 3930 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001758799785309251, + "loss": 1.0691, + "step": 3935 + }, + { + "epoch": 0.22, + "learning_rate": 0.000175821243491358, + "loss": 1.061, + "step": 3940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017576244685819993, + "loss": 1.1438, + "step": 3945 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001757035886792144, + "loss": 1.1461, + "step": 3950 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001756446690022151, + "loss": 1.047, + "step": 3955 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017558568787506555, + "loss": 1.2109, + "step": 3960 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017552664534567923, + "loss": 1.1237, + "step": 3965 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017546754146201954, + "loss": 1.0917, + "step": 3970 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001754083762720996, + "loss": 1.0407, + "step": 3975 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001753491498239825, + "loss": 1.0925, + "step": 3980 + }, + { + "epoch": 0.22, + "learning_rate": 0.000175289862165781, + "loss": 1.1538, + "step": 3985 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017523051334565753, + "loss": 1.0669, + "step": 3990 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001751711034118243, + "loss": 1.0704, + "step": 3995 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017511163241254308, + "loss": 1.1259, + "step": 4000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001750521003961253, + "loss": 1.1539, + "step": 4005 + }, + { + "epoch": 0.22, + "learning_rate": 0.000174992507410932, + "loss": 1.1291, + "step": 4010 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017493285350537367, + "loss": 1.2024, + "step": 4015 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017487313872791035, + "loss": 1.0821, + "step": 4020 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017481336312705144, + "loss": 1.1142, + "step": 4025 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017475352675135595, + "loss": 1.0999, + "step": 4030 + }, + { + "epoch": 0.22, + "learning_rate": 0.000174693629649432, + "loss": 1.1705, + "step": 4035 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001746336718699372, + "loss": 1.0969, + "step": 4040 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017457365346157836, + "loss": 1.09, + "step": 4045 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017451357447311163, + "loss": 1.168, + "step": 4050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001744534349533424, + "loss": 1.1579, + "step": 4055 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017439323495112508, + "loss": 1.1576, + "step": 4060 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017433297451536331, + "loss": 1.1181, + "step": 4065 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001742726536950099, + "loss": 1.155, + "step": 4070 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017421227253906654, + "loss": 1.1496, + "step": 4075 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001741518310965841, + "loss": 1.1455, + "step": 4080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001740913294166623, + "loss": 1.0682, + "step": 4085 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001740307675484498, + "loss": 1.0999, + "step": 4090 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001739701455411443, + "loss": 1.1136, + "step": 4095 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017390946344399223, + "loss": 1.0865, + "step": 4100 + }, + { + "epoch": 0.22, + "learning_rate": 0.00017384872130628875, + "loss": 1.1859, + "step": 4105 + }, + { + "epoch": 0.23, + "learning_rate": 0.000173787919177378, + "loss": 1.1327, + "step": 4110 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017372705710665275, + "loss": 1.0803, + "step": 4115 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017366613514355442, + "loss": 1.1387, + "step": 4120 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017360515333757312, + "loss": 1.1616, + "step": 4125 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017354411173824762, + "loss": 1.1676, + "step": 4130 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001734830103951653, + "loss": 1.0807, + "step": 4135 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017342184935796186, + "loss": 1.1346, + "step": 4140 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017336062867632176, + "loss": 1.1262, + "step": 4145 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017329934839997765, + "loss": 1.18, + "step": 4150 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017323800857871083, + "loss": 1.1422, + "step": 4155 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017317660926235084, + "loss": 1.1332, + "step": 4160 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001731151505007756, + "loss": 1.1504, + "step": 4165 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017305363234391123, + "loss": 1.0972, + "step": 4170 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017299205484173224, + "loss": 1.0445, + "step": 4175 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017293041804426125, + "loss": 1.1446, + "step": 4180 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001728687220015691, + "loss": 1.1206, + "step": 4185 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001728069667637747, + "loss": 1.1307, + "step": 4190 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001727451523810451, + "loss": 1.1246, + "step": 4195 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017268327890359542, + "loss": 1.0155, + "step": 4200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017262134638168868, + "loss": 1.1233, + "step": 4205 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017255935486563598, + "loss": 1.146, + "step": 4210 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017249730440579628, + "loss": 1.1842, + "step": 4215 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017243519505257644, + "loss": 1.1238, + "step": 4220 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017237302685643109, + "loss": 1.0993, + "step": 4225 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017231079986786287, + "loss": 1.2037, + "step": 4230 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017224851413742194, + "loss": 1.1127, + "step": 4235 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017218616971570634, + "loss": 1.1249, + "step": 4240 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017212376665336166, + "loss": 1.1526, + "step": 4245 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017206130500108124, + "loss": 1.0851, + "step": 4250 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017199878480960595, + "loss": 1.1375, + "step": 4255 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017193620612972427, + "loss": 1.1092, + "step": 4260 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017187356901227218, + "loss": 1.1143, + "step": 4265 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017181087350813306, + "loss": 1.1178, + "step": 4270 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017174811966823777, + "loss": 1.052, + "step": 4275 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017168530754356452, + "loss": 1.1377, + "step": 4280 + }, + { + "epoch": 0.23, + "learning_rate": 0.000171622437185139, + "loss": 1.1928, + "step": 4285 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017155950864403407, + "loss": 1.1823, + "step": 4290 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017149652197136987, + "loss": 1.1616, + "step": 4295 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017143347721831383, + "loss": 1.1134, + "step": 4300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017137037443608054, + "loss": 1.1108, + "step": 4305 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001713072136759317, + "loss": 1.1527, + "step": 4310 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017124399498917605, + "loss": 1.1626, + "step": 4315 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001711807184271696, + "loss": 1.1243, + "step": 4320 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001711173840413152, + "loss": 1.1742, + "step": 4325 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001710539918830626, + "loss": 1.1432, + "step": 4330 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017099054200390867, + "loss": 1.1149, + "step": 4335 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001709270344553971, + "loss": 1.12, + "step": 4340 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017086346928911838, + "loss": 1.1046, + "step": 4345 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001707998465567099, + "loss": 1.1193, + "step": 4350 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001707361663098557, + "loss": 1.0834, + "step": 4355 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017067242860028663, + "loss": 1.1611, + "step": 4360 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017060863347978015, + "loss": 1.1091, + "step": 4365 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017054478100016044, + "loss": 1.132, + "step": 4370 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017048087121329823, + "loss": 1.1149, + "step": 4375 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017041690417111084, + "loss": 1.1488, + "step": 4380 + }, + { + "epoch": 0.24, + "learning_rate": 0.000170352879925562, + "loss": 1.0606, + "step": 4385 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017028879852866197, + "loss": 1.0512, + "step": 4390 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017022466003246747, + "loss": 1.1841, + "step": 4395 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017016046448908165, + "loss": 1.1139, + "step": 4400 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017009621195065387, + "loss": 1.1809, + "step": 4405 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001700319024693799, + "loss": 1.0092, + "step": 4410 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016996753609750163, + "loss": 1.0473, + "step": 4415 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016990311288730733, + "loss": 1.1673, + "step": 4420 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001698386328911313, + "loss": 1.1659, + "step": 4425 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016977409616135408, + "loss": 1.118, + "step": 4430 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016970950275040227, + "loss": 1.1553, + "step": 4435 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016964485271074844, + "loss": 1.1502, + "step": 4440 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016958014609491133, + "loss": 1.1436, + "step": 4445 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016951538295545545, + "loss": 1.1698, + "step": 4450 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016945056334499134, + "loss": 1.09, + "step": 4455 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016938568731617536, + "loss": 1.2259, + "step": 4460 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016932075492170974, + "loss": 1.1838, + "step": 4465 + }, + { + "epoch": 0.24, + "learning_rate": 0.00016925576621434248, + "loss": 1.108, + "step": 4470 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016919072124686732, + "loss": 1.0844, + "step": 4475 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016912562007212378, + "loss": 1.101, + "step": 4480 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016906046274299687, + "loss": 1.1597, + "step": 4485 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016899524931241746, + "loss": 1.1253, + "step": 4490 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016892997983336173, + "loss": 1.1217, + "step": 4495 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016886465435885154, + "loss": 1.1203, + "step": 4500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016879927294195427, + "loss": 1.1046, + "step": 4505 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016873383563578267, + "loss": 1.1332, + "step": 4510 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016866834249349487, + "loss": 1.1025, + "step": 4515 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016860279356829445, + "loss": 1.1143, + "step": 4520 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016853718891343027, + "loss": 1.1672, + "step": 4525 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016847152858219638, + "loss": 1.0842, + "step": 4530 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001684058126279321, + "loss": 1.1651, + "step": 4535 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016834004110402207, + "loss": 1.1487, + "step": 4540 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016827421406389592, + "loss": 1.046, + "step": 4545 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001682083315610284, + "loss": 1.1106, + "step": 4550 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016814239364893935, + "loss": 1.1432, + "step": 4555 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016807640038119363, + "loss": 1.1026, + "step": 4560 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016801035181140101, + "loss": 1.1978, + "step": 4565 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001679442479932163, + "loss": 1.0761, + "step": 4570 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016787808898033901, + "loss": 1.1077, + "step": 4575 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016781187482651364, + "loss": 1.0634, + "step": 4580 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016774560558552946, + "loss": 1.1863, + "step": 4585 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016767928131122047, + "loss": 1.1161, + "step": 4590 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016761290205746535, + "loss": 1.0533, + "step": 4595 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016754646787818747, + "loss": 1.1095, + "step": 4600 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016747997882735478, + "loss": 1.136, + "step": 4605 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001674134349589799, + "loss": 1.0895, + "step": 4610 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016734683632711986, + "loss": 1.1445, + "step": 4615 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016728018298587622, + "loss": 1.1301, + "step": 4620 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016721347498939513, + "loss": 1.1773, + "step": 4625 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016714671239186692, + "loss": 1.1557, + "step": 4630 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016707989524752636, + "loss": 1.1129, + "step": 4635 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016701302361065253, + "loss": 1.1682, + "step": 4640 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016694609753556885, + "loss": 1.0927, + "step": 4645 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016687911707664275, + "loss": 1.1254, + "step": 4650 + }, + { + "epoch": 0.25, + "learning_rate": 0.00016681208228828612, + "loss": 1.0745, + "step": 4655 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001667449932249548, + "loss": 1.1282, + "step": 4660 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016667784994114882, + "loss": 1.147, + "step": 4665 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016661065249141217, + "loss": 1.0956, + "step": 4670 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001665434009303329, + "loss": 1.174, + "step": 4675 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016647609531254294, + "loss": 1.1663, + "step": 4680 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016640873569271825, + "loss": 1.155, + "step": 4685 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016634132212557862, + "loss": 1.1717, + "step": 4690 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016627385466588755, + "loss": 1.0635, + "step": 4695 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016620633336845254, + "loss": 1.097, + "step": 4700 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001661387582881246, + "loss": 1.0957, + "step": 4705 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016607112947979852, + "loss": 1.1087, + "step": 4710 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016600344699841277, + "loss": 1.1274, + "step": 4715 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016593571089894938, + "loss": 1.1693, + "step": 4720 + }, + { + "epoch": 0.26, + "learning_rate": 0.000165867921236434, + "loss": 1.0656, + "step": 4725 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001658000780659357, + "loss": 1.143, + "step": 4730 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016573218144256708, + "loss": 1.13, + "step": 4735 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001656642314214841, + "loss": 1.1204, + "step": 4740 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016559622805788622, + "loss": 1.113, + "step": 4745 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016552817140701615, + "loss": 1.0681, + "step": 4750 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016546006152415985, + "loss": 1.1235, + "step": 4755 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001653918984646466, + "loss": 1.1158, + "step": 4760 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016532368228384877, + "loss": 1.2022, + "step": 4765 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016525541303718204, + "loss": 1.1223, + "step": 4770 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001651870907801051, + "loss": 1.1205, + "step": 4775 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001651187155681197, + "loss": 1.117, + "step": 4780 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016505028745677063, + "loss": 1.1233, + "step": 4785 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016498180650164566, + "loss": 1.1003, + "step": 4790 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016491327275837547, + "loss": 1.1296, + "step": 4795 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016484468628263362, + "loss": 1.1567, + "step": 4800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016477604713013657, + "loss": 1.0846, + "step": 4805 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016470735535664346, + "loss": 1.1573, + "step": 4810 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016463861101795627, + "loss": 1.1431, + "step": 4815 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016456981416991965, + "loss": 1.0639, + "step": 4820 + }, + { + "epoch": 0.26, + "learning_rate": 0.00016450096486842085, + "loss": 1.0252, + "step": 4825 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001644320631693898, + "loss": 1.074, + "step": 4830 + }, + { + "epoch": 0.26, + "learning_rate": 0.000164363109128799, + "loss": 1.1785, + "step": 4835 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001642941028026635, + "loss": 1.1214, + "step": 4840 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016422504424704079, + "loss": 1.0599, + "step": 4845 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001641559335180307, + "loss": 1.0839, + "step": 4850 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001640867706717755, + "loss": 1.1294, + "step": 4855 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016401755576445982, + "loss": 1.1001, + "step": 4860 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016394828885231064, + "loss": 1.1101, + "step": 4865 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016387896999159706, + "loss": 1.1239, + "step": 4870 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016380959923863045, + "loss": 1.1619, + "step": 4875 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016374017664976437, + "loss": 1.1858, + "step": 4880 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001636707022813944, + "loss": 1.1114, + "step": 4885 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016360117618995822, + "loss": 1.0545, + "step": 4890 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016353159843193564, + "loss": 1.0748, + "step": 4895 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001634619690638482, + "loss": 1.1413, + "step": 4900 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016339228814225962, + "loss": 1.1231, + "step": 4905 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016332255572377533, + "loss": 1.0801, + "step": 4910 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016325277186504268, + "loss": 1.1022, + "step": 4915 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016318293662275068, + "loss": 1.1441, + "step": 4920 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016311305005363032, + "loss": 1.0301, + "step": 4925 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016304311221445404, + "loss": 1.0874, + "step": 4930 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016297312316203603, + "loss": 1.1444, + "step": 4935 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016290308295323212, + "loss": 1.1124, + "step": 4940 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016283299164493965, + "loss": 1.0805, + "step": 4945 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016276284929409753, + "loss": 1.0188, + "step": 4950 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016269265595768604, + "loss": 1.1557, + "step": 4955 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016262241169272688, + "loss": 1.1399, + "step": 4960 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016255211655628326, + "loss": 1.1706, + "step": 4965 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016248177060545953, + "loss": 1.1087, + "step": 4970 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016241137389740148, + "loss": 1.062, + "step": 4975 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016234092648929614, + "loss": 1.1184, + "step": 4980 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016227042843837148, + "loss": 1.0532, + "step": 4985 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001621998798018969, + "loss": 1.1242, + "step": 4990 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016212928063718277, + "loss": 1.1815, + "step": 4995 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016205863100158044, + "loss": 1.1122, + "step": 5000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016198793095248239, + "loss": 1.064, + "step": 5005 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016191718054732189, + "loss": 1.0185, + "step": 5010 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016184637984357337, + "loss": 1.1035, + "step": 5015 + }, + { + "epoch": 0.27, + "learning_rate": 0.00016177552889875188, + "loss": 1.1197, + "step": 5020 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016170462777041338, + "loss": 1.1555, + "step": 5025 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016163367651615465, + "loss": 1.1192, + "step": 5030 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016156267519361304, + "loss": 1.1748, + "step": 5035 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016149162386046677, + "loss": 1.1335, + "step": 5040 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016142052257443455, + "loss": 1.1156, + "step": 5045 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016134937139327567, + "loss": 1.1977, + "step": 5050 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016127817037479004, + "loss": 1.182, + "step": 5055 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016120691957681796, + "loss": 1.1381, + "step": 5060 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001611356190572402, + "loss": 1.1374, + "step": 5065 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016106426887397787, + "loss": 1.1175, + "step": 5070 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001609928690849926, + "loss": 1.0858, + "step": 5075 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016092141974828614, + "loss": 1.0358, + "step": 5080 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016084992092190052, + "loss": 1.1384, + "step": 5085 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016077837266391807, + "loss": 1.1729, + "step": 5090 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001607067750324612, + "loss": 1.1303, + "step": 5095 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016063512808569243, + "loss": 1.078, + "step": 5100 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016056343188181432, + "loss": 1.0699, + "step": 5105 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001604916864790695, + "loss": 1.033, + "step": 5110 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016041989193574054, + "loss": 1.1381, + "step": 5115 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001603480483101499, + "loss": 1.0595, + "step": 5120 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016027615566066, + "loss": 1.1194, + "step": 5125 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016020421404567298, + "loss": 1.119, + "step": 5130 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016013222352363082, + "loss": 1.1099, + "step": 5135 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016006018415301517, + "loss": 1.1056, + "step": 5140 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001599880959923475, + "loss": 1.1328, + "step": 5145 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015991595910018873, + "loss": 1.0698, + "step": 5150 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015984377353513951, + "loss": 1.2037, + "step": 5155 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015977153935584, + "loss": 1.125, + "step": 5160 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015969925662096977, + "loss": 1.1111, + "step": 5165 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015962692538924793, + "loss": 1.1391, + "step": 5170 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015955454571943292, + "loss": 1.1056, + "step": 5175 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001594821176703226, + "loss": 1.1203, + "step": 5180 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015940964130075404, + "loss": 1.0924, + "step": 5185 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001593371166696037, + "loss": 1.0482, + "step": 5190 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015926454383578708, + "loss": 1.1541, + "step": 5195 + }, + { + "epoch": 0.28, + "learning_rate": 0.00015919192285825896, + "loss": 1.1934, + "step": 5200 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015911925379601317, + "loss": 1.122, + "step": 5205 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001590465367080826, + "loss": 1.1165, + "step": 5210 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015897377165353916, + "loss": 1.1322, + "step": 5215 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015890095869149377, + "loss": 1.1082, + "step": 5220 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001588280978810962, + "loss": 1.1623, + "step": 5225 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015875518928153525, + "loss": 1.1047, + "step": 5230 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015868223295203824, + "loss": 1.0627, + "step": 5235 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015860922895187155, + "loss": 1.1736, + "step": 5240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015853617734034014, + "loss": 1.135, + "step": 5245 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001584630781767877, + "loss": 1.1172, + "step": 5250 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015838993152059654, + "loss": 1.1151, + "step": 5255 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001583167374311875, + "loss": 1.0156, + "step": 5260 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015824349596802004, + "loss": 1.1202, + "step": 5265 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015817020719059206, + "loss": 1.1134, + "step": 5270 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015809687115843983, + "loss": 1.0793, + "step": 5275 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015802348793113815, + "loss": 1.1246, + "step": 5280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001579500575683, + "loss": 1.1475, + "step": 5285 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001578765801295768, + "loss": 1.1283, + "step": 5290 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001578030556746582, + "loss": 1.1768, + "step": 5295 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015772948426327188, + "loss": 1.0936, + "step": 5300 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001576558659551838, + "loss": 1.1402, + "step": 5305 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015758220081019798, + "loss": 1.0974, + "step": 5310 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015750848888815654, + "loss": 1.1174, + "step": 5315 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015743473024893952, + "loss": 1.1125, + "step": 5320 + }, + { + "epoch": 0.29, + "learning_rate": 0.000157360924952465, + "loss": 1.1624, + "step": 5325 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015728707305868886, + "loss": 1.1312, + "step": 5330 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015721317462760493, + "loss": 1.0624, + "step": 5335 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015713922971924475, + "loss": 1.0681, + "step": 5340 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015706523839367767, + "loss": 1.1555, + "step": 5345 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001569912007110108, + "loss": 1.1092, + "step": 5350 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015691711673138876, + "loss": 1.1266, + "step": 5355 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015684298651499394, + "loss": 1.1312, + "step": 5360 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015676881012204627, + "loss": 1.0635, + "step": 5365 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015669458761280298, + "loss": 1.0268, + "step": 5370 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015662031904755902, + "loss": 1.1884, + "step": 5375 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015654600448664665, + "loss": 1.0921, + "step": 5380 + }, + { + "epoch": 0.29, + "learning_rate": 0.00015647164399043556, + "loss": 1.0646, + "step": 5385 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015639723761933264, + "loss": 1.098, + "step": 5390 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001563227854337821, + "loss": 1.1172, + "step": 5395 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001562482874942654, + "loss": 1.1878, + "step": 5400 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015617374386130113, + "loss": 1.104, + "step": 5405 + }, + { + "epoch": 0.3, + "learning_rate": 0.000156099154595445, + "loss": 1.1744, + "step": 5410 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015602451975728977, + "loss": 1.2066, + "step": 5415 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015594983940746534, + "loss": 1.2062, + "step": 5420 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015587511360663845, + "loss": 1.0693, + "step": 5425 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015580034241551274, + "loss": 1.1161, + "step": 5430 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015572552589482896, + "loss": 1.1196, + "step": 5435 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015565066410536433, + "loss": 1.1331, + "step": 5440 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001555757571079331, + "loss": 1.1623, + "step": 5445 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015550080496338616, + "loss": 1.1009, + "step": 5450 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015542580773261113, + "loss": 1.1545, + "step": 5455 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015535076547653216, + "loss": 1.0772, + "step": 5460 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015527567825611004, + "loss": 1.186, + "step": 5465 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015520054613234207, + "loss": 1.0907, + "step": 5470 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015512536916626203, + "loss": 1.095, + "step": 5475 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015505014741894017, + "loss": 1.1298, + "step": 5480 + }, + { + "epoch": 0.3, + "learning_rate": 0.000154974880951483, + "loss": 1.0656, + "step": 5485 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015489956982503352, + "loss": 1.1046, + "step": 5490 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015482421410077088, + "loss": 1.1659, + "step": 5495 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015474881383991055, + "loss": 1.1071, + "step": 5500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015467336910370406, + "loss": 1.124, + "step": 5505 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015459787995343919, + "loss": 1.1196, + "step": 5510 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001545223464504398, + "loss": 1.107, + "step": 5515 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015444676865606567, + "loss": 1.0854, + "step": 5520 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015437114663171265, + "loss": 1.1515, + "step": 5525 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015429548043881244, + "loss": 1.0814, + "step": 5530 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015421977013883273, + "loss": 1.096, + "step": 5535 + }, + { + "epoch": 0.3, + "learning_rate": 0.000154144015793277, + "loss": 1.1135, + "step": 5540 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015406821746368445, + "loss": 1.0747, + "step": 5545 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015399237521163012, + "loss": 1.1497, + "step": 5550 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001539164890987246, + "loss": 1.0892, + "step": 5555 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015384055918661423, + "loss": 1.1524, + "step": 5560 + }, + { + "epoch": 0.3, + "learning_rate": 0.00015376458553698083, + "loss": 1.0494, + "step": 5565 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015368856821154186, + "loss": 1.1091, + "step": 5570 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001536125072720501, + "loss": 1.1346, + "step": 5575 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015353640278029397, + "loss": 1.134, + "step": 5580 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015346025479809705, + "loss": 1.1815, + "step": 5585 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015338406338731843, + "loss": 1.1357, + "step": 5590 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001533078286098524, + "loss": 1.088, + "step": 5595 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001532315505276285, + "loss": 1.1526, + "step": 5600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001531552292026114, + "loss": 1.066, + "step": 5605 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001530788646968009, + "loss": 1.1866, + "step": 5610 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015300245707223196, + "loss": 1.123, + "step": 5615 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015292600639097453, + "loss": 1.1146, + "step": 5620 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015284951271513355, + "loss": 1.1022, + "step": 5625 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015277297610684883, + "loss": 1.1311, + "step": 5630 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015269639662829506, + "loss": 1.12, + "step": 5635 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015261977434168186, + "loss": 1.0553, + "step": 5640 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015254310930925348, + "loss": 1.1149, + "step": 5645 + }, + { + "epoch": 0.31, + "learning_rate": 0.000152466401593289, + "loss": 1.1002, + "step": 5650 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015238965125610217, + "loss": 1.1554, + "step": 5655 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015231285836004126, + "loss": 1.1515, + "step": 5660 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001522360229674892, + "loss": 1.0392, + "step": 5665 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001521591451408634, + "loss": 1.1207, + "step": 5670 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015208222494261586, + "loss": 1.0967, + "step": 5675 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015200526243523287, + "loss": 1.1501, + "step": 5680 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001519282576812351, + "loss": 1.1713, + "step": 5685 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001518512107431776, + "loss": 1.2065, + "step": 5690 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015177412168364962, + "loss": 1.2021, + "step": 5695 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015169699056527474, + "loss": 1.1082, + "step": 5700 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015161981745071056, + "loss": 1.1254, + "step": 5705 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001515426024026489, + "loss": 1.1249, + "step": 5710 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015146534548381562, + "loss": 1.1814, + "step": 5715 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015138804675697055, + "loss": 1.2072, + "step": 5720 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015131070628490753, + "loss": 1.114, + "step": 5725 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001512333241304543, + "loss": 1.0971, + "step": 5730 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015115590035647247, + "loss": 1.1448, + "step": 5735 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015107843502585748, + "loss": 1.1395, + "step": 5740 + }, + { + "epoch": 0.31, + "learning_rate": 0.00015100092820153848, + "loss": 1.0912, + "step": 5745 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001509233799464783, + "loss": 1.1447, + "step": 5750 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015084579032367354, + "loss": 1.1428, + "step": 5755 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015076815939615428, + "loss": 1.1153, + "step": 5760 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001506904872269843, + "loss": 1.0457, + "step": 5765 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015061277387926072, + "loss": 1.1668, + "step": 5770 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015053501941611418, + "loss": 1.0989, + "step": 5775 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015045722390070874, + "loss": 1.0823, + "step": 5780 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015037938739624184, + "loss": 1.1403, + "step": 5785 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015030150996594413, + "loss": 1.1209, + "step": 5790 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015022359167307956, + "loss": 1.1237, + "step": 5795 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015014563258094527, + "loss": 1.1439, + "step": 5800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015006763275287154, + "loss": 1.0948, + "step": 5805 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014998959225222177, + "loss": 1.1501, + "step": 5810 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001499115111423923, + "loss": 1.1745, + "step": 5815 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014983338948681255, + "loss": 1.1036, + "step": 5820 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014975522734894488, + "loss": 1.1006, + "step": 5825 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001496770247922845, + "loss": 1.0889, + "step": 5830 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014959878188035945, + "loss": 1.1197, + "step": 5835 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001495204986767306, + "loss": 1.1228, + "step": 5840 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014944217524499154, + "loss": 1.1758, + "step": 5845 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001493638116487685, + "loss": 1.1087, + "step": 5850 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014928540795172032, + "loss": 1.0652, + "step": 5855 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001492069642175385, + "loss": 1.1196, + "step": 5860 + }, + { + "epoch": 0.32, + "learning_rate": 0.000149128480509947, + "loss": 1.1953, + "step": 5865 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014904995689270223, + "loss": 1.1149, + "step": 5870 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014897139342959318, + "loss": 1.1315, + "step": 5875 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014889279018444102, + "loss": 1.0991, + "step": 5880 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014881414722109929, + "loss": 1.0311, + "step": 5885 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014873546460345376, + "loss": 1.1245, + "step": 5890 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014865674239542256, + "loss": 1.0856, + "step": 5895 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014857798066095584, + "loss": 1.126, + "step": 5900 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014849917946403596, + "loss": 1.1151, + "step": 5905 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001484203388686773, + "loss": 1.1623, + "step": 5910 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014834145893892614, + "loss": 1.1678, + "step": 5915 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001482625397388608, + "loss": 1.1427, + "step": 5920 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001481835813325916, + "loss": 1.1141, + "step": 5925 + }, + { + "epoch": 0.32, + "learning_rate": 0.00014810458378426055, + "loss": 1.0961, + "step": 5930 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014802554715804157, + "loss": 1.1634, + "step": 5935 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014794647151814022, + "loss": 1.1194, + "step": 5940 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001478673569287938, + "loss": 1.155, + "step": 5945 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001477882034542713, + "loss": 1.1072, + "step": 5950 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001477090111588732, + "loss": 1.078, + "step": 5955 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014762978010693167, + "loss": 1.1677, + "step": 5960 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014755051036281013, + "loss": 1.181, + "step": 5965 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014747120199090375, + "loss": 1.1694, + "step": 5970 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001473918550556388, + "loss": 1.1733, + "step": 5975 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014731246962147292, + "loss": 1.114, + "step": 5980 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014723304575289516, + "loss": 1.1161, + "step": 5985 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014715358351442573, + "loss": 1.0877, + "step": 5990 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014707408297061591, + "loss": 1.1886, + "step": 5995 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001469945441860483, + "loss": 1.111, + "step": 6000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014691496722533633, + "loss": 1.1828, + "step": 6005 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001468353521531246, + "loss": 1.0762, + "step": 6010 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014675569903408868, + "loss": 1.1449, + "step": 6015 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014667600793293492, + "loss": 1.1499, + "step": 6020 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014659627891440064, + "loss": 1.0797, + "step": 6025 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014651651204325384, + "loss": 1.1069, + "step": 6030 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014643670738429348, + "loss": 1.1029, + "step": 6035 + }, + { + "epoch": 0.33, + "learning_rate": 0.000146356865002349, + "loss": 1.1036, + "step": 6040 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014627698496228063, + "loss": 1.1002, + "step": 6045 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001461970673289791, + "loss": 1.0889, + "step": 6050 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014611711216736572, + "loss": 1.0716, + "step": 6055 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014603711954239227, + "loss": 1.1705, + "step": 6060 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014595708951904102, + "loss": 1.0456, + "step": 6065 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014587702216232453, + "loss": 1.0921, + "step": 6070 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014579691753728572, + "loss": 1.1518, + "step": 6075 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014571677570899782, + "loss": 1.0895, + "step": 6080 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014563659674256428, + "loss": 1.0871, + "step": 6085 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001455563807031187, + "loss": 1.163, + "step": 6090 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014547612765582473, + "loss": 1.1374, + "step": 6095 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014539583766587624, + "loss": 1.1316, + "step": 6100 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014531551079849695, + "loss": 1.1371, + "step": 6105 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014523514711894066, + "loss": 1.1409, + "step": 6110 + }, + { + "epoch": 0.33, + "learning_rate": 0.00014515474669249095, + "loss": 1.0427, + "step": 6115 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014507430958446142, + "loss": 1.1107, + "step": 6120 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014499383586019524, + "loss": 1.2219, + "step": 6125 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001449133255850656, + "loss": 1.142, + "step": 6130 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014483277882447508, + "loss": 1.0851, + "step": 6135 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014475219564385623, + "loss": 1.1343, + "step": 6140 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014467157610867094, + "loss": 1.1167, + "step": 6145 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014459092028441069, + "loss": 1.16, + "step": 6150 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014451022823659646, + "loss": 1.1601, + "step": 6155 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014442950003077862, + "loss": 1.0655, + "step": 6160 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014434873573253697, + "loss": 1.1391, + "step": 6165 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014426793540748062, + "loss": 1.1291, + "step": 6170 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014418709912124796, + "loss": 1.1682, + "step": 6175 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001441062269395065, + "loss": 1.1188, + "step": 6180 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014402531892795305, + "loss": 1.0427, + "step": 6185 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014394437515231336, + "loss": 1.1258, + "step": 6190 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014386339567834243, + "loss": 1.0512, + "step": 6195 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014378238057182404, + "loss": 1.1841, + "step": 6200 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014370132989857112, + "loss": 1.105, + "step": 6205 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014362024372442534, + "loss": 1.1123, + "step": 6210 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001435391221152573, + "loss": 1.1275, + "step": 6215 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014345796513696638, + "loss": 1.0553, + "step": 6220 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014337677285548059, + "loss": 1.1397, + "step": 6225 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001432955453367568, + "loss": 1.0851, + "step": 6230 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014321428264678026, + "loss": 1.1905, + "step": 6235 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001431329848515651, + "loss": 1.0644, + "step": 6240 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014305165201715372, + "loss": 1.1171, + "step": 6245 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014297028420961713, + "loss": 1.1036, + "step": 6250 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014288888149505458, + "loss": 1.1324, + "step": 6255 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014280744393959386, + "loss": 1.1594, + "step": 6260 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001427259716093909, + "loss": 1.1471, + "step": 6265 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014264446457063013, + "loss": 1.103, + "step": 6270 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014256292288952388, + "loss": 1.1004, + "step": 6275 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014248134663231274, + "loss": 1.171, + "step": 6280 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001423997358652655, + "loss": 1.1521, + "step": 6285 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014231809065467883, + "loss": 1.1031, + "step": 6290 + }, + { + "epoch": 0.34, + "learning_rate": 0.00014223641106687746, + "loss": 1.1652, + "step": 6295 + }, + { + "epoch": 0.35, + "learning_rate": 0.000142154697168214, + "loss": 1.1709, + "step": 6300 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014207294902506898, + "loss": 1.0423, + "step": 6305 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014199116670385068, + "loss": 1.1511, + "step": 6310 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014190935027099522, + "loss": 1.1246, + "step": 6315 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014182749979296634, + "loss": 1.0853, + "step": 6320 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001417456153362556, + "loss": 1.069, + "step": 6325 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014166369696738194, + "loss": 1.1839, + "step": 6330 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001415817447528921, + "loss": 1.0938, + "step": 6335 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014149975875936012, + "loss": 1.2167, + "step": 6340 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014141773905338752, + "loss": 1.0523, + "step": 6345 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014133568570160327, + "loss": 1.0932, + "step": 6350 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014125359877066362, + "loss": 1.076, + "step": 6355 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014117147832725207, + "loss": 1.1642, + "step": 6360 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001410893244380794, + "loss": 1.1171, + "step": 6365 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001410071371698836, + "loss": 1.1356, + "step": 6370 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014092491658942958, + "loss": 1.2037, + "step": 6375 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001408426627635096, + "loss": 1.0983, + "step": 6380 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001407603757589427, + "loss": 1.0695, + "step": 6385 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014067805564257503, + "loss": 1.0783, + "step": 6390 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014059570248127942, + "loss": 1.1243, + "step": 6395 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014051331634195582, + "loss": 1.1064, + "step": 6400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014043089729153075, + "loss": 1.0942, + "step": 6405 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014034844539695753, + "loss": 1.0615, + "step": 6410 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014026596072521622, + "loss": 1.1113, + "step": 6415 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014018344334331345, + "loss": 1.0846, + "step": 6420 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014010089331828248, + "loss": 1.1403, + "step": 6425 + }, + { + "epoch": 0.35, + "learning_rate": 0.000140018310717183, + "loss": 1.1468, + "step": 6430 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013993569560710118, + "loss": 1.167, + "step": 6435 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001398530480551497, + "loss": 1.0327, + "step": 6440 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013977036812846751, + "loss": 1.1259, + "step": 6445 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013968765589421982, + "loss": 1.1125, + "step": 6450 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013960491141959817, + "loss": 1.0698, + "step": 6455 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013952213477182025, + "loss": 1.1647, + "step": 6460 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013943932601812986, + "loss": 1.0381, + "step": 6465 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013935648522579695, + "loss": 1.0488, + "step": 6470 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001392736124621175, + "loss": 1.1777, + "step": 6475 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013919070779441333, + "loss": 1.1648, + "step": 6480 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013910777129003235, + "loss": 1.1034, + "step": 6485 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013902480301634818, + "loss": 1.0858, + "step": 6490 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001389418030407604, + "loss": 1.2123, + "step": 6495 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001388587714306942, + "loss": 1.0523, + "step": 6500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013877570825360057, + "loss": 1.1354, + "step": 6505 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001386926135769561, + "loss": 1.1546, + "step": 6510 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013860948746826295, + "loss": 1.1443, + "step": 6515 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001385263299950489, + "loss": 1.1738, + "step": 6520 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013844314122486704, + "loss": 1.1277, + "step": 6525 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001383599212252961, + "loss": 1.1733, + "step": 6530 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013827667006393994, + "loss": 1.139, + "step": 6535 + }, + { + "epoch": 0.36, + "learning_rate": 0.000138193387808428, + "loss": 1.1888, + "step": 6540 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001381100745264148, + "loss": 1.1492, + "step": 6545 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013802673028558005, + "loss": 1.0426, + "step": 6550 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013794335515362877, + "loss": 1.0669, + "step": 6555 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001378599491982909, + "loss": 1.1074, + "step": 6560 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013777651248732148, + "loss": 1.1361, + "step": 6565 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013769304508850063, + "loss": 1.1386, + "step": 6570 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013760954706963324, + "loss": 1.1578, + "step": 6575 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001375260184985491, + "loss": 1.0831, + "step": 6580 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013744245944310305, + "loss": 1.1934, + "step": 6585 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013735886997117439, + "loss": 1.0961, + "step": 6590 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013727525015066726, + "loss": 1.0284, + "step": 6595 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013719160004951045, + "loss": 1.1238, + "step": 6600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013710791973565734, + "loss": 1.1232, + "step": 6605 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013702420927708586, + "loss": 1.1575, + "step": 6610 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013694046874179844, + "loss": 1.065, + "step": 6615 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013685669819782196, + "loss": 1.2215, + "step": 6620 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013677289771320755, + "loss": 1.1194, + "step": 6625 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001366890673560309, + "loss": 1.1326, + "step": 6630 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013660520719439165, + "loss": 1.0888, + "step": 6635 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013652131729641393, + "loss": 1.0788, + "step": 6640 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001364373977302459, + "loss": 1.1553, + "step": 6645 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013635344856405995, + "loss": 1.21, + "step": 6650 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013626946986605224, + "loss": 1.202, + "step": 6655 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013618546170444322, + "loss": 1.0331, + "step": 6660 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013610142414747709, + "loss": 1.1465, + "step": 6665 + }, + { + "epoch": 0.37, + "learning_rate": 0.000136017357263422, + "loss": 1.0962, + "step": 6670 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013593326112056988, + "loss": 1.1186, + "step": 6675 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001358491357872365, + "loss": 1.1885, + "step": 6680 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013576498133176126, + "loss": 1.2042, + "step": 6685 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001356807978225073, + "loss": 1.1453, + "step": 6690 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001355965853278613, + "loss": 1.054, + "step": 6695 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001355123439162335, + "loss": 1.0556, + "step": 6700 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013542807365605764, + "loss": 1.1027, + "step": 6705 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013534377461579093, + "loss": 1.1701, + "step": 6710 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013525944686391386, + "loss": 1.0448, + "step": 6715 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001351750904689303, + "loss": 1.1077, + "step": 6720 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013509070549936746, + "loss": 1.1626, + "step": 6725 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013500629202377565, + "loss": 1.1037, + "step": 6730 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001349218501107284, + "loss": 1.0966, + "step": 6735 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013483737982882235, + "loss": 1.1237, + "step": 6740 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001347528812466771, + "loss": 1.1236, + "step": 6745 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001346683544329354, + "loss": 1.095, + "step": 6750 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013458379945626274, + "loss": 1.0883, + "step": 6755 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013449921638534763, + "loss": 1.1111, + "step": 6760 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013441460528890127, + "loss": 1.152, + "step": 6765 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013432996623565782, + "loss": 1.1415, + "step": 6770 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013424529929437397, + "loss": 1.1539, + "step": 6775 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001341606045338292, + "loss": 1.1401, + "step": 6780 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001340758820228255, + "loss": 1.061, + "step": 6785 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013399113183018744, + "loss": 1.1434, + "step": 6790 + }, + { + "epoch": 0.37, + "learning_rate": 0.000133906354024762, + "loss": 1.114, + "step": 6795 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013382154867541874, + "loss": 1.0926, + "step": 6800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013373671585104952, + "loss": 1.1791, + "step": 6805 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013365185562056844, + "loss": 1.1145, + "step": 6810 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013356696805291198, + "loss": 1.1149, + "step": 6815 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013348205321703875, + "loss": 1.1729, + "step": 6820 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001333971111819296, + "loss": 1.0368, + "step": 6825 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013331214201658736, + "loss": 1.0923, + "step": 6830 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013322714579003703, + "loss": 1.0915, + "step": 6835 + }, + { + "epoch": 0.37, + "learning_rate": 0.00013314212257132542, + "loss": 1.1232, + "step": 6840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001330570724295215, + "loss": 1.0825, + "step": 6845 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001329719954337159, + "loss": 1.1598, + "step": 6850 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001328868916530212, + "loss": 1.1328, + "step": 6855 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013280176115657163, + "loss": 1.1662, + "step": 6860 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013271660401352318, + "loss": 1.0361, + "step": 6865 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013263142029305348, + "loss": 1.1472, + "step": 6870 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013254621006436178, + "loss": 1.1309, + "step": 6875 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013246097339666884, + "loss": 1.1358, + "step": 6880 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013237571035921693, + "loss": 1.0482, + "step": 6885 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001322904210212696, + "loss": 1.1341, + "step": 6890 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013220510545211198, + "loss": 1.0845, + "step": 6895 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013211976372105036, + "loss": 1.1466, + "step": 6900 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001320343958974123, + "loss": 1.1789, + "step": 6905 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013194900205054666, + "loss": 1.134, + "step": 6910 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013186358224982325, + "loss": 1.1166, + "step": 6915 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013177813656463312, + "loss": 1.1139, + "step": 6920 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001316926650643883, + "loss": 1.1691, + "step": 6925 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013160716781852186, + "loss": 1.1846, + "step": 6930 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013152164489648767, + "loss": 1.103, + "step": 6935 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013143609636776053, + "loss": 1.1676, + "step": 6940 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013135052230183594, + "loss": 1.1054, + "step": 6945 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001312649227682303, + "loss": 1.1226, + "step": 6950 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001311792978364806, + "loss": 1.0258, + "step": 6955 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013109364757614441, + "loss": 1.1132, + "step": 6960 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013100797205680008, + "loss": 1.0548, + "step": 6965 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013092227134804628, + "loss": 1.0858, + "step": 6970 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013083654551950224, + "loss": 1.1066, + "step": 6975 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013075079464080745, + "loss": 1.1426, + "step": 6980 + }, + { + "epoch": 0.38, + "learning_rate": 0.000130665018781622, + "loss": 1.1513, + "step": 6985 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013057921801162614, + "loss": 1.1525, + "step": 6990 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013049339240052028, + "loss": 1.074, + "step": 6995 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013040754201802508, + "loss": 1.1259, + "step": 7000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013032166693388144, + "loss": 1.0973, + "step": 7005 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013023576721785013, + "loss": 1.0996, + "step": 7010 + }, + { + "epoch": 0.38, + "learning_rate": 0.000130149842939712, + "loss": 1.0364, + "step": 7015 + }, + { + "epoch": 0.38, + "learning_rate": 0.00013006389416926793, + "loss": 1.1164, + "step": 7020 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001299779209763386, + "loss": 1.0655, + "step": 7025 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012989192343076453, + "loss": 1.0597, + "step": 7030 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001298059016024061, + "loss": 1.0968, + "step": 7035 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001297198555611434, + "loss": 1.1435, + "step": 7040 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012963378537687612, + "loss": 1.0785, + "step": 7045 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012954769111952363, + "loss": 1.1942, + "step": 7050 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012946157285902474, + "loss": 1.1062, + "step": 7055 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012937543066533804, + "loss": 1.0923, + "step": 7060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012928926460844124, + "loss": 1.1107, + "step": 7065 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001292030747583316, + "loss": 1.1233, + "step": 7070 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001291168611850256, + "loss": 1.1603, + "step": 7075 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001290306239585592, + "loss": 1.0632, + "step": 7080 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012894436314898734, + "loss": 1.1127, + "step": 7085 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012885807882638428, + "loss": 1.139, + "step": 7090 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001287717710608433, + "loss": 1.0544, + "step": 7095 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012868543992247672, + "loss": 1.1379, + "step": 7100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001285990854814159, + "loss": 1.1058, + "step": 7105 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012851270780781112, + "loss": 1.1533, + "step": 7110 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012842630697183145, + "loss": 1.1677, + "step": 7115 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001283398830436649, + "loss": 1.1072, + "step": 7120 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012825343609351822, + "loss": 1.1045, + "step": 7125 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012816696619161668, + "loss": 1.1754, + "step": 7130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012808047340820452, + "loss": 1.1206, + "step": 7135 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012799395781354426, + "loss": 1.1517, + "step": 7140 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001279074194779171, + "loss": 1.1024, + "step": 7145 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012782085847162273, + "loss": 1.1469, + "step": 7150 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012773427486497912, + "loss": 1.0741, + "step": 7155 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012764766872832285, + "loss": 1.1489, + "step": 7160 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012756104013200847, + "loss": 1.0851, + "step": 7165 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012747438914640906, + "loss": 1.0699, + "step": 7170 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001273877158419158, + "loss": 1.0624, + "step": 7175 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012730102028893792, + "loss": 1.1018, + "step": 7180 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001272143025579028, + "loss": 1.0596, + "step": 7185 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012712756271925584, + "loss": 1.1285, + "step": 7190 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012704080084346037, + "loss": 1.0589, + "step": 7195 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012695401700099765, + "loss": 1.0736, + "step": 7200 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012686721126236672, + "loss": 1.035, + "step": 7205 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012678038369808448, + "loss": 1.141, + "step": 7210 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012669353437868555, + "loss": 1.1144, + "step": 7215 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012660666337472223, + "loss": 1.1234, + "step": 7220 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012651977075676436, + "loss": 1.1572, + "step": 7225 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012643285659539943, + "loss": 1.1423, + "step": 7230 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001263459209612324, + "loss": 1.1231, + "step": 7235 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001262589639248856, + "loss": 1.1774, + "step": 7240 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012617198555699887, + "loss": 1.0522, + "step": 7245 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012608498592822932, + "loss": 1.0903, + "step": 7250 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012599796510925134, + "loss": 1.0749, + "step": 7255 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001259109231707565, + "loss": 1.1955, + "step": 7260 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012582386018345358, + "loss": 1.0992, + "step": 7265 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012573677621806838, + "loss": 1.1025, + "step": 7270 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012564967134534385, + "loss": 1.1453, + "step": 7275 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001255625456360398, + "loss": 1.1364, + "step": 7280 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012547539916093307, + "loss": 1.1715, + "step": 7285 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012538823199081733, + "loss": 1.035, + "step": 7290 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012530104419650312, + "loss": 1.0472, + "step": 7295 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012521383584881767, + "loss": 1.0919, + "step": 7300 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012512660701860485, + "loss": 1.0935, + "step": 7305 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012503935777672526, + "loss": 1.1187, + "step": 7310 + }, + { + "epoch": 0.4, + "learning_rate": 0.000124952088194056, + "loss": 1.0953, + "step": 7315 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012486479834149087, + "loss": 1.1204, + "step": 7320 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012477748828993994, + "loss": 1.1504, + "step": 7325 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001246901581103298, + "loss": 1.1013, + "step": 7330 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012460280787360327, + "loss": 1.0877, + "step": 7335 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012451543765071968, + "loss": 1.0486, + "step": 7340 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012442804751265437, + "loss": 1.015, + "step": 7345 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012434063753039902, + "loss": 1.211, + "step": 7350 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012425320777496136, + "loss": 1.0825, + "step": 7355 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012416575831736512, + "loss": 1.1927, + "step": 7360 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012407828922865016, + "loss": 1.1035, + "step": 7365 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012399080057987218, + "loss": 1.064, + "step": 7370 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001239032924421029, + "loss": 1.1207, + "step": 7375 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012381576488642969, + "loss": 1.0704, + "step": 7380 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012372821798395587, + "loss": 1.0769, + "step": 7385 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012364065180580033, + "loss": 1.1172, + "step": 7390 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012355306642309783, + "loss": 1.1979, + "step": 7395 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012346546190699845, + "loss": 1.1797, + "step": 7400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012337783832866802, + "loss": 1.1502, + "step": 7405 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012329019575928773, + "loss": 1.141, + "step": 7410 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012320253427005435, + "loss": 1.1396, + "step": 7415 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001231148539321798, + "loss": 1.1164, + "step": 7420 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012302715481689154, + "loss": 1.1889, + "step": 7425 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001229394369954322, + "loss": 1.165, + "step": 7430 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001228517005390595, + "loss": 1.1114, + "step": 7435 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012276394551904646, + "loss": 1.1685, + "step": 7440 + }, + { + "epoch": 0.41, + "learning_rate": 0.000122676172006681, + "loss": 1.1438, + "step": 7445 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012258838007326634, + "loss": 1.0657, + "step": 7450 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012250056979012033, + "loss": 1.1096, + "step": 7455 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012241274122857601, + "loss": 1.2047, + "step": 7460 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012232489445998106, + "loss": 1.1617, + "step": 7465 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001222370295556981, + "loss": 1.1167, + "step": 7470 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012214914658710438, + "loss": 1.2007, + "step": 7475 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012206124562559188, + "loss": 1.111, + "step": 7480 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012197332674256717, + "loss": 1.0913, + "step": 7485 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012188539000945141, + "loss": 1.0143, + "step": 7490 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012179743549768025, + "loss": 1.1422, + "step": 7495 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012170946327870377, + "loss": 1.0883, + "step": 7500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012162147342398645, + "loss": 1.0899, + "step": 7505 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012153346600500702, + "loss": 1.1782, + "step": 7510 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012144544109325861, + "loss": 1.1643, + "step": 7515 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012135739876024845, + "loss": 1.0894, + "step": 7520 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012126933907749802, + "loss": 1.0882, + "step": 7525 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012118126211654275, + "loss": 1.0165, + "step": 7530 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012109316794893234, + "loss": 1.1583, + "step": 7535 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012100505664623017, + "loss": 1.1101, + "step": 7540 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012091692828001379, + "loss": 1.0364, + "step": 7545 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012082878292187446, + "loss": 1.1058, + "step": 7550 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012074062064341734, + "loss": 1.087, + "step": 7555 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012065244151626128, + "loss": 1.1542, + "step": 7560 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012056424561203881, + "loss": 1.0439, + "step": 7565 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012047603300239615, + "loss": 1.1374, + "step": 7570 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012038780375899301, + "loss": 1.144, + "step": 7575 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001202995579535027, + "loss": 1.0997, + "step": 7580 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001202112956576119, + "loss": 1.0783, + "step": 7585 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012012301694302073, + "loss": 1.2033, + "step": 7590 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012003472188144268, + "loss": 1.1214, + "step": 7595 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011994641054460442, + "loss": 1.0755, + "step": 7600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011985808300424591, + "loss": 1.1237, + "step": 7605 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011976973933212029, + "loss": 1.0691, + "step": 7610 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001196813795999938, + "loss": 0.9849, + "step": 7615 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011959300387964566, + "loss": 1.1364, + "step": 7620 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950461224286811, + "loss": 1.168, + "step": 7625 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011941620476146639, + "loss": 1.069, + "step": 7630 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011932778150725853, + "loss": 1.1551, + "step": 7635 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001192393425520754, + "loss": 1.2237, + "step": 7640 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011915088796776064, + "loss": 1.09, + "step": 7645 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011906241782617053, + "loss": 1.028, + "step": 7650 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011897393219917407, + "loss": 1.1026, + "step": 7655 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011888543115865275, + "loss": 1.2062, + "step": 7660 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011879691477650066, + "loss": 1.074, + "step": 7665 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001187083831246243, + "loss": 1.0966, + "step": 7670 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011861983627494263, + "loss": 1.1676, + "step": 7675 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011853127429938691, + "loss": 1.1287, + "step": 7680 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011844269726990065, + "loss": 1.1584, + "step": 7685 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011835410525843971, + "loss": 1.0942, + "step": 7690 + }, + { + "epoch": 0.42, + "learning_rate": 0.000118265498336972, + "loss": 1.0862, + "step": 7695 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011817687657747764, + "loss": 1.1383, + "step": 7700 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001180882400519487, + "loss": 1.2052, + "step": 7705 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011799958883238936, + "loss": 1.0728, + "step": 7710 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011791092299081557, + "loss": 1.0966, + "step": 7715 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011782224259925539, + "loss": 1.0579, + "step": 7720 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011773354772974851, + "loss": 1.1445, + "step": 7725 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011764483845434648, + "loss": 1.0766, + "step": 7730 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011755611484511243, + "loss": 1.0613, + "step": 7735 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011746737697412134, + "loss": 1.1561, + "step": 7740 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011737862491345958, + "loss": 1.1509, + "step": 7745 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011728985873522515, + "loss": 1.1618, + "step": 7750 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011720107851152748, + "loss": 1.1325, + "step": 7755 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011711228431448747, + "loss": 1.0781, + "step": 7760 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011702347621623727, + "loss": 1.1156, + "step": 7765 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011693465428892038, + "loss": 1.0532, + "step": 7770 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011684581860469158, + "loss": 1.1063, + "step": 7775 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011675696923571672, + "loss": 1.1312, + "step": 7780 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666810625417287, + "loss": 1.0498, + "step": 7785 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001165792297322481, + "loss": 1.0892, + "step": 7790 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011649033974214147, + "loss": 1.1588, + "step": 7795 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011640143635606302, + "loss": 1.1111, + "step": 7800 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011631251964623364, + "loss": 1.0888, + "step": 7805 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011622358968488508, + "loss": 1.0872, + "step": 7810 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011613464654425983, + "loss": 1.1512, + "step": 7815 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011604569029661104, + "loss": 1.0845, + "step": 7820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011595672101420262, + "loss": 1.1367, + "step": 7825 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011586773876930899, + "loss": 1.1181, + "step": 7830 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011577874363421512, + "loss": 1.0756, + "step": 7835 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011568973568121645, + "loss": 1.1412, + "step": 7840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011560071498261881, + "loss": 1.0736, + "step": 7845 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011551168161073839, + "loss": 1.1778, + "step": 7850 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011542263563790174, + "loss": 1.0603, + "step": 7855 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011533357713644553, + "loss": 1.1548, + "step": 7860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011524450617871672, + "loss": 1.1176, + "step": 7865 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011515542283707233, + "loss": 1.1566, + "step": 7870 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011506632718387944, + "loss": 1.154, + "step": 7875 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011497721929151517, + "loss": 1.1619, + "step": 7880 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011488809923236655, + "loss": 1.1674, + "step": 7885 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011479896707883044, + "loss": 1.1822, + "step": 7890 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001147098229033137, + "loss": 1.0471, + "step": 7895 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011462066677823272, + "loss": 1.0724, + "step": 7900 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011453149877601381, + "loss": 1.0871, + "step": 7905 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011444231896909281, + "loss": 1.1505, + "step": 7910 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011435312742991515, + "loss": 1.0695, + "step": 7915 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011426392423093583, + "loss": 1.1652, + "step": 7920 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011417470944461934, + "loss": 1.116, + "step": 7925 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011408548314343948, + "loss": 1.1355, + "step": 7930 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011399624539987953, + "loss": 1.1658, + "step": 7935 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011390699628643199, + "loss": 1.1076, + "step": 7940 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011381773587559867, + "loss": 1.1546, + "step": 7945 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011372846423989039, + "loss": 1.1903, + "step": 7950 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011363918145182732, + "loss": 1.0247, + "step": 7955 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011354988758393852, + "loss": 1.1272, + "step": 7960 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011346058270876213, + "loss": 1.0561, + "step": 7965 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001133712668988452, + "loss": 1.1478, + "step": 7970 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011328194022674359, + "loss": 1.1178, + "step": 7975 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001131926027650222, + "loss": 1.1229, + "step": 7980 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011310325458625445, + "loss": 1.0693, + "step": 7985 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011301389576302265, + "loss": 1.1717, + "step": 7990 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001129245263679176, + "loss": 1.1281, + "step": 7995 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011283514647353882, + "loss": 1.1596, + "step": 8000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011274575615249428, + "loss": 1.1106, + "step": 8005 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011265635547740047, + "loss": 1.0998, + "step": 8010 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011256694452088229, + "loss": 1.1486, + "step": 8015 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011247752335557291, + "loss": 1.2001, + "step": 8020 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011238809205411388, + "loss": 1.1696, + "step": 8025 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011229865068915498, + "loss": 1.0776, + "step": 8030 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011220919933335415, + "loss": 1.1891, + "step": 8035 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011211973805937738, + "loss": 1.118, + "step": 8040 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011203026693989887, + "loss": 1.0707, + "step": 8045 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011194078604760059, + "loss": 1.1028, + "step": 8050 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011185129545517271, + "loss": 1.0917, + "step": 8055 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011176179523531304, + "loss": 1.0812, + "step": 8060 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011167228546072745, + "loss": 1.0958, + "step": 8065 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011158276620412938, + "loss": 1.1227, + "step": 8070 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011149323753824004, + "loss": 1.1266, + "step": 8075 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011140369953578825, + "loss": 1.1608, + "step": 8080 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011131415226951053, + "loss": 1.1437, + "step": 8085 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011122459581215079, + "loss": 1.1049, + "step": 8090 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011113503023646044, + "loss": 1.058, + "step": 8095 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011104545561519839, + "loss": 1.1221, + "step": 8100 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011095587202113073, + "loss": 1.1238, + "step": 8105 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011086627952703098, + "loss": 1.0795, + "step": 8110 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011077667820567983, + "loss": 1.0916, + "step": 8115 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011068706812986519, + "loss": 1.1748, + "step": 8120 + }, + { + "epoch": 0.45, + "learning_rate": 0.000110597449372382, + "loss": 1.0677, + "step": 8125 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011050782200603235, + "loss": 1.0826, + "step": 8130 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011041818610362524, + "loss": 1.1312, + "step": 8135 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011032854173797663, + "loss": 1.1722, + "step": 8140 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011023888898190938, + "loss": 1.1446, + "step": 8145 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011014922790825312, + "loss": 1.0922, + "step": 8150 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011005955858984429, + "loss": 1.1422, + "step": 8155 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010996988109952601, + "loss": 1.1008, + "step": 8160 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010988019551014796, + "loss": 1.1327, + "step": 8165 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010979050189456655, + "loss": 1.1599, + "step": 8170 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010970080032564465, + "loss": 1.1243, + "step": 8175 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001096110908762515, + "loss": 1.0868, + "step": 8180 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010952137361926283, + "loss": 1.1039, + "step": 8185 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010943164862756068, + "loss": 1.1166, + "step": 8190 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010934191597403346, + "loss": 1.1886, + "step": 8195 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010925217573157567, + "loss": 1.1081, + "step": 8200 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010916242797308802, + "loss": 1.0353, + "step": 8205 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010907267277147739, + "loss": 1.0747, + "step": 8210 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010898291019965666, + "loss": 1.1725, + "step": 8215 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010889314033054463, + "loss": 1.1889, + "step": 8220 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010880336323706617, + "loss": 1.083, + "step": 8225 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010871357899215189, + "loss": 1.0201, + "step": 8230 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010862378766873829, + "loss": 1.1199, + "step": 8235 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010853398933976762, + "loss": 1.0812, + "step": 8240 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010844418407818777, + "loss": 1.0576, + "step": 8245 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001083543719569523, + "loss": 1.1362, + "step": 8250 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010826455304902028, + "loss": 1.1578, + "step": 8255 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010817472742735639, + "loss": 1.1307, + "step": 8260 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010808489516493067, + "loss": 1.0221, + "step": 8265 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010799505633471865, + "loss": 1.084, + "step": 8270 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010790521100970109, + "loss": 1.2108, + "step": 8275 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010781535926286415, + "loss": 1.0533, + "step": 8280 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010772550116719907, + "loss": 1.0747, + "step": 8285 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010763563679570236, + "loss": 1.0919, + "step": 8290 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010754576622137557, + "loss": 1.1159, + "step": 8295 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010745588951722532, + "loss": 1.0903, + "step": 8300 + }, + { + "epoch": 0.45, + "learning_rate": 0.00010736600675626317, + "loss": 1.2006, + "step": 8305 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001072761180115056, + "loss": 1.0875, + "step": 8310 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010718622335597404, + "loss": 1.0446, + "step": 8315 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001070963228626946, + "loss": 1.1178, + "step": 8320 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001070064166046982, + "loss": 1.1285, + "step": 8325 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010691650465502045, + "loss": 1.1103, + "step": 8330 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010682658708670152, + "loss": 1.0727, + "step": 8335 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001067366639727862, + "loss": 1.0388, + "step": 8340 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010664673538632383, + "loss": 1.068, + "step": 8345 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010655680140036806, + "loss": 1.0831, + "step": 8350 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010646686208797709, + "loss": 1.1118, + "step": 8355 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010637691752221326, + "loss": 1.0489, + "step": 8360 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010628696777614337, + "loss": 1.2107, + "step": 8365 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010619701292283831, + "loss": 1.1086, + "step": 8370 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010610705303537314, + "loss": 1.1162, + "step": 8375 + }, + { + "epoch": 0.46, + "learning_rate": 0.000106017088186827, + "loss": 1.1369, + "step": 8380 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010592711845028311, + "loss": 1.0564, + "step": 8385 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010583714389882863, + "loss": 1.0543, + "step": 8390 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010574716460555456, + "loss": 1.0828, + "step": 8395 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010565718064355594, + "loss": 1.1165, + "step": 8400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010556719208593139, + "loss": 1.0853, + "step": 8405 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010547719900578339, + "loss": 1.1088, + "step": 8410 + }, + { + "epoch": 0.46, + "learning_rate": 0.000105387201476218, + "loss": 1.181, + "step": 8415 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010529719957034506, + "loss": 1.0139, + "step": 8420 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010520719336127777, + "loss": 1.0587, + "step": 8425 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010511718292213296, + "loss": 1.114, + "step": 8430 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010502716832603086, + "loss": 1.0848, + "step": 8435 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010493714964609504, + "loss": 1.0236, + "step": 8440 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010484712695545241, + "loss": 1.0697, + "step": 8445 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010475710032723319, + "loss": 1.139, + "step": 8450 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010466706983457074, + "loss": 1.0962, + "step": 8455 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010457703555060156, + "loss": 1.1206, + "step": 8460 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010448699754846526, + "loss": 1.1129, + "step": 8465 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010439695590130442, + "loss": 1.1232, + "step": 8470 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010430691068226464, + "loss": 1.1069, + "step": 8475 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010421686196449439, + "loss": 1.1377, + "step": 8480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00010412680982114497, + "loss": 1.1074, + "step": 8485 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010403675432537048, + "loss": 1.0847, + "step": 8490 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001039466955503278, + "loss": 1.1333, + "step": 8495 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010385663356917636, + "loss": 1.126, + "step": 8500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010376656845507828, + "loss": 1.1479, + "step": 8505 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001036765002811982, + "loss": 1.1399, + "step": 8510 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001035864291207032, + "loss": 1.0221, + "step": 8515 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001034963550467629, + "loss": 1.0601, + "step": 8520 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010340627813254912, + "loss": 1.1176, + "step": 8525 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001033161984512362, + "loss": 1.0856, + "step": 8530 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010322611607600051, + "loss": 1.1335, + "step": 8535 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010313603108002078, + "loss": 1.2108, + "step": 8540 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010304594353647775, + "loss": 1.14, + "step": 8545 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010295585351855434, + "loss": 1.1015, + "step": 8550 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010286576109943532, + "loss": 1.1345, + "step": 8555 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010277566635230758, + "loss": 1.198, + "step": 8560 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010268556935035981, + "loss": 1.1173, + "step": 8565 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010259547016678258, + "loss": 1.0916, + "step": 8570 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001025053688747682, + "loss": 1.0415, + "step": 8575 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010241526554751064, + "loss": 1.0853, + "step": 8580 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010232516025820567, + "loss": 1.1686, + "step": 8585 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010223505308005045, + "loss": 1.1566, + "step": 8590 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010214494408624389, + "loss": 1.1284, + "step": 8595 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010205483334998624, + "loss": 1.0405, + "step": 8600 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010196472094447919, + "loss": 1.1414, + "step": 8605 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010187460694292579, + "loss": 1.0284, + "step": 8610 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010178449141853041, + "loss": 1.0877, + "step": 8615 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010169437444449861, + "loss": 1.1489, + "step": 8620 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001016042560940372, + "loss": 1.1652, + "step": 8625 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010151413644035399, + "loss": 1.1412, + "step": 8630 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010142401555665801, + "loss": 1.1032, + "step": 8635 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010133389351615913, + "loss": 1.1724, + "step": 8640 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010124377039206829, + "loss": 1.1178, + "step": 8645 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010115364625759724, + "loss": 1.1241, + "step": 8650 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010106352118595853, + "loss": 1.1038, + "step": 8655 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010097339525036557, + "loss": 1.1397, + "step": 8660 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010088326852403228, + "loss": 1.1775, + "step": 8665 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010079314108017346, + "loss": 1.0928, + "step": 8670 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010070301299200436, + "loss": 1.2399, + "step": 8675 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010061288433274078, + "loss": 1.1488, + "step": 8680 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010052275517559896, + "loss": 1.0828, + "step": 8685 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010043262559379559, + "loss": 1.1745, + "step": 8690 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001003424956605476, + "loss": 1.1657, + "step": 8695 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001002523654490724, + "loss": 1.1242, + "step": 8700 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010016223503258743, + "loss": 1.1512, + "step": 8705 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010007210448431039, + "loss": 1.0913, + "step": 8710 + }, + { + "epoch": 0.48, + "learning_rate": 9.998197387745908e-05, + "loss": 1.1523, + "step": 8715 + }, + { + "epoch": 0.48, + "learning_rate": 9.989184328525132e-05, + "loss": 1.1509, + "step": 8720 + }, + { + "epoch": 0.48, + "learning_rate": 9.9801712780905e-05, + "loss": 1.11, + "step": 8725 + }, + { + "epoch": 0.48, + "learning_rate": 9.97115824376378e-05, + "loss": 1.0863, + "step": 8730 + }, + { + "epoch": 0.48, + "learning_rate": 9.962145232866739e-05, + "loss": 1.1648, + "step": 8735 + }, + { + "epoch": 0.48, + "learning_rate": 9.953132252721119e-05, + "loss": 1.1273, + "step": 8740 + }, + { + "epoch": 0.48, + "learning_rate": 9.944119310648642e-05, + "loss": 1.1452, + "step": 8745 + }, + { + "epoch": 0.48, + "learning_rate": 9.93510641397099e-05, + "loss": 1.158, + "step": 8750 + }, + { + "epoch": 0.48, + "learning_rate": 9.92609357000982e-05, + "loss": 1.1209, + "step": 8755 + }, + { + "epoch": 0.48, + "learning_rate": 9.917080786086734e-05, + "loss": 1.1773, + "step": 8760 + }, + { + "epoch": 0.48, + "learning_rate": 9.908068069523296e-05, + "loss": 1.153, + "step": 8765 + }, + { + "epoch": 0.48, + "learning_rate": 9.899055427641009e-05, + "loss": 1.0285, + "step": 8770 + }, + { + "epoch": 0.48, + "learning_rate": 9.890042867761317e-05, + "loss": 1.0746, + "step": 8775 + }, + { + "epoch": 0.48, + "learning_rate": 9.881030397205597e-05, + "loss": 1.0845, + "step": 8780 + }, + { + "epoch": 0.48, + "learning_rate": 9.872018023295152e-05, + "loss": 1.0393, + "step": 8785 + }, + { + "epoch": 0.48, + "learning_rate": 9.863005753351213e-05, + "loss": 1.004, + "step": 8790 + }, + { + "epoch": 0.48, + "learning_rate": 9.853993594694917e-05, + "loss": 1.1829, + "step": 8795 + }, + { + "epoch": 0.48, + "learning_rate": 9.84498155464732e-05, + "loss": 1.1199, + "step": 8800 + }, + { + "epoch": 0.48, + "learning_rate": 9.83596964052937e-05, + "loss": 1.0783, + "step": 8805 + }, + { + "epoch": 0.48, + "learning_rate": 9.826957859661926e-05, + "loss": 1.1083, + "step": 8810 + }, + { + "epoch": 0.48, + "learning_rate": 9.817946219365726e-05, + "loss": 1.086, + "step": 8815 + }, + { + "epoch": 0.48, + "learning_rate": 9.80893472696141e-05, + "loss": 1.057, + "step": 8820 + }, + { + "epoch": 0.48, + "learning_rate": 9.799923389769476e-05, + "loss": 1.0881, + "step": 8825 + }, + { + "epoch": 0.48, + "learning_rate": 9.790912215110317e-05, + "loss": 1.1337, + "step": 8830 + }, + { + "epoch": 0.48, + "learning_rate": 9.78190121030418e-05, + "loss": 1.1191, + "step": 8835 + }, + { + "epoch": 0.48, + "learning_rate": 9.772890382671183e-05, + "loss": 1.1182, + "step": 8840 + }, + { + "epoch": 0.48, + "learning_rate": 9.763879739531298e-05, + "loss": 1.1848, + "step": 8845 + }, + { + "epoch": 0.48, + "learning_rate": 9.754869288204335e-05, + "loss": 1.1397, + "step": 8850 + }, + { + "epoch": 0.49, + "learning_rate": 9.74585903600997e-05, + "loss": 1.1849, + "step": 8855 + }, + { + "epoch": 0.49, + "learning_rate": 9.736848990267696e-05, + "loss": 1.1042, + "step": 8860 + }, + { + "epoch": 0.49, + "learning_rate": 9.727839158296855e-05, + "loss": 1.1374, + "step": 8865 + }, + { + "epoch": 0.49, + "learning_rate": 9.718829547416604e-05, + "loss": 1.0807, + "step": 8870 + }, + { + "epoch": 0.49, + "learning_rate": 9.70982016494593e-05, + "loss": 1.1148, + "step": 8875 + }, + { + "epoch": 0.49, + "learning_rate": 9.700811018203617e-05, + "loss": 1.157, + "step": 8880 + }, + { + "epoch": 0.49, + "learning_rate": 9.691802114508287e-05, + "loss": 1.0911, + "step": 8885 + }, + { + "epoch": 0.49, + "learning_rate": 9.682793461178332e-05, + "loss": 1.095, + "step": 8890 + }, + { + "epoch": 0.49, + "learning_rate": 9.673785065531967e-05, + "loss": 1.0803, + "step": 8895 + }, + { + "epoch": 0.49, + "learning_rate": 9.664776934887174e-05, + "loss": 1.1453, + "step": 8900 + }, + { + "epoch": 0.49, + "learning_rate": 9.655769076561741e-05, + "loss": 1.1647, + "step": 8905 + }, + { + "epoch": 0.49, + "learning_rate": 9.646761497873227e-05, + "loss": 1.1342, + "step": 8910 + }, + { + "epoch": 0.49, + "learning_rate": 9.637754206138958e-05, + "loss": 1.1248, + "step": 8915 + }, + { + "epoch": 0.49, + "learning_rate": 9.628747208676034e-05, + "loss": 1.0698, + "step": 8920 + }, + { + "epoch": 0.49, + "learning_rate": 9.619740512801313e-05, + "loss": 1.0617, + "step": 8925 + }, + { + "epoch": 0.49, + "learning_rate": 9.610734125831408e-05, + "loss": 1.0975, + "step": 8930 + }, + { + "epoch": 0.49, + "learning_rate": 9.601728055082683e-05, + "loss": 1.0645, + "step": 8935 + }, + { + "epoch": 0.49, + "learning_rate": 9.592722307871245e-05, + "loss": 1.0875, + "step": 8940 + }, + { + "epoch": 0.49, + "learning_rate": 9.58371689151293e-05, + "loss": 1.1371, + "step": 8945 + }, + { + "epoch": 0.49, + "learning_rate": 9.574711813323325e-05, + "loss": 1.0814, + "step": 8950 + }, + { + "epoch": 0.49, + "learning_rate": 9.565707080617716e-05, + "loss": 1.1898, + "step": 8955 + }, + { + "epoch": 0.49, + "learning_rate": 9.55670270071113e-05, + "loss": 1.0478, + "step": 8960 + }, + { + "epoch": 0.49, + "learning_rate": 9.547698680918297e-05, + "loss": 1.0517, + "step": 8965 + }, + { + "epoch": 0.49, + "learning_rate": 9.538695028553656e-05, + "loss": 1.1166, + "step": 8970 + }, + { + "epoch": 0.49, + "learning_rate": 9.529691750931351e-05, + "loss": 1.136, + "step": 8975 + }, + { + "epoch": 0.49, + "learning_rate": 9.520688855365215e-05, + "loss": 1.1232, + "step": 8980 + }, + { + "epoch": 0.49, + "learning_rate": 9.511686349168777e-05, + "loss": 1.1879, + "step": 8985 + }, + { + "epoch": 0.49, + "learning_rate": 9.502684239655242e-05, + "loss": 1.1094, + "step": 8990 + }, + { + "epoch": 0.49, + "learning_rate": 9.493682534137506e-05, + "loss": 1.075, + "step": 8995 + }, + { + "epoch": 0.49, + "learning_rate": 9.484681239928122e-05, + "loss": 1.1481, + "step": 9000 + }, + { + "epoch": 0.49, + "learning_rate": 9.475680364339322e-05, + "loss": 1.0817, + "step": 9005 + }, + { + "epoch": 0.49, + "learning_rate": 9.466679914682984e-05, + "loss": 1.1945, + "step": 9010 + }, + { + "epoch": 0.49, + "learning_rate": 9.457679898270654e-05, + "loss": 1.0667, + "step": 9015 + }, + { + "epoch": 0.49, + "learning_rate": 9.448680322413513e-05, + "loss": 1.0896, + "step": 9020 + }, + { + "epoch": 0.49, + "learning_rate": 9.439681194422398e-05, + "loss": 1.0759, + "step": 9025 + }, + { + "epoch": 0.49, + "learning_rate": 9.430682521607768e-05, + "loss": 1.104, + "step": 9030 + }, + { + "epoch": 0.49, + "learning_rate": 9.421684311279724e-05, + "loss": 1.1019, + "step": 9035 + }, + { + "epoch": 0.5, + "learning_rate": 9.412686570747982e-05, + "loss": 1.1248, + "step": 9040 + }, + { + "epoch": 0.5, + "learning_rate": 9.403689307321886e-05, + "loss": 1.1526, + "step": 9045 + }, + { + "epoch": 0.5, + "learning_rate": 9.394692528310385e-05, + "loss": 1.1475, + "step": 9050 + }, + { + "epoch": 0.5, + "learning_rate": 9.385696241022034e-05, + "loss": 1.0939, + "step": 9055 + }, + { + "epoch": 0.5, + "learning_rate": 9.376700452764995e-05, + "loss": 1.1197, + "step": 9060 + }, + { + "epoch": 0.5, + "learning_rate": 9.367705170847017e-05, + "loss": 1.0349, + "step": 9065 + }, + { + "epoch": 0.5, + "learning_rate": 9.358710402575445e-05, + "loss": 1.1744, + "step": 9070 + }, + { + "epoch": 0.5, + "learning_rate": 9.349716155257198e-05, + "loss": 1.1197, + "step": 9075 + }, + { + "epoch": 0.5, + "learning_rate": 9.340722436198785e-05, + "loss": 1.137, + "step": 9080 + }, + { + "epoch": 0.5, + "learning_rate": 9.331729252706271e-05, + "loss": 1.1218, + "step": 9085 + }, + { + "epoch": 0.5, + "learning_rate": 9.322736612085298e-05, + "loss": 1.1465, + "step": 9090 + }, + { + "epoch": 0.5, + "learning_rate": 9.313744521641057e-05, + "loss": 1.1061, + "step": 9095 + }, + { + "epoch": 0.5, + "learning_rate": 9.3047529886783e-05, + "loss": 1.1017, + "step": 9100 + }, + { + "epoch": 0.5, + "learning_rate": 9.295762020501321e-05, + "loss": 1.129, + "step": 9105 + }, + { + "epoch": 0.5, + "learning_rate": 9.286771624413959e-05, + "loss": 1.1193, + "step": 9110 + }, + { + "epoch": 0.5, + "learning_rate": 9.277781807719585e-05, + "loss": 1.0572, + "step": 9115 + }, + { + "epoch": 0.5, + "learning_rate": 9.268792577721101e-05, + "loss": 1.1119, + "step": 9120 + }, + { + "epoch": 0.5, + "learning_rate": 9.259803941720935e-05, + "loss": 1.1245, + "step": 9125 + }, + { + "epoch": 0.5, + "learning_rate": 9.250815907021021e-05, + "loss": 1.1375, + "step": 9130 + }, + { + "epoch": 0.5, + "learning_rate": 9.241828480922824e-05, + "loss": 1.1915, + "step": 9135 + }, + { + "epoch": 0.5, + "learning_rate": 9.232841670727296e-05, + "loss": 1.2246, + "step": 9140 + }, + { + "epoch": 0.5, + "learning_rate": 9.223855483734902e-05, + "loss": 1.0714, + "step": 9145 + }, + { + "epoch": 0.5, + "learning_rate": 9.21486992724559e-05, + "loss": 1.1096, + "step": 9150 + }, + { + "epoch": 0.5, + "learning_rate": 9.205885008558804e-05, + "loss": 1.1561, + "step": 9155 + }, + { + "epoch": 0.5, + "learning_rate": 9.196900734973467e-05, + "loss": 1.1509, + "step": 9160 + }, + { + "epoch": 0.5, + "learning_rate": 9.187917113787978e-05, + "loss": 1.1323, + "step": 9165 + }, + { + "epoch": 0.5, + "learning_rate": 9.1789341523002e-05, + "loss": 1.2388, + "step": 9170 + }, + { + "epoch": 0.5, + "learning_rate": 9.169951857807473e-05, + "loss": 1.0981, + "step": 9175 + }, + { + "epoch": 0.5, + "learning_rate": 9.160970237606589e-05, + "loss": 1.1694, + "step": 9180 + }, + { + "epoch": 0.5, + "learning_rate": 9.151989298993788e-05, + "loss": 1.1463, + "step": 9185 + }, + { + "epoch": 0.5, + "learning_rate": 9.143009049264758e-05, + "loss": 1.1138, + "step": 9190 + }, + { + "epoch": 0.5, + "learning_rate": 9.13402949571463e-05, + "loss": 1.1081, + "step": 9195 + }, + { + "epoch": 0.5, + "learning_rate": 9.125050645637973e-05, + "loss": 1.1866, + "step": 9200 + }, + { + "epoch": 0.5, + "learning_rate": 9.116072506328772e-05, + "loss": 1.1086, + "step": 9205 + }, + { + "epoch": 0.5, + "learning_rate": 9.107095085080447e-05, + "loss": 1.1476, + "step": 9210 + }, + { + "epoch": 0.5, + "learning_rate": 9.098118389185831e-05, + "loss": 1.165, + "step": 9215 + }, + { + "epoch": 0.51, + "learning_rate": 9.08914242593717e-05, + "loss": 1.0506, + "step": 9220 + }, + { + "epoch": 0.51, + "learning_rate": 9.080167202626106e-05, + "loss": 1.1633, + "step": 9225 + }, + { + "epoch": 0.51, + "learning_rate": 9.071192726543688e-05, + "loss": 1.1322, + "step": 9230 + }, + { + "epoch": 0.51, + "learning_rate": 9.062219004980355e-05, + "loss": 1.0703, + "step": 9235 + }, + { + "epoch": 0.51, + "learning_rate": 9.053246045225935e-05, + "loss": 1.1341, + "step": 9240 + }, + { + "epoch": 0.51, + "learning_rate": 9.044273854569635e-05, + "loss": 1.1245, + "step": 9245 + }, + { + "epoch": 0.51, + "learning_rate": 9.035302440300037e-05, + "loss": 1.0769, + "step": 9250 + }, + { + "epoch": 0.51, + "learning_rate": 9.026331809705098e-05, + "loss": 1.1332, + "step": 9255 + }, + { + "epoch": 0.51, + "learning_rate": 9.017361970072127e-05, + "loss": 1.0415, + "step": 9260 + }, + { + "epoch": 0.51, + "learning_rate": 9.008392928687806e-05, + "loss": 1.0913, + "step": 9265 + }, + { + "epoch": 0.51, + "learning_rate": 8.999424692838153e-05, + "loss": 1.219, + "step": 9270 + }, + { + "epoch": 0.51, + "learning_rate": 8.99045726980854e-05, + "loss": 1.0861, + "step": 9275 + }, + { + "epoch": 0.51, + "learning_rate": 8.981490666883679e-05, + "loss": 1.2252, + "step": 9280 + }, + { + "epoch": 0.51, + "learning_rate": 8.972524891347613e-05, + "loss": 1.0959, + "step": 9285 + }, + { + "epoch": 0.51, + "learning_rate": 8.963559950483714e-05, + "loss": 1.1034, + "step": 9290 + }, + { + "epoch": 0.51, + "learning_rate": 8.954595851574678e-05, + "loss": 1.098, + "step": 9295 + }, + { + "epoch": 0.51, + "learning_rate": 8.94563260190251e-05, + "loss": 1.1551, + "step": 9300 + }, + { + "epoch": 0.51, + "learning_rate": 8.936670208748532e-05, + "loss": 1.1954, + "step": 9305 + }, + { + "epoch": 0.51, + "learning_rate": 8.927708679393374e-05, + "loss": 1.1415, + "step": 9310 + }, + { + "epoch": 0.51, + "learning_rate": 8.918748021116947e-05, + "loss": 1.0772, + "step": 9315 + }, + { + "epoch": 0.51, + "learning_rate": 8.909788241198477e-05, + "loss": 1.1076, + "step": 9320 + }, + { + "epoch": 0.51, + "learning_rate": 8.900829346916456e-05, + "loss": 1.0871, + "step": 9325 + }, + { + "epoch": 0.51, + "learning_rate": 8.891871345548674e-05, + "loss": 1.1126, + "step": 9330 + }, + { + "epoch": 0.51, + "learning_rate": 8.88291424437218e-05, + "loss": 1.0862, + "step": 9335 + }, + { + "epoch": 0.51, + "learning_rate": 8.873958050663306e-05, + "loss": 1.1689, + "step": 9340 + }, + { + "epoch": 0.51, + "learning_rate": 8.865002771697636e-05, + "loss": 1.1083, + "step": 9345 + }, + { + "epoch": 0.51, + "learning_rate": 8.856048414750016e-05, + "loss": 1.1504, + "step": 9350 + }, + { + "epoch": 0.51, + "learning_rate": 8.847094987094539e-05, + "loss": 1.0532, + "step": 9355 + }, + { + "epoch": 0.51, + "learning_rate": 8.838142496004555e-05, + "loss": 1.0392, + "step": 9360 + }, + { + "epoch": 0.51, + "learning_rate": 8.829190948752635e-05, + "loss": 1.1317, + "step": 9365 + }, + { + "epoch": 0.51, + "learning_rate": 8.820240352610594e-05, + "loss": 1.1114, + "step": 9370 + }, + { + "epoch": 0.51, + "learning_rate": 8.811290714849475e-05, + "loss": 1.1714, + "step": 9375 + }, + { + "epoch": 0.51, + "learning_rate": 8.802342042739538e-05, + "loss": 1.1133, + "step": 9380 + }, + { + "epoch": 0.51, + "learning_rate": 8.793394343550268e-05, + "loss": 1.1332, + "step": 9385 + }, + { + "epoch": 0.51, + "learning_rate": 8.784447624550344e-05, + "loss": 1.1295, + "step": 9390 + }, + { + "epoch": 0.51, + "learning_rate": 8.775501893007663e-05, + "loss": 1.1029, + "step": 9395 + }, + { + "epoch": 0.51, + "learning_rate": 8.766557156189304e-05, + "loss": 1.1589, + "step": 9400 + }, + { + "epoch": 0.52, + "learning_rate": 8.75761342136156e-05, + "loss": 1.1283, + "step": 9405 + }, + { + "epoch": 0.52, + "learning_rate": 8.748670695789893e-05, + "loss": 1.0744, + "step": 9410 + }, + { + "epoch": 0.52, + "learning_rate": 8.739728986738951e-05, + "loss": 1.1723, + "step": 9415 + }, + { + "epoch": 0.52, + "learning_rate": 8.730788301472553e-05, + "loss": 1.171, + "step": 9420 + }, + { + "epoch": 0.52, + "learning_rate": 8.72184864725369e-05, + "loss": 1.1209, + "step": 9425 + }, + { + "epoch": 0.52, + "learning_rate": 8.712910031344516e-05, + "loss": 1.0965, + "step": 9430 + }, + { + "epoch": 0.52, + "learning_rate": 8.70397246100634e-05, + "loss": 1.1513, + "step": 9435 + }, + { + "epoch": 0.52, + "learning_rate": 8.695035943499618e-05, + "loss": 1.1732, + "step": 9440 + }, + { + "epoch": 0.52, + "learning_rate": 8.686100486083955e-05, + "loss": 1.1174, + "step": 9445 + }, + { + "epoch": 0.52, + "learning_rate": 8.677166096018097e-05, + "loss": 1.1186, + "step": 9450 + }, + { + "epoch": 0.52, + "learning_rate": 8.668232780559915e-05, + "loss": 1.1368, + "step": 9455 + }, + { + "epoch": 0.52, + "learning_rate": 8.659300546966415e-05, + "loss": 1.076, + "step": 9460 + }, + { + "epoch": 0.52, + "learning_rate": 8.650369402493722e-05, + "loss": 1.131, + "step": 9465 + }, + { + "epoch": 0.52, + "learning_rate": 8.641439354397073e-05, + "loss": 1.1503, + "step": 9470 + }, + { + "epoch": 0.52, + "learning_rate": 8.632510409930815e-05, + "loss": 1.1177, + "step": 9475 + }, + { + "epoch": 0.52, + "learning_rate": 8.623582576348406e-05, + "loss": 1.0798, + "step": 9480 + }, + { + "epoch": 0.52, + "learning_rate": 8.614655860902388e-05, + "loss": 1.1277, + "step": 9485 + }, + { + "epoch": 0.52, + "learning_rate": 8.605730270844408e-05, + "loss": 1.1412, + "step": 9490 + }, + { + "epoch": 0.52, + "learning_rate": 8.596805813425189e-05, + "loss": 1.1169, + "step": 9495 + }, + { + "epoch": 0.52, + "learning_rate": 8.587882495894543e-05, + "loss": 1.1518, + "step": 9500 + }, + { + "epoch": 0.52, + "learning_rate": 8.578960325501345e-05, + "loss": 1.1035, + "step": 9505 + }, + { + "epoch": 0.52, + "learning_rate": 8.570039309493546e-05, + "loss": 1.1088, + "step": 9510 + }, + { + "epoch": 0.52, + "learning_rate": 8.561119455118163e-05, + "loss": 1.0848, + "step": 9515 + }, + { + "epoch": 0.52, + "learning_rate": 8.552200769621255e-05, + "loss": 1.1476, + "step": 9520 + }, + { + "epoch": 0.52, + "learning_rate": 8.543283260247948e-05, + "loss": 1.0809, + "step": 9525 + }, + { + "epoch": 0.52, + "learning_rate": 8.534366934242396e-05, + "loss": 1.1005, + "step": 9530 + }, + { + "epoch": 0.52, + "learning_rate": 8.525451798847809e-05, + "loss": 1.0762, + "step": 9535 + }, + { + "epoch": 0.52, + "learning_rate": 8.516537861306419e-05, + "loss": 1.1484, + "step": 9540 + }, + { + "epoch": 0.52, + "learning_rate": 8.507625128859484e-05, + "loss": 1.0923, + "step": 9545 + }, + { + "epoch": 0.52, + "learning_rate": 8.498713608747285e-05, + "loss": 1.1191, + "step": 9550 + }, + { + "epoch": 0.52, + "learning_rate": 8.489803308209125e-05, + "loss": 1.1145, + "step": 9555 + }, + { + "epoch": 0.52, + "learning_rate": 8.480894234483307e-05, + "loss": 1.1304, + "step": 9560 + }, + { + "epoch": 0.52, + "learning_rate": 8.471986394807142e-05, + "loss": 1.0836, + "step": 9565 + }, + { + "epoch": 0.52, + "learning_rate": 8.463079796416937e-05, + "loss": 1.1304, + "step": 9570 + }, + { + "epoch": 0.52, + "learning_rate": 8.454174446547995e-05, + "loss": 1.1259, + "step": 9575 + }, + { + "epoch": 0.52, + "learning_rate": 8.445270352434593e-05, + "loss": 1.1105, + "step": 9580 + }, + { + "epoch": 0.53, + "learning_rate": 8.43636752131e-05, + "loss": 1.1828, + "step": 9585 + }, + { + "epoch": 0.53, + "learning_rate": 8.427465960406458e-05, + "loss": 1.1204, + "step": 9590 + }, + { + "epoch": 0.53, + "learning_rate": 8.418565676955167e-05, + "loss": 1.1321, + "step": 9595 + }, + { + "epoch": 0.53, + "learning_rate": 8.409666678186303e-05, + "loss": 1.1252, + "step": 9600 + }, + { + "epoch": 0.53, + "learning_rate": 8.400768971328988e-05, + "loss": 1.1206, + "step": 9605 + }, + { + "epoch": 0.53, + "learning_rate": 8.391872563611298e-05, + "loss": 1.0638, + "step": 9610 + }, + { + "epoch": 0.53, + "learning_rate": 8.382977462260257e-05, + "loss": 1.0795, + "step": 9615 + }, + { + "epoch": 0.53, + "learning_rate": 8.37408367450182e-05, + "loss": 1.203, + "step": 9620 + }, + { + "epoch": 0.53, + "learning_rate": 8.365191207560878e-05, + "loss": 1.0837, + "step": 9625 + }, + { + "epoch": 0.53, + "learning_rate": 8.356300068661259e-05, + "loss": 1.1268, + "step": 9630 + }, + { + "epoch": 0.53, + "learning_rate": 8.347410265025695e-05, + "loss": 1.1756, + "step": 9635 + }, + { + "epoch": 0.53, + "learning_rate": 8.338521803875848e-05, + "loss": 1.0606, + "step": 9640 + }, + { + "epoch": 0.53, + "learning_rate": 8.329634692432279e-05, + "loss": 1.0965, + "step": 9645 + }, + { + "epoch": 0.53, + "learning_rate": 8.320748937914458e-05, + "loss": 1.1075, + "step": 9650 + }, + { + "epoch": 0.53, + "learning_rate": 8.311864547540753e-05, + "loss": 1.0918, + "step": 9655 + }, + { + "epoch": 0.53, + "learning_rate": 8.302981528528417e-05, + "loss": 1.1592, + "step": 9660 + }, + { + "epoch": 0.53, + "learning_rate": 8.294099888093603e-05, + "loss": 1.1859, + "step": 9665 + }, + { + "epoch": 0.53, + "learning_rate": 8.285219633451333e-05, + "loss": 1.1572, + "step": 9670 + }, + { + "epoch": 0.53, + "learning_rate": 8.276340771815502e-05, + "loss": 1.1321, + "step": 9675 + }, + { + "epoch": 0.53, + "learning_rate": 8.267463310398876e-05, + "loss": 1.0831, + "step": 9680 + }, + { + "epoch": 0.53, + "learning_rate": 8.25858725641309e-05, + "loss": 1.147, + "step": 9685 + }, + { + "epoch": 0.53, + "learning_rate": 8.249712617068629e-05, + "loss": 1.1761, + "step": 9690 + }, + { + "epoch": 0.53, + "learning_rate": 8.240839399574831e-05, + "loss": 1.1747, + "step": 9695 + }, + { + "epoch": 0.53, + "learning_rate": 8.231967611139874e-05, + "loss": 1.1002, + "step": 9700 + }, + { + "epoch": 0.53, + "learning_rate": 8.223097258970783e-05, + "loss": 1.0486, + "step": 9705 + }, + { + "epoch": 0.53, + "learning_rate": 8.21422835027341e-05, + "loss": 1.142, + "step": 9710 + }, + { + "epoch": 0.53, + "learning_rate": 8.205360892252438e-05, + "loss": 1.0942, + "step": 9715 + }, + { + "epoch": 0.53, + "learning_rate": 8.196494892111369e-05, + "loss": 1.1185, + "step": 9720 + }, + { + "epoch": 0.53, + "learning_rate": 8.187630357052522e-05, + "loss": 1.0845, + "step": 9725 + }, + { + "epoch": 0.53, + "learning_rate": 8.178767294277031e-05, + "loss": 1.1787, + "step": 9730 + }, + { + "epoch": 0.53, + "learning_rate": 8.169905710984823e-05, + "loss": 1.1407, + "step": 9735 + }, + { + "epoch": 0.53, + "learning_rate": 8.161045614374632e-05, + "loss": 1.0873, + "step": 9740 + }, + { + "epoch": 0.53, + "learning_rate": 8.152187011643978e-05, + "loss": 1.0734, + "step": 9745 + }, + { + "epoch": 0.53, + "learning_rate": 8.143329909989175e-05, + "loss": 1.1591, + "step": 9750 + }, + { + "epoch": 0.53, + "learning_rate": 8.134474316605311e-05, + "loss": 1.0711, + "step": 9755 + }, + { + "epoch": 0.53, + "learning_rate": 8.125620238686253e-05, + "loss": 1.0927, + "step": 9760 + }, + { + "epoch": 0.53, + "learning_rate": 8.11676768342463e-05, + "loss": 1.1376, + "step": 9765 + }, + { + "epoch": 0.54, + "learning_rate": 8.107916658011847e-05, + "loss": 1.1452, + "step": 9770 + }, + { + "epoch": 0.54, + "learning_rate": 8.09906716963805e-05, + "loss": 1.1001, + "step": 9775 + }, + { + "epoch": 0.54, + "learning_rate": 8.090219225492153e-05, + "loss": 1.0564, + "step": 9780 + }, + { + "epoch": 0.54, + "learning_rate": 8.081372832761798e-05, + "loss": 1.0939, + "step": 9785 + }, + { + "epoch": 0.54, + "learning_rate": 8.072527998633381e-05, + "loss": 1.1263, + "step": 9790 + }, + { + "epoch": 0.54, + "learning_rate": 8.063684730292028e-05, + "loss": 1.0566, + "step": 9795 + }, + { + "epoch": 0.54, + "learning_rate": 8.054843034921589e-05, + "loss": 1.1386, + "step": 9800 + }, + { + "epoch": 0.54, + "learning_rate": 8.04600291970464e-05, + "loss": 1.106, + "step": 9805 + }, + { + "epoch": 0.54, + "learning_rate": 8.037164391822472e-05, + "loss": 1.1235, + "step": 9810 + }, + { + "epoch": 0.54, + "learning_rate": 8.02832745845509e-05, + "loss": 1.0541, + "step": 9815 + }, + { + "epoch": 0.54, + "learning_rate": 8.019492126781195e-05, + "loss": 1.1787, + "step": 9820 + }, + { + "epoch": 0.54, + "learning_rate": 8.010658403978197e-05, + "loss": 1.0591, + "step": 9825 + }, + { + "epoch": 0.54, + "learning_rate": 8.001826297222194e-05, + "loss": 1.1217, + "step": 9830 + }, + { + "epoch": 0.54, + "learning_rate": 7.992995813687969e-05, + "loss": 1.0979, + "step": 9835 + }, + { + "epoch": 0.54, + "learning_rate": 7.984166960548991e-05, + "loss": 1.1028, + "step": 9840 + }, + { + "epoch": 0.54, + "learning_rate": 7.975339744977401e-05, + "loss": 1.1215, + "step": 9845 + }, + { + "epoch": 0.54, + "learning_rate": 7.966514174144015e-05, + "loss": 1.1067, + "step": 9850 + }, + { + "epoch": 0.54, + "learning_rate": 7.957690255218304e-05, + "loss": 1.0957, + "step": 9855 + }, + { + "epoch": 0.54, + "learning_rate": 7.948867995368408e-05, + "loss": 1.1645, + "step": 9860 + }, + { + "epoch": 0.54, + "learning_rate": 7.940047401761107e-05, + "loss": 1.154, + "step": 9865 + }, + { + "epoch": 0.54, + "learning_rate": 7.931228481561841e-05, + "loss": 1.1907, + "step": 9870 + }, + { + "epoch": 0.54, + "learning_rate": 7.922411241934678e-05, + "loss": 1.1839, + "step": 9875 + }, + { + "epoch": 0.54, + "learning_rate": 7.91359569004233e-05, + "loss": 1.1389, + "step": 9880 + }, + { + "epoch": 0.54, + "learning_rate": 7.904781833046129e-05, + "loss": 1.124, + "step": 9885 + }, + { + "epoch": 0.54, + "learning_rate": 7.895969678106041e-05, + "loss": 1.1307, + "step": 9890 + }, + { + "epoch": 0.54, + "learning_rate": 7.887159232380639e-05, + "loss": 0.9746, + "step": 9895 + }, + { + "epoch": 0.54, + "learning_rate": 7.878350503027118e-05, + "loss": 1.1074, + "step": 9900 + }, + { + "epoch": 0.54, + "learning_rate": 7.869543497201268e-05, + "loss": 1.1333, + "step": 9905 + }, + { + "epoch": 0.54, + "learning_rate": 7.860738222057488e-05, + "loss": 1.0711, + "step": 9910 + }, + { + "epoch": 0.54, + "learning_rate": 7.851934684748765e-05, + "loss": 1.1405, + "step": 9915 + }, + { + "epoch": 0.54, + "learning_rate": 7.843132892426674e-05, + "loss": 1.0734, + "step": 9920 + }, + { + "epoch": 0.54, + "learning_rate": 7.83433285224138e-05, + "loss": 1.1478, + "step": 9925 + }, + { + "epoch": 0.54, + "learning_rate": 7.825534571341613e-05, + "loss": 1.0633, + "step": 9930 + }, + { + "epoch": 0.54, + "learning_rate": 7.816738056874686e-05, + "loss": 1.1151, + "step": 9935 + }, + { + "epoch": 0.54, + "learning_rate": 7.807943315986465e-05, + "loss": 1.08, + "step": 9940 + }, + { + "epoch": 0.54, + "learning_rate": 7.799150355821388e-05, + "loss": 1.0822, + "step": 9945 + }, + { + "epoch": 0.54, + "learning_rate": 7.790359183522435e-05, + "loss": 1.1019, + "step": 9950 + }, + { + "epoch": 0.55, + "learning_rate": 7.781569806231142e-05, + "loss": 1.1109, + "step": 9955 + }, + { + "epoch": 0.55, + "learning_rate": 7.772782231087577e-05, + "loss": 1.0887, + "step": 9960 + }, + { + "epoch": 0.55, + "learning_rate": 7.763996465230357e-05, + "loss": 1.0661, + "step": 9965 + }, + { + "epoch": 0.55, + "learning_rate": 7.755212515796615e-05, + "loss": 1.0919, + "step": 9970 + }, + { + "epoch": 0.55, + "learning_rate": 7.746430389922026e-05, + "loss": 1.1297, + "step": 9975 + }, + { + "epoch": 0.55, + "learning_rate": 7.737650094740767e-05, + "loss": 1.1043, + "step": 9980 + }, + { + "epoch": 0.55, + "learning_rate": 7.728871637385537e-05, + "loss": 1.175, + "step": 9985 + }, + { + "epoch": 0.55, + "learning_rate": 7.720095024987539e-05, + "loss": 1.0476, + "step": 9990 + }, + { + "epoch": 0.55, + "learning_rate": 7.711320264676476e-05, + "loss": 1.148, + "step": 9995 + }, + { + "epoch": 0.55, + "learning_rate": 7.70254736358055e-05, + "loss": 1.1077, + "step": 10000 + }, + { + "epoch": 0.55, + "learning_rate": 7.693776328826449e-05, + "loss": 1.0244, + "step": 10005 + }, + { + "epoch": 0.55, + "learning_rate": 7.685007167539348e-05, + "loss": 1.1144, + "step": 10010 + }, + { + "epoch": 0.55, + "learning_rate": 7.676239886842898e-05, + "loss": 1.065, + "step": 10015 + }, + { + "epoch": 0.55, + "learning_rate": 7.667474493859225e-05, + "loss": 1.131, + "step": 10020 + }, + { + "epoch": 0.55, + "learning_rate": 7.658710995708917e-05, + "loss": 1.0779, + "step": 10025 + }, + { + "epoch": 0.55, + "learning_rate": 7.649949399511027e-05, + "loss": 1.074, + "step": 10030 + }, + { + "epoch": 0.55, + "learning_rate": 7.641189712383058e-05, + "loss": 1.1598, + "step": 10035 + }, + { + "epoch": 0.55, + "learning_rate": 7.632431941440971e-05, + "loss": 1.1384, + "step": 10040 + }, + { + "epoch": 0.55, + "learning_rate": 7.623676093799159e-05, + "loss": 1.0922, + "step": 10045 + }, + { + "epoch": 0.55, + "learning_rate": 7.614922176570463e-05, + "loss": 1.0746, + "step": 10050 + }, + { + "epoch": 0.55, + "learning_rate": 7.606170196866148e-05, + "loss": 1.071, + "step": 10055 + }, + { + "epoch": 0.55, + "learning_rate": 7.597420161795909e-05, + "loss": 1.1491, + "step": 10060 + }, + { + "epoch": 0.55, + "learning_rate": 7.588672078467862e-05, + "loss": 1.1222, + "step": 10065 + }, + { + "epoch": 0.55, + "learning_rate": 7.579925953988534e-05, + "loss": 1.163, + "step": 10070 + }, + { + "epoch": 0.55, + "learning_rate": 7.571181795462866e-05, + "loss": 1.1735, + "step": 10075 + }, + { + "epoch": 0.55, + "learning_rate": 7.562439609994192e-05, + "loss": 1.1638, + "step": 10080 + }, + { + "epoch": 0.55, + "learning_rate": 7.553699404684255e-05, + "loss": 1.1776, + "step": 10085 + }, + { + "epoch": 0.55, + "learning_rate": 7.544961186633184e-05, + "loss": 1.1405, + "step": 10090 + }, + { + "epoch": 0.55, + "learning_rate": 7.536224962939497e-05, + "loss": 1.0934, + "step": 10095 + }, + { + "epoch": 0.55, + "learning_rate": 7.527490740700084e-05, + "loss": 1.0013, + "step": 10100 + }, + { + "epoch": 0.55, + "learning_rate": 7.518758527010217e-05, + "loss": 1.1097, + "step": 10105 + }, + { + "epoch": 0.55, + "learning_rate": 7.51002832896353e-05, + "loss": 1.0495, + "step": 10110 + }, + { + "epoch": 0.55, + "learning_rate": 7.501300153652027e-05, + "loss": 1.0718, + "step": 10115 + }, + { + "epoch": 0.55, + "learning_rate": 7.492574008166058e-05, + "loss": 1.1874, + "step": 10120 + }, + { + "epoch": 0.55, + "learning_rate": 7.483849899594334e-05, + "loss": 1.089, + "step": 10125 + }, + { + "epoch": 0.55, + "learning_rate": 7.47512783502391e-05, + "loss": 1.0803, + "step": 10130 + }, + { + "epoch": 0.56, + "learning_rate": 7.466407821540177e-05, + "loss": 1.111, + "step": 10135 + }, + { + "epoch": 0.56, + "learning_rate": 7.457689866226864e-05, + "loss": 1.1375, + "step": 10140 + }, + { + "epoch": 0.56, + "learning_rate": 7.448973976166018e-05, + "loss": 1.1459, + "step": 10145 + }, + { + "epoch": 0.56, + "learning_rate": 7.440260158438028e-05, + "loss": 1.0854, + "step": 10150 + }, + { + "epoch": 0.56, + "learning_rate": 7.431548420121575e-05, + "loss": 1.0891, + "step": 10155 + }, + { + "epoch": 0.56, + "learning_rate": 7.42283876829367e-05, + "loss": 1.0713, + "step": 10160 + }, + { + "epoch": 0.56, + "learning_rate": 7.414131210029622e-05, + "loss": 1.1664, + "step": 10165 + }, + { + "epoch": 0.56, + "learning_rate": 7.405425752403037e-05, + "loss": 1.0998, + "step": 10170 + }, + { + "epoch": 0.56, + "learning_rate": 7.396722402485817e-05, + "loss": 1.1358, + "step": 10175 + }, + { + "epoch": 0.56, + "learning_rate": 7.388021167348152e-05, + "loss": 1.1019, + "step": 10180 + }, + { + "epoch": 0.56, + "learning_rate": 7.379322054058515e-05, + "loss": 1.0098, + "step": 10185 + }, + { + "epoch": 0.56, + "learning_rate": 7.370625069683649e-05, + "loss": 1.1124, + "step": 10190 + }, + { + "epoch": 0.56, + "learning_rate": 7.361930221288581e-05, + "loss": 1.0658, + "step": 10195 + }, + { + "epoch": 0.56, + "learning_rate": 7.353237515936588e-05, + "loss": 1.0451, + "step": 10200 + }, + { + "epoch": 0.56, + "learning_rate": 7.344546960689215e-05, + "loss": 1.1221, + "step": 10205 + }, + { + "epoch": 0.56, + "learning_rate": 7.335858562606259e-05, + "loss": 1.103, + "step": 10210 + }, + { + "epoch": 0.56, + "learning_rate": 7.327172328745762e-05, + "loss": 1.0827, + "step": 10215 + }, + { + "epoch": 0.56, + "learning_rate": 7.318488266164012e-05, + "loss": 1.0917, + "step": 10220 + }, + { + "epoch": 0.56, + "learning_rate": 7.309806381915527e-05, + "loss": 1.1364, + "step": 10225 + }, + { + "epoch": 0.56, + "learning_rate": 7.301126683053058e-05, + "loss": 1.1422, + "step": 10230 + }, + { + "epoch": 0.56, + "learning_rate": 7.292449176627586e-05, + "loss": 1.1408, + "step": 10235 + }, + { + "epoch": 0.56, + "learning_rate": 7.283773869688303e-05, + "loss": 1.1349, + "step": 10240 + }, + { + "epoch": 0.56, + "learning_rate": 7.275100769282623e-05, + "loss": 1.0405, + "step": 10245 + }, + { + "epoch": 0.56, + "learning_rate": 7.266429882456157e-05, + "loss": 1.1005, + "step": 10250 + }, + { + "epoch": 0.56, + "learning_rate": 7.257761216252723e-05, + "loss": 1.1948, + "step": 10255 + }, + { + "epoch": 0.56, + "learning_rate": 7.24909477771434e-05, + "loss": 1.0878, + "step": 10260 + }, + { + "epoch": 0.56, + "learning_rate": 7.240430573881205e-05, + "loss": 1.1491, + "step": 10265 + }, + { + "epoch": 0.56, + "learning_rate": 7.23176861179172e-05, + "loss": 1.1067, + "step": 10270 + }, + { + "epoch": 0.56, + "learning_rate": 7.223108898482441e-05, + "loss": 1.112, + "step": 10275 + }, + { + "epoch": 0.56, + "learning_rate": 7.214451440988117e-05, + "loss": 1.1295, + "step": 10280 + }, + { + "epoch": 0.56, + "learning_rate": 7.205796246341652e-05, + "loss": 1.102, + "step": 10285 + }, + { + "epoch": 0.56, + "learning_rate": 7.197143321574119e-05, + "loss": 1.0947, + "step": 10290 + }, + { + "epoch": 0.56, + "learning_rate": 7.188492673714744e-05, + "loss": 1.1683, + "step": 10295 + }, + { + "epoch": 0.56, + "learning_rate": 7.179844309790904e-05, + "loss": 1.1363, + "step": 10300 + }, + { + "epoch": 0.56, + "learning_rate": 7.171198236828122e-05, + "loss": 1.0505, + "step": 10305 + }, + { + "epoch": 0.56, + "learning_rate": 7.16255446185006e-05, + "loss": 1.0529, + "step": 10310 + }, + { + "epoch": 0.56, + "learning_rate": 7.153912991878506e-05, + "loss": 1.0862, + "step": 10315 + }, + { + "epoch": 0.57, + "learning_rate": 7.145273833933388e-05, + "loss": 1.1673, + "step": 10320 + }, + { + "epoch": 0.57, + "learning_rate": 7.136636995032742e-05, + "loss": 1.1195, + "step": 10325 + }, + { + "epoch": 0.57, + "learning_rate": 7.12800248219273e-05, + "loss": 1.0825, + "step": 10330 + }, + { + "epoch": 0.57, + "learning_rate": 7.11937030242763e-05, + "loss": 1.1358, + "step": 10335 + }, + { + "epoch": 0.57, + "learning_rate": 7.110740462749805e-05, + "loss": 1.1831, + "step": 10340 + }, + { + "epoch": 0.57, + "learning_rate": 7.102112970169738e-05, + "loss": 1.0786, + "step": 10345 + }, + { + "epoch": 0.57, + "learning_rate": 7.093487831695985e-05, + "loss": 1.1177, + "step": 10350 + }, + { + "epoch": 0.57, + "learning_rate": 7.08486505433521e-05, + "loss": 1.2008, + "step": 10355 + }, + { + "epoch": 0.57, + "learning_rate": 7.076244645092141e-05, + "loss": 1.1226, + "step": 10360 + }, + { + "epoch": 0.57, + "learning_rate": 7.067626610969602e-05, + "loss": 1.1201, + "step": 10365 + }, + { + "epoch": 0.57, + "learning_rate": 7.059010958968462e-05, + "loss": 1.1061, + "step": 10370 + }, + { + "epoch": 0.57, + "learning_rate": 7.05039769608768e-05, + "loss": 1.1028, + "step": 10375 + }, + { + "epoch": 0.57, + "learning_rate": 7.041786829324258e-05, + "loss": 1.1542, + "step": 10380 + }, + { + "epoch": 0.57, + "learning_rate": 7.033178365673258e-05, + "loss": 1.1056, + "step": 10385 + }, + { + "epoch": 0.57, + "learning_rate": 7.02457231212779e-05, + "loss": 1.1801, + "step": 10390 + }, + { + "epoch": 0.57, + "learning_rate": 7.015968675679003e-05, + "loss": 1.2161, + "step": 10395 + }, + { + "epoch": 0.57, + "learning_rate": 7.007367463316088e-05, + "loss": 1.1018, + "step": 10400 + }, + { + "epoch": 0.57, + "learning_rate": 6.998768682026256e-05, + "loss": 1.1212, + "step": 10405 + }, + { + "epoch": 0.57, + "learning_rate": 6.990172338794759e-05, + "loss": 0.9987, + "step": 10410 + }, + { + "epoch": 0.57, + "learning_rate": 6.981578440604852e-05, + "loss": 1.0532, + "step": 10415 + }, + { + "epoch": 0.57, + "learning_rate": 6.972986994437815e-05, + "loss": 1.1031, + "step": 10420 + }, + { + "epoch": 0.57, + "learning_rate": 6.964398007272927e-05, + "loss": 1.1004, + "step": 10425 + }, + { + "epoch": 0.57, + "learning_rate": 6.955811486087484e-05, + "loss": 1.0524, + "step": 10430 + }, + { + "epoch": 0.57, + "learning_rate": 6.947227437856764e-05, + "loss": 1.0695, + "step": 10435 + }, + { + "epoch": 0.57, + "learning_rate": 6.938645869554042e-05, + "loss": 1.0736, + "step": 10440 + }, + { + "epoch": 0.57, + "learning_rate": 6.930066788150577e-05, + "loss": 1.1102, + "step": 10445 + }, + { + "epoch": 0.57, + "learning_rate": 6.92149020061561e-05, + "loss": 1.1338, + "step": 10450 + }, + { + "epoch": 0.57, + "learning_rate": 6.912916113916357e-05, + "loss": 1.0925, + "step": 10455 + }, + { + "epoch": 0.57, + "learning_rate": 6.904344535017995e-05, + "loss": 1.1167, + "step": 10460 + }, + { + "epoch": 0.57, + "learning_rate": 6.895775470883676e-05, + "loss": 1.0626, + "step": 10465 + }, + { + "epoch": 0.57, + "learning_rate": 6.887208928474502e-05, + "loss": 1.1136, + "step": 10470 + }, + { + "epoch": 0.57, + "learning_rate": 6.878644914749521e-05, + "loss": 1.0736, + "step": 10475 + }, + { + "epoch": 0.57, + "learning_rate": 6.870083436665737e-05, + "loss": 1.0951, + "step": 10480 + }, + { + "epoch": 0.57, + "learning_rate": 6.861524501178084e-05, + "loss": 1.0115, + "step": 10485 + }, + { + "epoch": 0.57, + "learning_rate": 6.852968115239444e-05, + "loss": 1.1083, + "step": 10490 + }, + { + "epoch": 0.57, + "learning_rate": 6.844414285800614e-05, + "loss": 1.0884, + "step": 10495 + }, + { + "epoch": 0.58, + "learning_rate": 6.835863019810325e-05, + "loss": 1.19, + "step": 10500 + }, + { + "epoch": 0.58, + "learning_rate": 6.827314324215216e-05, + "loss": 1.1527, + "step": 10505 + }, + { + "epoch": 0.58, + "learning_rate": 6.818768205959847e-05, + "loss": 1.0989, + "step": 10510 + }, + { + "epoch": 0.58, + "learning_rate": 6.810224671986675e-05, + "loss": 1.1104, + "step": 10515 + }, + { + "epoch": 0.58, + "learning_rate": 6.801683729236071e-05, + "loss": 1.1296, + "step": 10520 + }, + { + "epoch": 0.58, + "learning_rate": 6.793145384646284e-05, + "loss": 1.0106, + "step": 10525 + }, + { + "epoch": 0.58, + "learning_rate": 6.784609645153472e-05, + "loss": 1.0996, + "step": 10530 + }, + { + "epoch": 0.58, + "learning_rate": 6.776076517691654e-05, + "loss": 1.0468, + "step": 10535 + }, + { + "epoch": 0.58, + "learning_rate": 6.76754600919275e-05, + "loss": 1.0788, + "step": 10540 + }, + { + "epoch": 0.58, + "learning_rate": 6.759018126586531e-05, + "loss": 1.0402, + "step": 10545 + }, + { + "epoch": 0.58, + "learning_rate": 6.750492876800655e-05, + "loss": 1.127, + "step": 10550 + }, + { + "epoch": 0.58, + "learning_rate": 6.74197026676062e-05, + "loss": 1.1174, + "step": 10555 + }, + { + "epoch": 0.58, + "learning_rate": 6.733450303389805e-05, + "loss": 1.0851, + "step": 10560 + }, + { + "epoch": 0.58, + "learning_rate": 6.724932993609413e-05, + "loss": 1.1548, + "step": 10565 + }, + { + "epoch": 0.58, + "learning_rate": 6.716418344338511e-05, + "loss": 1.0381, + "step": 10570 + }, + { + "epoch": 0.58, + "learning_rate": 6.707906362493998e-05, + "loss": 1.1459, + "step": 10575 + }, + { + "epoch": 0.58, + "learning_rate": 6.699397054990599e-05, + "loss": 1.1529, + "step": 10580 + }, + { + "epoch": 0.58, + "learning_rate": 6.690890428740877e-05, + "loss": 1.1147, + "step": 10585 + }, + { + "epoch": 0.58, + "learning_rate": 6.682386490655209e-05, + "loss": 1.1362, + "step": 10590 + }, + { + "epoch": 0.58, + "learning_rate": 6.673885247641799e-05, + "loss": 1.1775, + "step": 10595 + }, + { + "epoch": 0.58, + "learning_rate": 6.665386706606646e-05, + "loss": 1.1248, + "step": 10600 + }, + { + "epoch": 0.58, + "learning_rate": 6.656890874453575e-05, + "loss": 1.199, + "step": 10605 + }, + { + "epoch": 0.58, + "learning_rate": 6.648397758084183e-05, + "loss": 1.0962, + "step": 10610 + }, + { + "epoch": 0.58, + "learning_rate": 6.63990736439789e-05, + "loss": 1.1644, + "step": 10615 + }, + { + "epoch": 0.58, + "learning_rate": 6.631419700291875e-05, + "loss": 1.179, + "step": 10620 + }, + { + "epoch": 0.58, + "learning_rate": 6.62293477266113e-05, + "loss": 1.0606, + "step": 10625 + }, + { + "epoch": 0.58, + "learning_rate": 6.614452588398394e-05, + "loss": 1.119, + "step": 10630 + }, + { + "epoch": 0.58, + "learning_rate": 6.605973154394207e-05, + "loss": 1.1478, + "step": 10635 + }, + { + "epoch": 0.58, + "learning_rate": 6.597496477536848e-05, + "loss": 1.1199, + "step": 10640 + }, + { + "epoch": 0.58, + "learning_rate": 6.58902256471238e-05, + "loss": 1.1428, + "step": 10645 + }, + { + "epoch": 0.58, + "learning_rate": 6.580551422804593e-05, + "loss": 1.1196, + "step": 10650 + }, + { + "epoch": 0.58, + "learning_rate": 6.572083058695056e-05, + "loss": 1.0792, + "step": 10655 + }, + { + "epoch": 0.58, + "learning_rate": 6.563617479263061e-05, + "loss": 1.0675, + "step": 10660 + }, + { + "epoch": 0.58, + "learning_rate": 6.555154691385648e-05, + "loss": 1.0904, + "step": 10665 + }, + { + "epoch": 0.58, + "learning_rate": 6.54669470193758e-05, + "loss": 1.071, + "step": 10670 + }, + { + "epoch": 0.58, + "learning_rate": 6.538237517791359e-05, + "loss": 1.0972, + "step": 10675 + }, + { + "epoch": 0.58, + "learning_rate": 6.529783145817193e-05, + "loss": 1.1276, + "step": 10680 + }, + { + "epoch": 0.59, + "learning_rate": 6.521331592883018e-05, + "loss": 1.1124, + "step": 10685 + }, + { + "epoch": 0.59, + "learning_rate": 6.51288286585448e-05, + "loss": 1.0746, + "step": 10690 + }, + { + "epoch": 0.59, + "learning_rate": 6.504436971594916e-05, + "loss": 1.1259, + "step": 10695 + }, + { + "epoch": 0.59, + "learning_rate": 6.495993916965383e-05, + "loss": 1.0786, + "step": 10700 + }, + { + "epoch": 0.59, + "learning_rate": 6.487553708824606e-05, + "loss": 1.1774, + "step": 10705 + }, + { + "epoch": 0.59, + "learning_rate": 6.479116354029023e-05, + "loss": 1.126, + "step": 10710 + }, + { + "epoch": 0.59, + "learning_rate": 6.47068185943273e-05, + "loss": 1.1933, + "step": 10715 + }, + { + "epoch": 0.59, + "learning_rate": 6.462250231887521e-05, + "loss": 1.1841, + "step": 10720 + }, + { + "epoch": 0.59, + "learning_rate": 6.453821478242847e-05, + "loss": 1.1088, + "step": 10725 + }, + { + "epoch": 0.59, + "learning_rate": 6.44539560534583e-05, + "loss": 1.1384, + "step": 10730 + }, + { + "epoch": 0.59, + "learning_rate": 6.436972620041249e-05, + "loss": 1.0844, + "step": 10735 + }, + { + "epoch": 0.59, + "learning_rate": 6.42855252917154e-05, + "loss": 1.0835, + "step": 10740 + }, + { + "epoch": 0.59, + "learning_rate": 6.420135339576784e-05, + "loss": 1.1662, + "step": 10745 + }, + { + "epoch": 0.59, + "learning_rate": 6.411721058094713e-05, + "loss": 1.0605, + "step": 10750 + }, + { + "epoch": 0.59, + "learning_rate": 6.403309691560681e-05, + "loss": 1.1286, + "step": 10755 + }, + { + "epoch": 0.59, + "learning_rate": 6.394901246807695e-05, + "loss": 1.1304, + "step": 10760 + }, + { + "epoch": 0.59, + "learning_rate": 6.386495730666375e-05, + "loss": 1.1059, + "step": 10765 + }, + { + "epoch": 0.59, + "learning_rate": 6.378093149964961e-05, + "loss": 1.0618, + "step": 10770 + }, + { + "epoch": 0.59, + "learning_rate": 6.369693511529314e-05, + "loss": 1.1487, + "step": 10775 + }, + { + "epoch": 0.59, + "learning_rate": 6.361296822182906e-05, + "loss": 1.1809, + "step": 10780 + }, + { + "epoch": 0.59, + "learning_rate": 6.352903088746802e-05, + "loss": 1.0398, + "step": 10785 + }, + { + "epoch": 0.59, + "learning_rate": 6.34451231803968e-05, + "loss": 1.128, + "step": 10790 + }, + { + "epoch": 0.59, + "learning_rate": 6.33612451687781e-05, + "loss": 1.1885, + "step": 10795 + }, + { + "epoch": 0.59, + "learning_rate": 6.327739692075033e-05, + "loss": 1.1196, + "step": 10800 + }, + { + "epoch": 0.59, + "learning_rate": 6.319357850442793e-05, + "loss": 1.0162, + "step": 10805 + }, + { + "epoch": 0.59, + "learning_rate": 6.310978998790096e-05, + "loss": 1.1401, + "step": 10810 + }, + { + "epoch": 0.59, + "learning_rate": 6.30260314392353e-05, + "loss": 1.0988, + "step": 10815 + }, + { + "epoch": 0.59, + "learning_rate": 6.29423029264724e-05, + "loss": 1.1285, + "step": 10820 + }, + { + "epoch": 0.59, + "learning_rate": 6.285860451762935e-05, + "loss": 1.1223, + "step": 10825 + }, + { + "epoch": 0.59, + "learning_rate": 6.277493628069878e-05, + "loss": 1.1253, + "step": 10830 + }, + { + "epoch": 0.59, + "learning_rate": 6.269129828364881e-05, + "loss": 1.1792, + "step": 10835 + }, + { + "epoch": 0.59, + "learning_rate": 6.260769059442296e-05, + "loss": 1.0359, + "step": 10840 + }, + { + "epoch": 0.59, + "learning_rate": 6.252411328094021e-05, + "loss": 1.0991, + "step": 10845 + }, + { + "epoch": 0.59, + "learning_rate": 6.244056641109478e-05, + "loss": 1.0626, + "step": 10850 + }, + { + "epoch": 0.59, + "learning_rate": 6.235705005275622e-05, + "loss": 1.1192, + "step": 10855 + }, + { + "epoch": 0.59, + "learning_rate": 6.227356427376921e-05, + "loss": 1.0149, + "step": 10860 + }, + { + "epoch": 0.6, + "learning_rate": 6.219010914195376e-05, + "loss": 1.1014, + "step": 10865 + }, + { + "epoch": 0.6, + "learning_rate": 6.210668472510472e-05, + "loss": 1.1001, + "step": 10870 + }, + { + "epoch": 0.6, + "learning_rate": 6.20232910909923e-05, + "loss": 1.1319, + "step": 10875 + }, + { + "epoch": 0.6, + "learning_rate": 6.193992830736139e-05, + "loss": 1.0524, + "step": 10880 + }, + { + "epoch": 0.6, + "learning_rate": 6.185659644193211e-05, + "loss": 1.0679, + "step": 10885 + }, + { + "epoch": 0.6, + "learning_rate": 6.177329556239921e-05, + "loss": 1.1015, + "step": 10890 + }, + { + "epoch": 0.6, + "learning_rate": 6.169002573643245e-05, + "loss": 1.1231, + "step": 10895 + }, + { + "epoch": 0.6, + "learning_rate": 6.160678703167629e-05, + "loss": 1.0801, + "step": 10900 + }, + { + "epoch": 0.6, + "learning_rate": 6.15235795157499e-05, + "loss": 1.1222, + "step": 10905 + }, + { + "epoch": 0.6, + "learning_rate": 6.144040325624716e-05, + "loss": 1.0823, + "step": 10910 + }, + { + "epoch": 0.6, + "learning_rate": 6.135725832073645e-05, + "loss": 1.0739, + "step": 10915 + }, + { + "epoch": 0.6, + "learning_rate": 6.12741447767609e-05, + "loss": 1.1289, + "step": 10920 + }, + { + "epoch": 0.6, + "learning_rate": 6.119106269183793e-05, + "loss": 1.0874, + "step": 10925 + }, + { + "epoch": 0.6, + "learning_rate": 6.110801213345952e-05, + "loss": 1.1188, + "step": 10930 + }, + { + "epoch": 0.6, + "learning_rate": 6.102499316909199e-05, + "loss": 1.1673, + "step": 10935 + }, + { + "epoch": 0.6, + "learning_rate": 6.094200586617609e-05, + "loss": 1.1936, + "step": 10940 + }, + { + "epoch": 0.6, + "learning_rate": 6.085905029212668e-05, + "loss": 1.1333, + "step": 10945 + }, + { + "epoch": 0.6, + "learning_rate": 6.077612651433306e-05, + "loss": 1.1491, + "step": 10950 + }, + { + "epoch": 0.6, + "learning_rate": 6.0693234600158456e-05, + "loss": 1.1421, + "step": 10955 + }, + { + "epoch": 0.6, + "learning_rate": 6.061037461694047e-05, + "loss": 1.069, + "step": 10960 + }, + { + "epoch": 0.6, + "learning_rate": 6.05275466319905e-05, + "loss": 1.1936, + "step": 10965 + }, + { + "epoch": 0.6, + "learning_rate": 6.0444750712594206e-05, + "loss": 1.1178, + "step": 10970 + }, + { + "epoch": 0.6, + "learning_rate": 6.036198692601096e-05, + "loss": 1.0836, + "step": 10975 + }, + { + "epoch": 0.6, + "learning_rate": 6.027925533947426e-05, + "loss": 1.1179, + "step": 10980 + }, + { + "epoch": 0.6, + "learning_rate": 6.019655602019122e-05, + "loss": 1.1624, + "step": 10985 + }, + { + "epoch": 0.6, + "learning_rate": 6.0113889035342935e-05, + "loss": 1.1446, + "step": 10990 + }, + { + "epoch": 0.6, + "learning_rate": 6.0031254452084106e-05, + "loss": 1.1677, + "step": 10995 + }, + { + "epoch": 0.6, + "learning_rate": 5.994865233754321e-05, + "loss": 1.1706, + "step": 11000 + }, + { + "epoch": 0.6, + "learning_rate": 5.986608275882224e-05, + "loss": 1.1413, + "step": 11005 + }, + { + "epoch": 0.6, + "learning_rate": 5.978354578299683e-05, + "loss": 1.1753, + "step": 11010 + }, + { + "epoch": 0.6, + "learning_rate": 5.970104147711616e-05, + "loss": 1.1574, + "step": 11015 + }, + { + "epoch": 0.6, + "learning_rate": 5.961856990820276e-05, + "loss": 1.0639, + "step": 11020 + }, + { + "epoch": 0.6, + "learning_rate": 5.9536131143252725e-05, + "loss": 1.0847, + "step": 11025 + }, + { + "epoch": 0.6, + "learning_rate": 5.945372524923526e-05, + "loss": 1.1702, + "step": 11030 + }, + { + "epoch": 0.6, + "learning_rate": 5.9371352293093194e-05, + "loss": 1.205, + "step": 11035 + }, + { + "epoch": 0.6, + "learning_rate": 5.928901234174222e-05, + "loss": 1.1253, + "step": 11040 + }, + { + "epoch": 0.6, + "learning_rate": 5.920670546207162e-05, + "loss": 1.0925, + "step": 11045 + }, + { + "epoch": 0.61, + "learning_rate": 5.912443172094344e-05, + "loss": 1.1342, + "step": 11050 + }, + { + "epoch": 0.61, + "learning_rate": 5.90421911851931e-05, + "loss": 1.1636, + "step": 11055 + }, + { + "epoch": 0.61, + "learning_rate": 5.8959983921628806e-05, + "loss": 1.1584, + "step": 11060 + }, + { + "epoch": 0.61, + "learning_rate": 5.887780999703197e-05, + "loss": 1.0906, + "step": 11065 + }, + { + "epoch": 0.61, + "learning_rate": 5.879566947815669e-05, + "loss": 1.1653, + "step": 11070 + }, + { + "epoch": 0.61, + "learning_rate": 5.871356243173015e-05, + "loss": 1.1211, + "step": 11075 + }, + { + "epoch": 0.61, + "learning_rate": 5.8631488924452114e-05, + "loss": 1.1169, + "step": 11080 + }, + { + "epoch": 0.61, + "learning_rate": 5.85494490229953e-05, + "loss": 1.0977, + "step": 11085 + }, + { + "epoch": 0.61, + "learning_rate": 5.846744279400499e-05, + "loss": 1.1312, + "step": 11090 + }, + { + "epoch": 0.61, + "learning_rate": 5.8385470304099204e-05, + "loss": 1.078, + "step": 11095 + }, + { + "epoch": 0.61, + "learning_rate": 5.8303531619868496e-05, + "loss": 1.0715, + "step": 11100 + }, + { + "epoch": 0.61, + "learning_rate": 5.8221626807876e-05, + "loss": 1.1397, + "step": 11105 + }, + { + "epoch": 0.61, + "learning_rate": 5.813975593465725e-05, + "loss": 1.1986, + "step": 11110 + }, + { + "epoch": 0.61, + "learning_rate": 5.8057919066720335e-05, + "loss": 1.115, + "step": 11115 + }, + { + "epoch": 0.61, + "learning_rate": 5.797611627054563e-05, + "loss": 1.0957, + "step": 11120 + }, + { + "epoch": 0.61, + "learning_rate": 5.789434761258581e-05, + "loss": 1.0986, + "step": 11125 + }, + { + "epoch": 0.61, + "learning_rate": 5.781261315926596e-05, + "loss": 1.1246, + "step": 11130 + }, + { + "epoch": 0.61, + "learning_rate": 5.773091297698318e-05, + "loss": 1.0196, + "step": 11135 + }, + { + "epoch": 0.61, + "learning_rate": 5.7649247132107e-05, + "loss": 1.1791, + "step": 11140 + }, + { + "epoch": 0.61, + "learning_rate": 5.756761569097866e-05, + "loss": 1.135, + "step": 11145 + }, + { + "epoch": 0.61, + "learning_rate": 5.748601871991183e-05, + "loss": 1.2191, + "step": 11150 + }, + { + "epoch": 0.61, + "learning_rate": 5.7404456285191963e-05, + "loss": 1.1379, + "step": 11155 + }, + { + "epoch": 0.61, + "learning_rate": 5.732292845307664e-05, + "loss": 1.1218, + "step": 11160 + }, + { + "epoch": 0.61, + "learning_rate": 5.724143528979505e-05, + "loss": 1.1387, + "step": 11165 + }, + { + "epoch": 0.61, + "learning_rate": 5.715997686154849e-05, + "loss": 1.0241, + "step": 11170 + }, + { + "epoch": 0.61, + "learning_rate": 5.7078553234509864e-05, + "loss": 1.1862, + "step": 11175 + }, + { + "epoch": 0.61, + "learning_rate": 5.699716447482399e-05, + "loss": 1.1326, + "step": 11180 + }, + { + "epoch": 0.61, + "learning_rate": 5.691581064860709e-05, + "loss": 1.1234, + "step": 11185 + }, + { + "epoch": 0.61, + "learning_rate": 5.6834491821947295e-05, + "loss": 1.1737, + "step": 11190 + }, + { + "epoch": 0.61, + "learning_rate": 5.675320806090408e-05, + "loss": 1.0744, + "step": 11195 + }, + { + "epoch": 0.61, + "learning_rate": 5.66719594315086e-05, + "loss": 1.0945, + "step": 11200 + }, + { + "epoch": 0.61, + "learning_rate": 5.6590745999763365e-05, + "loss": 1.0955, + "step": 11205 + }, + { + "epoch": 0.61, + "learning_rate": 5.650956783164233e-05, + "loss": 1.0479, + "step": 11210 + }, + { + "epoch": 0.61, + "learning_rate": 5.6428424993090744e-05, + "loss": 1.1357, + "step": 11215 + }, + { + "epoch": 0.61, + "learning_rate": 5.6347317550025305e-05, + "loss": 1.0897, + "step": 11220 + }, + { + "epoch": 0.61, + "learning_rate": 5.6266245568333815e-05, + "loss": 1.0691, + "step": 11225 + }, + { + "epoch": 0.62, + "learning_rate": 5.6185209113875275e-05, + "loss": 1.1319, + "step": 11230 + }, + { + "epoch": 0.62, + "learning_rate": 5.610420825247994e-05, + "loss": 1.1202, + "step": 11235 + }, + { + "epoch": 0.62, + "learning_rate": 5.602324304994901e-05, + "loss": 1.1487, + "step": 11240 + }, + { + "epoch": 0.62, + "learning_rate": 5.594231357205492e-05, + "loss": 1.1044, + "step": 11245 + }, + { + "epoch": 0.62, + "learning_rate": 5.586141988454076e-05, + "loss": 1.1353, + "step": 11250 + }, + { + "epoch": 0.62, + "learning_rate": 5.578056205312085e-05, + "loss": 1.1617, + "step": 11255 + }, + { + "epoch": 0.62, + "learning_rate": 5.5699740143480226e-05, + "loss": 1.1061, + "step": 11260 + }, + { + "epoch": 0.62, + "learning_rate": 5.561895422127481e-05, + "loss": 1.1968, + "step": 11265 + }, + { + "epoch": 0.62, + "learning_rate": 5.5538204352131275e-05, + "loss": 1.1268, + "step": 11270 + }, + { + "epoch": 0.62, + "learning_rate": 5.545749060164696e-05, + "loss": 1.1069, + "step": 11275 + }, + { + "epoch": 0.62, + "learning_rate": 5.5376813035389874e-05, + "loss": 1.2296, + "step": 11280 + }, + { + "epoch": 0.62, + "learning_rate": 5.5296171718898724e-05, + "loss": 1.0729, + "step": 11285 + }, + { + "epoch": 0.62, + "learning_rate": 5.521556671768267e-05, + "loss": 1.129, + "step": 11290 + }, + { + "epoch": 0.62, + "learning_rate": 5.513499809722139e-05, + "loss": 1.1066, + "step": 11295 + }, + { + "epoch": 0.62, + "learning_rate": 5.505446592296501e-05, + "loss": 1.0274, + "step": 11300 + }, + { + "epoch": 0.62, + "learning_rate": 5.4973970260334096e-05, + "loss": 1.1432, + "step": 11305 + }, + { + "epoch": 0.62, + "learning_rate": 5.48935111747195e-05, + "loss": 1.1238, + "step": 11310 + }, + { + "epoch": 0.62, + "learning_rate": 5.481308873148236e-05, + "loss": 1.1235, + "step": 11315 + }, + { + "epoch": 0.62, + "learning_rate": 5.473270299595405e-05, + "loss": 1.1306, + "step": 11320 + }, + { + "epoch": 0.62, + "learning_rate": 5.465235403343619e-05, + "loss": 1.1266, + "step": 11325 + }, + { + "epoch": 0.62, + "learning_rate": 5.4572041909200406e-05, + "loss": 1.1028, + "step": 11330 + }, + { + "epoch": 0.62, + "learning_rate": 5.449176668848857e-05, + "loss": 1.1316, + "step": 11335 + }, + { + "epoch": 0.62, + "learning_rate": 5.4411528436512414e-05, + "loss": 1.115, + "step": 11340 + }, + { + "epoch": 0.62, + "learning_rate": 5.4331327218453646e-05, + "loss": 1.144, + "step": 11345 + }, + { + "epoch": 0.62, + "learning_rate": 5.425116309946403e-05, + "loss": 1.0982, + "step": 11350 + }, + { + "epoch": 0.62, + "learning_rate": 5.417103614466507e-05, + "loss": 1.1867, + "step": 11355 + }, + { + "epoch": 0.62, + "learning_rate": 5.409094641914812e-05, + "loss": 1.1451, + "step": 11360 + }, + { + "epoch": 0.62, + "learning_rate": 5.4010893987974224e-05, + "loss": 1.1376, + "step": 11365 + }, + { + "epoch": 0.62, + "learning_rate": 5.3930878916174284e-05, + "loss": 1.1039, + "step": 11370 + }, + { + "epoch": 0.62, + "learning_rate": 5.385090126874873e-05, + "loss": 1.1259, + "step": 11375 + }, + { + "epoch": 0.62, + "learning_rate": 5.377096111066758e-05, + "loss": 1.0517, + "step": 11380 + }, + { + "epoch": 0.62, + "learning_rate": 5.369105850687044e-05, + "loss": 1.1109, + "step": 11385 + }, + { + "epoch": 0.62, + "learning_rate": 5.361119352226645e-05, + "loss": 1.0659, + "step": 11390 + }, + { + "epoch": 0.62, + "learning_rate": 5.353136622173414e-05, + "loss": 1.0613, + "step": 11395 + }, + { + "epoch": 0.62, + "learning_rate": 5.345157667012141e-05, + "loss": 1.0763, + "step": 11400 + }, + { + "epoch": 0.62, + "learning_rate": 5.337182493224547e-05, + "loss": 1.1335, + "step": 11405 + }, + { + "epoch": 0.62, + "learning_rate": 5.3292111072892946e-05, + "loss": 1.16, + "step": 11410 + }, + { + "epoch": 0.63, + "learning_rate": 5.321243515681953e-05, + "loss": 1.194, + "step": 11415 + }, + { + "epoch": 0.63, + "learning_rate": 5.313279724875029e-05, + "loss": 1.046, + "step": 11420 + }, + { + "epoch": 0.63, + "learning_rate": 5.3053197413379116e-05, + "loss": 1.0739, + "step": 11425 + }, + { + "epoch": 0.63, + "learning_rate": 5.2973635715369285e-05, + "loss": 1.1689, + "step": 11430 + }, + { + "epoch": 0.63, + "learning_rate": 5.289411221935287e-05, + "loss": 1.1096, + "step": 11435 + }, + { + "epoch": 0.63, + "learning_rate": 5.281462698993106e-05, + "loss": 1.1251, + "step": 11440 + }, + { + "epoch": 0.63, + "learning_rate": 5.2735180091673896e-05, + "loss": 1.1601, + "step": 11445 + }, + { + "epoch": 0.63, + "learning_rate": 5.265577158912023e-05, + "loss": 1.0862, + "step": 11450 + }, + { + "epoch": 0.63, + "learning_rate": 5.257640154677783e-05, + "loss": 1.1508, + "step": 11455 + }, + { + "epoch": 0.63, + "learning_rate": 5.249707002912312e-05, + "loss": 1.1167, + "step": 11460 + }, + { + "epoch": 0.63, + "learning_rate": 5.24177771006013e-05, + "loss": 1.108, + "step": 11465 + }, + { + "epoch": 0.63, + "learning_rate": 5.2338522825626135e-05, + "loss": 1.1568, + "step": 11470 + }, + { + "epoch": 0.63, + "learning_rate": 5.225930726858013e-05, + "loss": 1.131, + "step": 11475 + }, + { + "epoch": 0.63, + "learning_rate": 5.2180130493814185e-05, + "loss": 1.0945, + "step": 11480 + }, + { + "epoch": 0.63, + "learning_rate": 5.210099256564787e-05, + "loss": 1.0948, + "step": 11485 + }, + { + "epoch": 0.63, + "learning_rate": 5.202189354836895e-05, + "loss": 1.1538, + "step": 11490 + }, + { + "epoch": 0.63, + "learning_rate": 5.194283350623384e-05, + "loss": 1.1023, + "step": 11495 + }, + { + "epoch": 0.63, + "learning_rate": 5.186381250346709e-05, + "loss": 1.2009, + "step": 11500 + }, + { + "epoch": 0.63, + "learning_rate": 5.178483060426174e-05, + "loss": 1.1419, + "step": 11505 + }, + { + "epoch": 0.63, + "learning_rate": 5.1705887872778805e-05, + "loss": 1.1852, + "step": 11510 + }, + { + "epoch": 0.63, + "learning_rate": 5.162698437314773e-05, + "loss": 1.0519, + "step": 11515 + }, + { + "epoch": 0.63, + "learning_rate": 5.154812016946591e-05, + "loss": 1.1591, + "step": 11520 + }, + { + "epoch": 0.63, + "learning_rate": 5.1469295325799024e-05, + "loss": 1.1399, + "step": 11525 + }, + { + "epoch": 0.63, + "learning_rate": 5.139050990618047e-05, + "loss": 1.1471, + "step": 11530 + }, + { + "epoch": 0.63, + "learning_rate": 5.131176397461192e-05, + "loss": 1.1197, + "step": 11535 + }, + { + "epoch": 0.63, + "learning_rate": 5.123305759506277e-05, + "loss": 1.083, + "step": 11540 + }, + { + "epoch": 0.63, + "learning_rate": 5.1154390831470443e-05, + "loss": 1.1562, + "step": 11545 + }, + { + "epoch": 0.63, + "learning_rate": 5.107576374774004e-05, + "loss": 1.1215, + "step": 11550 + }, + { + "epoch": 0.63, + "learning_rate": 5.099717640774446e-05, + "loss": 1.1243, + "step": 11555 + }, + { + "epoch": 0.63, + "learning_rate": 5.091862887532439e-05, + "loss": 1.1023, + "step": 11560 + }, + { + "epoch": 0.63, + "learning_rate": 5.084012121428808e-05, + "loss": 1.1031, + "step": 11565 + }, + { + "epoch": 0.63, + "learning_rate": 5.076165348841155e-05, + "loss": 1.1212, + "step": 11570 + }, + { + "epoch": 0.63, + "learning_rate": 5.0683225761438104e-05, + "loss": 1.0882, + "step": 11575 + }, + { + "epoch": 0.63, + "learning_rate": 5.0604838097078844e-05, + "loss": 1.1344, + "step": 11580 + }, + { + "epoch": 0.63, + "learning_rate": 5.0526490559012096e-05, + "loss": 1.113, + "step": 11585 + }, + { + "epoch": 0.63, + "learning_rate": 5.044818321088385e-05, + "loss": 1.1068, + "step": 11590 + }, + { + "epoch": 0.64, + "learning_rate": 5.036991611630713e-05, + "loss": 1.1007, + "step": 11595 + }, + { + "epoch": 0.64, + "learning_rate": 5.029168933886252e-05, + "loss": 1.0702, + "step": 11600 + }, + { + "epoch": 0.64, + "learning_rate": 5.021350294209767e-05, + "loss": 1.1364, + "step": 11605 + }, + { + "epoch": 0.64, + "learning_rate": 5.013535698952759e-05, + "loss": 1.1071, + "step": 11610 + }, + { + "epoch": 0.64, + "learning_rate": 5.005725154463433e-05, + "loss": 1.2017, + "step": 11615 + }, + { + "epoch": 0.64, + "learning_rate": 4.9979186670867034e-05, + "loss": 1.1132, + "step": 11620 + }, + { + "epoch": 0.64, + "learning_rate": 4.99011624316419e-05, + "loss": 1.0277, + "step": 11625 + }, + { + "epoch": 0.64, + "learning_rate": 4.982317889034219e-05, + "loss": 1.1167, + "step": 11630 + }, + { + "epoch": 0.64, + "learning_rate": 4.9745236110318016e-05, + "loss": 1.0663, + "step": 11635 + }, + { + "epoch": 0.64, + "learning_rate": 4.9667334154886396e-05, + "loss": 1.1204, + "step": 11640 + }, + { + "epoch": 0.64, + "learning_rate": 4.9589473087331173e-05, + "loss": 1.1003, + "step": 11645 + }, + { + "epoch": 0.64, + "learning_rate": 4.951165297090304e-05, + "loss": 1.1447, + "step": 11650 + }, + { + "epoch": 0.64, + "learning_rate": 4.943387386881936e-05, + "loss": 1.1249, + "step": 11655 + }, + { + "epoch": 0.64, + "learning_rate": 4.935613584426416e-05, + "loss": 1.115, + "step": 11660 + }, + { + "epoch": 0.64, + "learning_rate": 4.9278438960388205e-05, + "loss": 1.098, + "step": 11665 + }, + { + "epoch": 0.64, + "learning_rate": 4.920078328030869e-05, + "loss": 1.0942, + "step": 11670 + }, + { + "epoch": 0.64, + "learning_rate": 4.912316886710948e-05, + "loss": 1.0794, + "step": 11675 + }, + { + "epoch": 0.64, + "learning_rate": 4.9045595783840843e-05, + "loss": 1.1648, + "step": 11680 + }, + { + "epoch": 0.64, + "learning_rate": 4.896806409351945e-05, + "loss": 1.1045, + "step": 11685 + }, + { + "epoch": 0.64, + "learning_rate": 4.889057385912834e-05, + "loss": 1.1336, + "step": 11690 + }, + { + "epoch": 0.64, + "learning_rate": 4.881312514361702e-05, + "loss": 1.1451, + "step": 11695 + }, + { + "epoch": 0.64, + "learning_rate": 4.873571800990111e-05, + "loss": 1.1223, + "step": 11700 + }, + { + "epoch": 0.64, + "learning_rate": 4.865835252086249e-05, + "loss": 1.1818, + "step": 11705 + }, + { + "epoch": 0.64, + "learning_rate": 4.8581028739349186e-05, + "loss": 1.2081, + "step": 11710 + }, + { + "epoch": 0.64, + "learning_rate": 4.850374672817548e-05, + "loss": 1.1436, + "step": 11715 + }, + { + "epoch": 0.64, + "learning_rate": 4.842650655012156e-05, + "loss": 1.103, + "step": 11720 + }, + { + "epoch": 0.64, + "learning_rate": 4.834930826793371e-05, + "loss": 1.103, + "step": 11725 + }, + { + "epoch": 0.64, + "learning_rate": 4.8272151944324115e-05, + "loss": 1.1512, + "step": 11730 + }, + { + "epoch": 0.64, + "learning_rate": 4.8195037641971e-05, + "loss": 1.0671, + "step": 11735 + }, + { + "epoch": 0.64, + "learning_rate": 4.8117965423518296e-05, + "loss": 1.0607, + "step": 11740 + }, + { + "epoch": 0.64, + "learning_rate": 4.804093535157593e-05, + "loss": 1.1404, + "step": 11745 + }, + { + "epoch": 0.64, + "learning_rate": 4.796394748871937e-05, + "loss": 1.0394, + "step": 11750 + }, + { + "epoch": 0.64, + "learning_rate": 4.7887001897489995e-05, + "loss": 1.1789, + "step": 11755 + }, + { + "epoch": 0.64, + "learning_rate": 4.7810098640394706e-05, + "loss": 1.0647, + "step": 11760 + }, + { + "epoch": 0.64, + "learning_rate": 4.773323777990616e-05, + "loss": 1.1012, + "step": 11765 + }, + { + "epoch": 0.64, + "learning_rate": 4.7656419378462356e-05, + "loss": 1.0892, + "step": 11770 + }, + { + "epoch": 0.64, + "learning_rate": 4.7579643498467e-05, + "loss": 1.163, + "step": 11775 + }, + { + "epoch": 0.65, + "learning_rate": 4.750291020228921e-05, + "loss": 1.0868, + "step": 11780 + }, + { + "epoch": 0.65, + "learning_rate": 4.742621955226347e-05, + "loss": 1.1675, + "step": 11785 + }, + { + "epoch": 0.65, + "learning_rate": 4.7349571610689625e-05, + "loss": 1.1235, + "step": 11790 + }, + { + "epoch": 0.65, + "learning_rate": 4.727296643983279e-05, + "loss": 1.1036, + "step": 11795 + }, + { + "epoch": 0.65, + "learning_rate": 4.719640410192346e-05, + "loss": 1.0805, + "step": 11800 + }, + { + "epoch": 0.65, + "learning_rate": 4.711988465915723e-05, + "loss": 1.0863, + "step": 11805 + }, + { + "epoch": 0.65, + "learning_rate": 4.7043408173694846e-05, + "loss": 1.1648, + "step": 11810 + }, + { + "epoch": 0.65, + "learning_rate": 4.6966974707662195e-05, + "loss": 1.1605, + "step": 11815 + }, + { + "epoch": 0.65, + "learning_rate": 4.689058432315024e-05, + "loss": 1.1116, + "step": 11820 + }, + { + "epoch": 0.65, + "learning_rate": 4.681423708221487e-05, + "loss": 1.1126, + "step": 11825 + }, + { + "epoch": 0.65, + "learning_rate": 4.673793304687707e-05, + "loss": 1.1478, + "step": 11830 + }, + { + "epoch": 0.65, + "learning_rate": 4.666167227912247e-05, + "loss": 1.0948, + "step": 11835 + }, + { + "epoch": 0.65, + "learning_rate": 4.658545484090184e-05, + "loss": 1.1132, + "step": 11840 + }, + { + "epoch": 0.65, + "learning_rate": 4.650928079413055e-05, + "loss": 1.1758, + "step": 11845 + }, + { + "epoch": 0.65, + "learning_rate": 4.6433150200688896e-05, + "loss": 1.1221, + "step": 11850 + }, + { + "epoch": 0.65, + "learning_rate": 4.6357063122421613e-05, + "loss": 1.0648, + "step": 11855 + }, + { + "epoch": 0.65, + "learning_rate": 4.628101962113834e-05, + "loss": 1.109, + "step": 11860 + }, + { + "epoch": 0.65, + "learning_rate": 4.6205019758613185e-05, + "loss": 1.2088, + "step": 11865 + }, + { + "epoch": 0.65, + "learning_rate": 4.612906359658489e-05, + "loss": 1.0691, + "step": 11870 + }, + { + "epoch": 0.65, + "learning_rate": 4.605315119675662e-05, + "loss": 1.1582, + "step": 11875 + }, + { + "epoch": 0.65, + "learning_rate": 4.597728262079599e-05, + "loss": 1.0383, + "step": 11880 + }, + { + "epoch": 0.65, + "learning_rate": 4.590145793033509e-05, + "loss": 1.0629, + "step": 11885 + }, + { + "epoch": 0.65, + "learning_rate": 4.582567718697023e-05, + "loss": 1.1229, + "step": 11890 + }, + { + "epoch": 0.65, + "learning_rate": 4.574994045226226e-05, + "loss": 1.1112, + "step": 11895 + }, + { + "epoch": 0.65, + "learning_rate": 4.567424778773592e-05, + "loss": 1.1251, + "step": 11900 + }, + { + "epoch": 0.65, + "learning_rate": 4.559859925488048e-05, + "loss": 1.1147, + "step": 11905 + }, + { + "epoch": 0.65, + "learning_rate": 4.552299491514917e-05, + "loss": 1.1154, + "step": 11910 + }, + { + "epoch": 0.65, + "learning_rate": 4.5447434829959475e-05, + "loss": 1.122, + "step": 11915 + }, + { + "epoch": 0.65, + "learning_rate": 4.537191906069269e-05, + "loss": 1.2, + "step": 11920 + }, + { + "epoch": 0.65, + "learning_rate": 4.5296447668694366e-05, + "loss": 1.0759, + "step": 11925 + }, + { + "epoch": 0.65, + "learning_rate": 4.5221020715273816e-05, + "loss": 1.1403, + "step": 11930 + }, + { + "epoch": 0.65, + "learning_rate": 4.51456382617044e-05, + "loss": 1.1571, + "step": 11935 + }, + { + "epoch": 0.65, + "learning_rate": 4.5070300369223215e-05, + "loss": 1.1304, + "step": 11940 + }, + { + "epoch": 0.65, + "learning_rate": 4.499500709903121e-05, + "loss": 1.0566, + "step": 11945 + }, + { + "epoch": 0.65, + "learning_rate": 4.4919758512293036e-05, + "loss": 1.0922, + "step": 11950 + }, + { + "epoch": 0.65, + "learning_rate": 4.4844554670137145e-05, + "loss": 1.1554, + "step": 11955 + }, + { + "epoch": 0.66, + "learning_rate": 4.4769395633655554e-05, + "loss": 1.1302, + "step": 11960 + }, + { + "epoch": 0.66, + "learning_rate": 4.469428146390392e-05, + "loss": 1.0698, + "step": 11965 + }, + { + "epoch": 0.66, + "learning_rate": 4.4619212221901376e-05, + "loss": 1.0959, + "step": 11970 + }, + { + "epoch": 0.66, + "learning_rate": 4.454418796863072e-05, + "loss": 1.0745, + "step": 11975 + }, + { + "epoch": 0.66, + "learning_rate": 4.446920876503807e-05, + "loss": 1.1268, + "step": 11980 + }, + { + "epoch": 0.66, + "learning_rate": 4.439427467203294e-05, + "loss": 1.0728, + "step": 11985 + }, + { + "epoch": 0.66, + "learning_rate": 4.4319385750488315e-05, + "loss": 1.1349, + "step": 11990 + }, + { + "epoch": 0.66, + "learning_rate": 4.424454206124032e-05, + "loss": 1.1114, + "step": 11995 + }, + { + "epoch": 0.66, + "learning_rate": 4.4169743665088565e-05, + "loss": 1.0812, + "step": 12000 + }, + { + "epoch": 0.66, + "learning_rate": 4.409499062279558e-05, + "loss": 1.1699, + "step": 12005 + }, + { + "epoch": 0.66, + "learning_rate": 4.402028299508729e-05, + "loss": 1.0348, + "step": 12010 + }, + { + "epoch": 0.66, + "learning_rate": 4.394562084265259e-05, + "loss": 1.121, + "step": 12015 + }, + { + "epoch": 0.66, + "learning_rate": 4.387100422614353e-05, + "loss": 1.1061, + "step": 12020 + }, + { + "epoch": 0.66, + "learning_rate": 4.37964332061751e-05, + "loss": 1.1052, + "step": 12025 + }, + { + "epoch": 0.66, + "learning_rate": 4.372190784332526e-05, + "loss": 1.0799, + "step": 12030 + }, + { + "epoch": 0.66, + "learning_rate": 4.3647428198134845e-05, + "loss": 1.1057, + "step": 12035 + }, + { + "epoch": 0.66, + "learning_rate": 4.357299433110767e-05, + "loss": 1.1581, + "step": 12040 + }, + { + "epoch": 0.66, + "learning_rate": 4.349860630271027e-05, + "loss": 1.0838, + "step": 12045 + }, + { + "epoch": 0.66, + "learning_rate": 4.342426417337194e-05, + "loss": 1.142, + "step": 12050 + }, + { + "epoch": 0.66, + "learning_rate": 4.334996800348468e-05, + "loss": 1.0359, + "step": 12055 + }, + { + "epoch": 0.66, + "learning_rate": 4.3275717853403255e-05, + "loss": 1.0875, + "step": 12060 + }, + { + "epoch": 0.66, + "learning_rate": 4.320151378344493e-05, + "loss": 1.128, + "step": 12065 + }, + { + "epoch": 0.66, + "learning_rate": 4.3127355853889584e-05, + "loss": 1.12, + "step": 12070 + }, + { + "epoch": 0.66, + "learning_rate": 4.305324412497959e-05, + "loss": 1.084, + "step": 12075 + }, + { + "epoch": 0.66, + "learning_rate": 4.297917865691987e-05, + "loss": 1.1244, + "step": 12080 + }, + { + "epoch": 0.66, + "learning_rate": 4.290515950987763e-05, + "loss": 1.0532, + "step": 12085 + }, + { + "epoch": 0.66, + "learning_rate": 4.283118674398262e-05, + "loss": 1.0696, + "step": 12090 + }, + { + "epoch": 0.66, + "learning_rate": 4.27572604193267e-05, + "loss": 1.1579, + "step": 12095 + }, + { + "epoch": 0.66, + "learning_rate": 4.268338059596417e-05, + "loss": 1.0662, + "step": 12100 + }, + { + "epoch": 0.66, + "learning_rate": 4.260954733391156e-05, + "loss": 1.0738, + "step": 12105 + }, + { + "epoch": 0.66, + "learning_rate": 4.253576069314747e-05, + "loss": 1.1181, + "step": 12110 + }, + { + "epoch": 0.66, + "learning_rate": 4.2462020733612656e-05, + "loss": 1.0766, + "step": 12115 + }, + { + "epoch": 0.66, + "learning_rate": 4.2388327515209975e-05, + "loss": 1.1526, + "step": 12120 + }, + { + "epoch": 0.66, + "learning_rate": 4.231468109780436e-05, + "loss": 1.0989, + "step": 12125 + }, + { + "epoch": 0.66, + "learning_rate": 4.224108154122264e-05, + "loss": 1.1137, + "step": 12130 + }, + { + "epoch": 0.66, + "learning_rate": 4.216752890525362e-05, + "loss": 1.0953, + "step": 12135 + }, + { + "epoch": 0.66, + "learning_rate": 4.209402324964793e-05, + "loss": 1.0427, + "step": 12140 + }, + { + "epoch": 0.67, + "learning_rate": 4.202056463411818e-05, + "loss": 1.1042, + "step": 12145 + }, + { + "epoch": 0.67, + "learning_rate": 4.1947153118338596e-05, + "loss": 1.0568, + "step": 12150 + }, + { + "epoch": 0.67, + "learning_rate": 4.187378876194533e-05, + "loss": 1.1007, + "step": 12155 + }, + { + "epoch": 0.67, + "learning_rate": 4.180047162453595e-05, + "loss": 1.1365, + "step": 12160 + }, + { + "epoch": 0.67, + "learning_rate": 4.1727201765669956e-05, + "loss": 1.0907, + "step": 12165 + }, + { + "epoch": 0.67, + "learning_rate": 4.165397924486825e-05, + "loss": 1.0548, + "step": 12170 + }, + { + "epoch": 0.67, + "learning_rate": 4.158080412161344e-05, + "loss": 1.107, + "step": 12175 + }, + { + "epoch": 0.67, + "learning_rate": 4.150767645534939e-05, + "loss": 1.0936, + "step": 12180 + }, + { + "epoch": 0.67, + "learning_rate": 4.143459630548167e-05, + "loss": 1.1255, + "step": 12185 + }, + { + "epoch": 0.67, + "learning_rate": 4.1361563731377055e-05, + "loss": 1.1627, + "step": 12190 + }, + { + "epoch": 0.67, + "learning_rate": 4.128857879236385e-05, + "loss": 1.1539, + "step": 12195 + }, + { + "epoch": 0.67, + "learning_rate": 4.121564154773146e-05, + "loss": 1.0531, + "step": 12200 + }, + { + "epoch": 0.67, + "learning_rate": 4.114275205673069e-05, + "loss": 1.063, + "step": 12205 + }, + { + "epoch": 0.67, + "learning_rate": 4.1069910378573575e-05, + "loss": 1.1276, + "step": 12210 + }, + { + "epoch": 0.67, + "learning_rate": 4.099711657243317e-05, + "loss": 1.1043, + "step": 12215 + }, + { + "epoch": 0.67, + "learning_rate": 4.092437069744382e-05, + "loss": 1.1477, + "step": 12220 + }, + { + "epoch": 0.67, + "learning_rate": 4.085167281270068e-05, + "loss": 1.1241, + "step": 12225 + }, + { + "epoch": 0.67, + "learning_rate": 4.077902297726019e-05, + "loss": 1.1134, + "step": 12230 + }, + { + "epoch": 0.67, + "learning_rate": 4.070642125013956e-05, + "loss": 1.0757, + "step": 12235 + }, + { + "epoch": 0.67, + "learning_rate": 4.063386769031712e-05, + "loss": 1.1748, + "step": 12240 + }, + { + "epoch": 0.67, + "learning_rate": 4.056136235673179e-05, + "loss": 1.0801, + "step": 12245 + }, + { + "epoch": 0.67, + "learning_rate": 4.0488905308283596e-05, + "loss": 1.127, + "step": 12250 + }, + { + "epoch": 0.67, + "learning_rate": 4.041649660383313e-05, + "loss": 1.0326, + "step": 12255 + }, + { + "epoch": 0.67, + "learning_rate": 4.034413630220192e-05, + "loss": 1.1034, + "step": 12260 + }, + { + "epoch": 0.67, + "learning_rate": 4.027182446217188e-05, + "loss": 1.0684, + "step": 12265 + }, + { + "epoch": 0.67, + "learning_rate": 4.0199561142485864e-05, + "loss": 1.0526, + "step": 12270 + }, + { + "epoch": 0.67, + "learning_rate": 4.012734640184709e-05, + "loss": 1.1638, + "step": 12275 + }, + { + "epoch": 0.67, + "learning_rate": 4.005518029891947e-05, + "loss": 1.0238, + "step": 12280 + }, + { + "epoch": 0.67, + "learning_rate": 3.998306289232731e-05, + "loss": 1.221, + "step": 12285 + }, + { + "epoch": 0.67, + "learning_rate": 3.991099424065536e-05, + "loss": 1.1718, + "step": 12290 + }, + { + "epoch": 0.67, + "learning_rate": 3.983897440244875e-05, + "loss": 1.1192, + "step": 12295 + }, + { + "epoch": 0.67, + "learning_rate": 3.976700343621307e-05, + "loss": 1.0926, + "step": 12300 + }, + { + "epoch": 0.67, + "learning_rate": 3.969508140041409e-05, + "loss": 1.118, + "step": 12305 + }, + { + "epoch": 0.67, + "learning_rate": 3.962320835347782e-05, + "loss": 0.9906, + "step": 12310 + }, + { + "epoch": 0.67, + "learning_rate": 3.955138435379061e-05, + "loss": 1.1031, + "step": 12315 + }, + { + "epoch": 0.67, + "learning_rate": 3.94796094596988e-05, + "loss": 1.1357, + "step": 12320 + }, + { + "epoch": 0.68, + "learning_rate": 3.940788372950902e-05, + "loss": 1.1203, + "step": 12325 + }, + { + "epoch": 0.68, + "learning_rate": 3.9336207221487735e-05, + "loss": 1.066, + "step": 12330 + }, + { + "epoch": 0.68, + "learning_rate": 3.926457999386164e-05, + "loss": 1.1502, + "step": 12335 + }, + { + "epoch": 0.68, + "learning_rate": 3.919300210481726e-05, + "loss": 1.062, + "step": 12340 + }, + { + "epoch": 0.68, + "learning_rate": 3.912147361250111e-05, + "loss": 1.1143, + "step": 12345 + }, + { + "epoch": 0.68, + "learning_rate": 3.904999457501957e-05, + "loss": 1.1342, + "step": 12350 + }, + { + "epoch": 0.68, + "learning_rate": 3.89785650504388e-05, + "loss": 1.0738, + "step": 12355 + }, + { + "epoch": 0.68, + "learning_rate": 3.890718509678472e-05, + "loss": 1.0528, + "step": 12360 + }, + { + "epoch": 0.68, + "learning_rate": 3.883585477204313e-05, + "loss": 1.0769, + "step": 12365 + }, + { + "epoch": 0.68, + "learning_rate": 3.876457413415934e-05, + "loss": 1.0464, + "step": 12370 + }, + { + "epoch": 0.68, + "learning_rate": 3.869334324103839e-05, + "loss": 1.1172, + "step": 12375 + }, + { + "epoch": 0.68, + "learning_rate": 3.862216215054485e-05, + "loss": 1.1176, + "step": 12380 + }, + { + "epoch": 0.68, + "learning_rate": 3.855103092050295e-05, + "loss": 1.129, + "step": 12385 + }, + { + "epoch": 0.68, + "learning_rate": 3.8479949608696285e-05, + "loss": 1.1021, + "step": 12390 + }, + { + "epoch": 0.68, + "learning_rate": 3.840891827286795e-05, + "loss": 1.0312, + "step": 12395 + }, + { + "epoch": 0.68, + "learning_rate": 3.83379369707204e-05, + "loss": 1.1089, + "step": 12400 + }, + { + "epoch": 0.68, + "learning_rate": 3.826700575991557e-05, + "loss": 1.1446, + "step": 12405 + }, + { + "epoch": 0.68, + "learning_rate": 3.8196124698074564e-05, + "loss": 1.1468, + "step": 12410 + }, + { + "epoch": 0.68, + "learning_rate": 3.8125293842777874e-05, + "loss": 1.0899, + "step": 12415 + }, + { + "epoch": 0.68, + "learning_rate": 3.805451325156502e-05, + "loss": 1.1116, + "step": 12420 + }, + { + "epoch": 0.68, + "learning_rate": 3.798378298193487e-05, + "loss": 1.0851, + "step": 12425 + }, + { + "epoch": 0.68, + "learning_rate": 3.791310309134537e-05, + "loss": 1.1426, + "step": 12430 + }, + { + "epoch": 0.68, + "learning_rate": 3.78424736372135e-05, + "loss": 1.1121, + "step": 12435 + }, + { + "epoch": 0.68, + "learning_rate": 3.777189467691529e-05, + "loss": 1.0453, + "step": 12440 + }, + { + "epoch": 0.68, + "learning_rate": 3.770136626778569e-05, + "loss": 1.1383, + "step": 12445 + }, + { + "epoch": 0.68, + "learning_rate": 3.7630888467118734e-05, + "loss": 1.0837, + "step": 12450 + }, + { + "epoch": 0.68, + "learning_rate": 3.7560461332167223e-05, + "loss": 1.0824, + "step": 12455 + }, + { + "epoch": 0.68, + "learning_rate": 3.749008492014281e-05, + "loss": 1.084, + "step": 12460 + }, + { + "epoch": 0.68, + "learning_rate": 3.741975928821595e-05, + "loss": 1.116, + "step": 12465 + }, + { + "epoch": 0.68, + "learning_rate": 3.734948449351591e-05, + "loss": 1.0822, + "step": 12470 + }, + { + "epoch": 0.68, + "learning_rate": 3.7279260593130584e-05, + "loss": 1.0547, + "step": 12475 + }, + { + "epoch": 0.68, + "learning_rate": 3.720908764410653e-05, + "loss": 1.1336, + "step": 12480 + }, + { + "epoch": 0.68, + "learning_rate": 3.713896570344891e-05, + "loss": 1.1628, + "step": 12485 + }, + { + "epoch": 0.68, + "learning_rate": 3.7068894828121545e-05, + "loss": 1.136, + "step": 12490 + }, + { + "epoch": 0.68, + "learning_rate": 3.699887507504664e-05, + "loss": 1.0782, + "step": 12495 + }, + { + "epoch": 0.68, + "learning_rate": 3.6928906501105005e-05, + "loss": 1.132, + "step": 12500 + }, + { + "epoch": 0.68, + "learning_rate": 3.6858989163135656e-05, + "loss": 1.0995, + "step": 12505 + }, + { + "epoch": 0.69, + "learning_rate": 3.6789123117936266e-05, + "loss": 1.0418, + "step": 12510 + }, + { + "epoch": 0.69, + "learning_rate": 3.671930842226262e-05, + "loss": 1.1233, + "step": 12515 + }, + { + "epoch": 0.69, + "learning_rate": 3.664954513282898e-05, + "loss": 1.1218, + "step": 12520 + }, + { + "epoch": 0.69, + "learning_rate": 3.657983330630761e-05, + "loss": 1.1092, + "step": 12525 + }, + { + "epoch": 0.69, + "learning_rate": 3.6510172999329144e-05, + "loss": 0.9964, + "step": 12530 + }, + { + "epoch": 0.69, + "learning_rate": 3.6440564268482386e-05, + "loss": 1.1084, + "step": 12535 + }, + { + "epoch": 0.69, + "learning_rate": 3.637100717031411e-05, + "loss": 1.0999, + "step": 12540 + }, + { + "epoch": 0.69, + "learning_rate": 3.6301501761329226e-05, + "loss": 1.1879, + "step": 12545 + }, + { + "epoch": 0.69, + "learning_rate": 3.623204809799061e-05, + "loss": 1.1341, + "step": 12550 + }, + { + "epoch": 0.69, + "learning_rate": 3.6162646236719204e-05, + "loss": 1.1048, + "step": 12555 + }, + { + "epoch": 0.69, + "learning_rate": 3.609329623389372e-05, + "loss": 1.0768, + "step": 12560 + }, + { + "epoch": 0.69, + "learning_rate": 3.602399814585093e-05, + "loss": 1.0301, + "step": 12565 + }, + { + "epoch": 0.69, + "learning_rate": 3.595475202888518e-05, + "loss": 1.1138, + "step": 12570 + }, + { + "epoch": 0.69, + "learning_rate": 3.588555793924885e-05, + "loss": 1.129, + "step": 12575 + }, + { + "epoch": 0.69, + "learning_rate": 3.5816415933151864e-05, + "loss": 1.1351, + "step": 12580 + }, + { + "epoch": 0.69, + "learning_rate": 3.5747326066762005e-05, + "loss": 1.1261, + "step": 12585 + }, + { + "epoch": 0.69, + "learning_rate": 3.567828839620451e-05, + "loss": 1.1093, + "step": 12590 + }, + { + "epoch": 0.69, + "learning_rate": 3.560930297756239e-05, + "loss": 1.1134, + "step": 12595 + }, + { + "epoch": 0.69, + "learning_rate": 3.554036986687609e-05, + "loss": 1.0971, + "step": 12600 + }, + { + "epoch": 0.69, + "learning_rate": 3.547148912014368e-05, + "loss": 1.1343, + "step": 12605 + }, + { + "epoch": 0.69, + "learning_rate": 3.540266079332052e-05, + "loss": 1.075, + "step": 12610 + }, + { + "epoch": 0.69, + "learning_rate": 3.533388494231956e-05, + "loss": 1.0977, + "step": 12615 + }, + { + "epoch": 0.69, + "learning_rate": 3.5265161623011004e-05, + "loss": 1.1513, + "step": 12620 + }, + { + "epoch": 0.69, + "learning_rate": 3.5196490891222467e-05, + "loss": 1.1216, + "step": 12625 + }, + { + "epoch": 0.69, + "learning_rate": 3.512787280273878e-05, + "loss": 1.1924, + "step": 12630 + }, + { + "epoch": 0.69, + "learning_rate": 3.5059307413302026e-05, + "loss": 1.0665, + "step": 12635 + }, + { + "epoch": 0.69, + "learning_rate": 3.499079477861148e-05, + "loss": 1.068, + "step": 12640 + }, + { + "epoch": 0.69, + "learning_rate": 3.4922334954323564e-05, + "loss": 1.143, + "step": 12645 + }, + { + "epoch": 0.69, + "learning_rate": 3.485392799605186e-05, + "loss": 1.1131, + "step": 12650 + }, + { + "epoch": 0.69, + "learning_rate": 3.478557395936686e-05, + "loss": 1.1357, + "step": 12655 + }, + { + "epoch": 0.69, + "learning_rate": 3.471727289979619e-05, + "loss": 1.1969, + "step": 12660 + }, + { + "epoch": 0.69, + "learning_rate": 3.4649024872824374e-05, + "loss": 1.181, + "step": 12665 + }, + { + "epoch": 0.69, + "learning_rate": 3.458082993389296e-05, + "loss": 1.2171, + "step": 12670 + }, + { + "epoch": 0.69, + "learning_rate": 3.451268813840017e-05, + "loss": 1.1365, + "step": 12675 + }, + { + "epoch": 0.69, + "learning_rate": 3.4444599541701256e-05, + "loss": 1.1166, + "step": 12680 + }, + { + "epoch": 0.69, + "learning_rate": 3.437656419910813e-05, + "loss": 1.1471, + "step": 12685 + }, + { + "epoch": 0.7, + "learning_rate": 3.4308582165889556e-05, + "loss": 1.0413, + "step": 12690 + }, + { + "epoch": 0.7, + "learning_rate": 3.4240653497270855e-05, + "loss": 1.0813, + "step": 12695 + }, + { + "epoch": 0.7, + "learning_rate": 3.417277824843409e-05, + "loss": 1.0251, + "step": 12700 + }, + { + "epoch": 0.7, + "learning_rate": 3.410495647451786e-05, + "loss": 1.0604, + "step": 12705 + }, + { + "epoch": 0.7, + "learning_rate": 3.4037188230617426e-05, + "loss": 1.1318, + "step": 12710 + }, + { + "epoch": 0.7, + "learning_rate": 3.396947357178449e-05, + "loss": 1.1136, + "step": 12715 + }, + { + "epoch": 0.7, + "learning_rate": 3.39018125530272e-05, + "loss": 1.0477, + "step": 12720 + }, + { + "epoch": 0.7, + "learning_rate": 3.383420522931017e-05, + "loss": 1.1374, + "step": 12725 + }, + { + "epoch": 0.7, + "learning_rate": 3.376665165555445e-05, + "loss": 1.1639, + "step": 12730 + }, + { + "epoch": 0.7, + "learning_rate": 3.3699151886637325e-05, + "loss": 1.093, + "step": 12735 + }, + { + "epoch": 0.7, + "learning_rate": 3.3631705977392414e-05, + "loss": 1.1244, + "step": 12740 + }, + { + "epoch": 0.7, + "learning_rate": 3.3564313982609544e-05, + "loss": 1.1487, + "step": 12745 + }, + { + "epoch": 0.7, + "learning_rate": 3.3496975957034825e-05, + "loss": 1.1063, + "step": 12750 + }, + { + "epoch": 0.7, + "learning_rate": 3.3429691955370526e-05, + "loss": 1.1197, + "step": 12755 + }, + { + "epoch": 0.7, + "learning_rate": 3.3362462032274924e-05, + "loss": 1.1169, + "step": 12760 + }, + { + "epoch": 0.7, + "learning_rate": 3.329528624236247e-05, + "loss": 1.1236, + "step": 12765 + }, + { + "epoch": 0.7, + "learning_rate": 3.3228164640203524e-05, + "loss": 1.141, + "step": 12770 + }, + { + "epoch": 0.7, + "learning_rate": 3.3161097280324594e-05, + "loss": 1.1073, + "step": 12775 + }, + { + "epoch": 0.7, + "learning_rate": 3.3094084217207994e-05, + "loss": 1.1008, + "step": 12780 + }, + { + "epoch": 0.7, + "learning_rate": 3.3027125505291964e-05, + "loss": 1.133, + "step": 12785 + }, + { + "epoch": 0.7, + "learning_rate": 3.296022119897054e-05, + "loss": 1.1476, + "step": 12790 + }, + { + "epoch": 0.7, + "learning_rate": 3.28933713525937e-05, + "loss": 1.112, + "step": 12795 + }, + { + "epoch": 0.7, + "learning_rate": 3.282657602046706e-05, + "loss": 1.1707, + "step": 12800 + }, + { + "epoch": 0.7, + "learning_rate": 3.2759835256851994e-05, + "loss": 1.0002, + "step": 12805 + }, + { + "epoch": 0.7, + "learning_rate": 3.269314911596549e-05, + "loss": 1.1404, + "step": 12810 + }, + { + "epoch": 0.7, + "learning_rate": 3.262651765198029e-05, + "loss": 1.0785, + "step": 12815 + }, + { + "epoch": 0.7, + "learning_rate": 3.2559940919024584e-05, + "loss": 1.1305, + "step": 12820 + }, + { + "epoch": 0.7, + "learning_rate": 3.2493418971182253e-05, + "loss": 1.2168, + "step": 12825 + }, + { + "epoch": 0.7, + "learning_rate": 3.242695186249245e-05, + "loss": 1.0668, + "step": 12830 + }, + { + "epoch": 0.7, + "learning_rate": 3.236053964695e-05, + "loss": 1.1709, + "step": 12835 + }, + { + "epoch": 0.7, + "learning_rate": 3.2294182378505e-05, + "loss": 1.1051, + "step": 12840 + }, + { + "epoch": 0.7, + "learning_rate": 3.222788011106303e-05, + "loss": 1.1427, + "step": 12845 + }, + { + "epoch": 0.7, + "learning_rate": 3.2161632898484786e-05, + "loss": 1.1116, + "step": 12850 + }, + { + "epoch": 0.7, + "learning_rate": 3.2095440794586457e-05, + "loss": 1.1097, + "step": 12855 + }, + { + "epoch": 0.7, + "learning_rate": 3.202930385313938e-05, + "loss": 1.0516, + "step": 12860 + }, + { + "epoch": 0.7, + "learning_rate": 3.196322212787007e-05, + "loss": 1.1658, + "step": 12865 + }, + { + "epoch": 0.7, + "learning_rate": 3.189719567246018e-05, + "loss": 1.1148, + "step": 12870 + }, + { + "epoch": 0.71, + "learning_rate": 3.183122454054646e-05, + "loss": 1.1051, + "step": 12875 + }, + { + "epoch": 0.71, + "learning_rate": 3.176530878572077e-05, + "loss": 1.0867, + "step": 12880 + }, + { + "epoch": 0.71, + "learning_rate": 3.169944846152996e-05, + "loss": 1.124, + "step": 12885 + }, + { + "epoch": 0.71, + "learning_rate": 3.16336436214758e-05, + "loss": 1.198, + "step": 12890 + }, + { + "epoch": 0.71, + "learning_rate": 3.156789431901503e-05, + "loss": 1.0649, + "step": 12895 + }, + { + "epoch": 0.71, + "learning_rate": 3.1502200607559316e-05, + "loss": 1.1083, + "step": 12900 + }, + { + "epoch": 0.71, + "learning_rate": 3.143656254047509e-05, + "loss": 1.1248, + "step": 12905 + }, + { + "epoch": 0.71, + "learning_rate": 3.1370980171083654e-05, + "loss": 1.0997, + "step": 12910 + }, + { + "epoch": 0.71, + "learning_rate": 3.130545355266094e-05, + "loss": 1.1382, + "step": 12915 + }, + { + "epoch": 0.71, + "learning_rate": 3.123998273843774e-05, + "loss": 1.1247, + "step": 12920 + }, + { + "epoch": 0.71, + "learning_rate": 3.11745677815994e-05, + "loss": 1.219, + "step": 12925 + }, + { + "epoch": 0.71, + "learning_rate": 3.110920873528603e-05, + "loss": 1.0724, + "step": 12930 + }, + { + "epoch": 0.71, + "learning_rate": 3.1043905652592085e-05, + "loss": 1.1091, + "step": 12935 + }, + { + "epoch": 0.71, + "learning_rate": 3.0978658586566806e-05, + "loss": 1.1715, + "step": 12940 + }, + { + "epoch": 0.71, + "learning_rate": 3.091346759021377e-05, + "loss": 1.1078, + "step": 12945 + }, + { + "epoch": 0.71, + "learning_rate": 3.0848332716491084e-05, + "loss": 1.1069, + "step": 12950 + }, + { + "epoch": 0.71, + "learning_rate": 3.078325401831125e-05, + "loss": 1.0679, + "step": 12955 + }, + { + "epoch": 0.71, + "learning_rate": 3.071823154854111e-05, + "loss": 1.1401, + "step": 12960 + }, + { + "epoch": 0.71, + "learning_rate": 3.065326536000181e-05, + "loss": 1.1073, + "step": 12965 + }, + { + "epoch": 0.71, + "learning_rate": 3.058835550546883e-05, + "loss": 1.119, + "step": 12970 + }, + { + "epoch": 0.71, + "learning_rate": 3.052350203767193e-05, + "loss": 1.1249, + "step": 12975 + }, + { + "epoch": 0.71, + "learning_rate": 3.0458705009294886e-05, + "loss": 1.0811, + "step": 12980 + }, + { + "epoch": 0.71, + "learning_rate": 3.0393964472975814e-05, + "loss": 1.1094, + "step": 12985 + }, + { + "epoch": 0.71, + "learning_rate": 3.032928048130681e-05, + "loss": 1.112, + "step": 12990 + }, + { + "epoch": 0.71, + "learning_rate": 3.0264653086834182e-05, + "loss": 1.1141, + "step": 12995 + }, + { + "epoch": 0.71, + "learning_rate": 3.0200082342058043e-05, + "loss": 1.0816, + "step": 13000 + }, + { + "epoch": 0.71, + "learning_rate": 3.01355682994327e-05, + "loss": 1.1427, + "step": 13005 + }, + { + "epoch": 0.71, + "learning_rate": 3.0071111011366258e-05, + "loss": 1.1684, + "step": 13010 + }, + { + "epoch": 0.71, + "learning_rate": 3.0006710530220815e-05, + "loss": 1.1816, + "step": 13015 + }, + { + "epoch": 0.71, + "learning_rate": 2.9942366908312257e-05, + "loss": 1.1468, + "step": 13020 + }, + { + "epoch": 0.71, + "learning_rate": 2.987808019791029e-05, + "loss": 1.1237, + "step": 13025 + }, + { + "epoch": 0.71, + "learning_rate": 2.9813850451238364e-05, + "loss": 1.1578, + "step": 13030 + }, + { + "epoch": 0.71, + "learning_rate": 2.9749677720473768e-05, + "loss": 1.0485, + "step": 13035 + }, + { + "epoch": 0.71, + "learning_rate": 2.9685562057747327e-05, + "loss": 1.1203, + "step": 13040 + }, + { + "epoch": 0.71, + "learning_rate": 2.962150351514361e-05, + "loss": 1.2114, + "step": 13045 + }, + { + "epoch": 0.71, + "learning_rate": 2.9557502144700686e-05, + "loss": 1.1761, + "step": 13050 + }, + { + "epoch": 0.72, + "learning_rate": 2.949355799841031e-05, + "loss": 1.0751, + "step": 13055 + }, + { + "epoch": 0.72, + "learning_rate": 2.942967112821765e-05, + "loss": 1.1175, + "step": 13060 + }, + { + "epoch": 0.72, + "learning_rate": 2.9365841586021382e-05, + "loss": 1.1024, + "step": 13065 + }, + { + "epoch": 0.72, + "learning_rate": 2.930206942367357e-05, + "loss": 1.0135, + "step": 13070 + }, + { + "epoch": 0.72, + "learning_rate": 2.923835469297975e-05, + "loss": 1.1041, + "step": 13075 + }, + { + "epoch": 0.72, + "learning_rate": 2.917469744569879e-05, + "loss": 1.1339, + "step": 13080 + }, + { + "epoch": 0.72, + "learning_rate": 2.911109773354272e-05, + "loss": 1.1029, + "step": 13085 + }, + { + "epoch": 0.72, + "learning_rate": 2.904755560817702e-05, + "loss": 1.1495, + "step": 13090 + }, + { + "epoch": 0.72, + "learning_rate": 2.898407112122024e-05, + "loss": 1.2055, + "step": 13095 + }, + { + "epoch": 0.72, + "learning_rate": 2.892064432424425e-05, + "loss": 1.0567, + "step": 13100 + }, + { + "epoch": 0.72, + "learning_rate": 2.8857275268773956e-05, + "loss": 1.0984, + "step": 13105 + }, + { + "epoch": 0.72, + "learning_rate": 2.8793964006287357e-05, + "loss": 1.1613, + "step": 13110 + }, + { + "epoch": 0.72, + "learning_rate": 2.87307105882155e-05, + "loss": 1.0997, + "step": 13115 + }, + { + "epoch": 0.72, + "learning_rate": 2.8667515065942552e-05, + "loss": 1.0995, + "step": 13120 + }, + { + "epoch": 0.72, + "learning_rate": 2.8604377490805517e-05, + "loss": 1.1035, + "step": 13125 + }, + { + "epoch": 0.72, + "learning_rate": 2.8541297914094368e-05, + "loss": 1.1854, + "step": 13130 + }, + { + "epoch": 0.72, + "learning_rate": 2.8478276387051948e-05, + "loss": 1.1477, + "step": 13135 + }, + { + "epoch": 0.72, + "learning_rate": 2.8415312960874014e-05, + "loss": 1.1755, + "step": 13140 + }, + { + "epoch": 0.72, + "learning_rate": 2.835240768670906e-05, + "loss": 1.1125, + "step": 13145 + }, + { + "epoch": 0.72, + "learning_rate": 2.8289560615658364e-05, + "loss": 1.1011, + "step": 13150 + }, + { + "epoch": 0.72, + "learning_rate": 2.8226771798775853e-05, + "loss": 1.1177, + "step": 13155 + }, + { + "epoch": 0.72, + "learning_rate": 2.8164041287068264e-05, + "loss": 1.1192, + "step": 13160 + }, + { + "epoch": 0.72, + "learning_rate": 2.810136913149484e-05, + "loss": 1.1319, + "step": 13165 + }, + { + "epoch": 0.72, + "learning_rate": 2.8038755382967577e-05, + "loss": 1.0785, + "step": 13170 + }, + { + "epoch": 0.72, + "learning_rate": 2.7976200092350784e-05, + "loss": 1.0592, + "step": 13175 + }, + { + "epoch": 0.72, + "learning_rate": 2.7913703310461507e-05, + "loss": 1.1038, + "step": 13180 + }, + { + "epoch": 0.72, + "learning_rate": 2.785126508806913e-05, + "loss": 1.1802, + "step": 13185 + }, + { + "epoch": 0.72, + "learning_rate": 2.7788885475895558e-05, + "loss": 1.0879, + "step": 13190 + }, + { + "epoch": 0.72, + "learning_rate": 2.772656452461501e-05, + "loss": 1.1564, + "step": 13195 + }, + { + "epoch": 0.72, + "learning_rate": 2.766430228485405e-05, + "loss": 1.1016, + "step": 13200 + }, + { + "epoch": 0.72, + "learning_rate": 2.7602098807191637e-05, + "loss": 1.1713, + "step": 13205 + }, + { + "epoch": 0.72, + "learning_rate": 2.75399541421589e-05, + "loss": 1.0543, + "step": 13210 + }, + { + "epoch": 0.72, + "learning_rate": 2.747786834023924e-05, + "loss": 1.1137, + "step": 13215 + }, + { + "epoch": 0.72, + "learning_rate": 2.7415841451868165e-05, + "loss": 1.0905, + "step": 13220 + }, + { + "epoch": 0.72, + "learning_rate": 2.7353873527433476e-05, + "loss": 1.0709, + "step": 13225 + }, + { + "epoch": 0.72, + "learning_rate": 2.7291964617274895e-05, + "loss": 1.0793, + "step": 13230 + }, + { + "epoch": 0.72, + "learning_rate": 2.7230114771684407e-05, + "loss": 1.1697, + "step": 13235 + }, + { + "epoch": 0.73, + "learning_rate": 2.716832404090575e-05, + "loss": 1.0943, + "step": 13240 + }, + { + "epoch": 0.73, + "learning_rate": 2.7106592475134916e-05, + "loss": 1.0944, + "step": 13245 + }, + { + "epoch": 0.73, + "learning_rate": 2.7044920124519638e-05, + "loss": 1.1024, + "step": 13250 + }, + { + "epoch": 0.73, + "learning_rate": 2.6983307039159674e-05, + "loss": 1.1247, + "step": 13255 + }, + { + "epoch": 0.73, + "learning_rate": 2.6921753269106496e-05, + "loss": 1.1211, + "step": 13260 + }, + { + "epoch": 0.73, + "learning_rate": 2.686025886436356e-05, + "loss": 1.181, + "step": 13265 + }, + { + "epoch": 0.73, + "learning_rate": 2.679882387488593e-05, + "loss": 1.0776, + "step": 13270 + }, + { + "epoch": 0.73, + "learning_rate": 2.6737448350580598e-05, + "loss": 1.081, + "step": 13275 + }, + { + "epoch": 0.73, + "learning_rate": 2.6676132341306005e-05, + "loss": 1.0723, + "step": 13280 + }, + { + "epoch": 0.73, + "learning_rate": 2.661487589687246e-05, + "loss": 1.0979, + "step": 13285 + }, + { + "epoch": 0.73, + "learning_rate": 2.6553679067041762e-05, + "loss": 1.1002, + "step": 13290 + }, + { + "epoch": 0.73, + "learning_rate": 2.6492541901527327e-05, + "loss": 1.1407, + "step": 13295 + }, + { + "epoch": 0.73, + "learning_rate": 2.643146444999417e-05, + "loss": 1.0677, + "step": 13300 + }, + { + "epoch": 0.73, + "learning_rate": 2.6370446762058602e-05, + "loss": 1.1042, + "step": 13305 + }, + { + "epoch": 0.73, + "learning_rate": 2.630948888728859e-05, + "loss": 1.0832, + "step": 13310 + }, + { + "epoch": 0.73, + "learning_rate": 2.6248590875203353e-05, + "loss": 1.1264, + "step": 13315 + }, + { + "epoch": 0.73, + "learning_rate": 2.618775277527368e-05, + "loss": 1.1957, + "step": 13320 + }, + { + "epoch": 0.73, + "learning_rate": 2.6126974636921422e-05, + "loss": 1.0779, + "step": 13325 + }, + { + "epoch": 0.73, + "learning_rate": 2.6066256509519965e-05, + "loss": 1.0938, + "step": 13330 + }, + { + "epoch": 0.73, + "learning_rate": 2.600559844239377e-05, + "loss": 1.1281, + "step": 13335 + }, + { + "epoch": 0.73, + "learning_rate": 2.5945000484818674e-05, + "loss": 1.136, + "step": 13340 + }, + { + "epoch": 0.73, + "learning_rate": 2.588446268602148e-05, + "loss": 1.111, + "step": 13345 + }, + { + "epoch": 0.73, + "learning_rate": 2.582398509518031e-05, + "loss": 1.1041, + "step": 13350 + }, + { + "epoch": 0.73, + "learning_rate": 2.576356776142424e-05, + "loss": 1.0408, + "step": 13355 + }, + { + "epoch": 0.73, + "learning_rate": 2.5703210733833508e-05, + "loss": 1.1333, + "step": 13360 + }, + { + "epoch": 0.73, + "learning_rate": 2.56429140614393e-05, + "loss": 1.1083, + "step": 13365 + }, + { + "epoch": 0.73, + "learning_rate": 2.558267779322377e-05, + "loss": 1.1371, + "step": 13370 + }, + { + "epoch": 0.73, + "learning_rate": 2.5522501978119967e-05, + "loss": 1.1701, + "step": 13375 + }, + { + "epoch": 0.73, + "learning_rate": 2.5462386665011944e-05, + "loss": 1.1124, + "step": 13380 + }, + { + "epoch": 0.73, + "learning_rate": 2.540233190273452e-05, + "loss": 1.145, + "step": 13385 + }, + { + "epoch": 0.73, + "learning_rate": 2.5342337740073327e-05, + "loss": 1.0931, + "step": 13390 + }, + { + "epoch": 0.73, + "learning_rate": 2.5282404225764765e-05, + "loss": 1.1021, + "step": 13395 + }, + { + "epoch": 0.73, + "learning_rate": 2.5222531408496007e-05, + "loss": 1.0982, + "step": 13400 + }, + { + "epoch": 0.73, + "learning_rate": 2.516271933690497e-05, + "loss": 1.0976, + "step": 13405 + }, + { + "epoch": 0.73, + "learning_rate": 2.510296805958001e-05, + "loss": 1.0351, + "step": 13410 + }, + { + "epoch": 0.73, + "learning_rate": 2.5043277625060342e-05, + "loss": 1.0522, + "step": 13415 + }, + { + "epoch": 0.74, + "learning_rate": 2.498364808183559e-05, + "loss": 1.149, + "step": 13420 + }, + { + "epoch": 0.74, + "learning_rate": 2.492407947834603e-05, + "loss": 1.0945, + "step": 13425 + }, + { + "epoch": 0.74, + "learning_rate": 2.4864571862982354e-05, + "loss": 1.1975, + "step": 13430 + }, + { + "epoch": 0.74, + "learning_rate": 2.480512528408573e-05, + "loss": 1.0145, + "step": 13435 + }, + { + "epoch": 0.74, + "learning_rate": 2.4745739789947707e-05, + "loss": 1.0774, + "step": 13440 + }, + { + "epoch": 0.74, + "learning_rate": 2.4686415428810324e-05, + "loss": 1.0966, + "step": 13445 + }, + { + "epoch": 0.74, + "learning_rate": 2.462715224886584e-05, + "loss": 1.1288, + "step": 13450 + }, + { + "epoch": 0.74, + "learning_rate": 2.456795029825688e-05, + "loss": 1.1367, + "step": 13455 + }, + { + "epoch": 0.74, + "learning_rate": 2.450880962507626e-05, + "loss": 1.1045, + "step": 13460 + }, + { + "epoch": 0.74, + "learning_rate": 2.4449730277367138e-05, + "loss": 1.098, + "step": 13465 + }, + { + "epoch": 0.74, + "learning_rate": 2.4390712303122747e-05, + "loss": 1.1466, + "step": 13470 + }, + { + "epoch": 0.74, + "learning_rate": 2.4331755750286497e-05, + "loss": 1.1213, + "step": 13475 + }, + { + "epoch": 0.74, + "learning_rate": 2.4272860666751874e-05, + "loss": 1.107, + "step": 13480 + }, + { + "epoch": 0.74, + "learning_rate": 2.4214027100362504e-05, + "loss": 1.0435, + "step": 13485 + }, + { + "epoch": 0.74, + "learning_rate": 2.4155255098911936e-05, + "loss": 1.1201, + "step": 13490 + }, + { + "epoch": 0.74, + "learning_rate": 2.4096544710143864e-05, + "loss": 1.182, + "step": 13495 + }, + { + "epoch": 0.74, + "learning_rate": 2.403789598175171e-05, + "loss": 1.1396, + "step": 13500 + }, + { + "epoch": 0.74, + "learning_rate": 2.3979308961379008e-05, + "loss": 1.1197, + "step": 13505 + }, + { + "epoch": 0.74, + "learning_rate": 2.392078369661903e-05, + "loss": 1.0763, + "step": 13510 + }, + { + "epoch": 0.74, + "learning_rate": 2.386232023501497e-05, + "loss": 1.0941, + "step": 13515 + }, + { + "epoch": 0.74, + "learning_rate": 2.3803918624059778e-05, + "loss": 1.083, + "step": 13520 + }, + { + "epoch": 0.74, + "learning_rate": 2.3745578911196125e-05, + "loss": 1.1338, + "step": 13525 + }, + { + "epoch": 0.74, + "learning_rate": 2.3687301143816476e-05, + "loss": 1.1166, + "step": 13530 + }, + { + "epoch": 0.74, + "learning_rate": 2.3629085369262915e-05, + "loss": 1.1572, + "step": 13535 + }, + { + "epoch": 0.74, + "learning_rate": 2.3570931634827172e-05, + "loss": 1.0818, + "step": 13540 + }, + { + "epoch": 0.74, + "learning_rate": 2.351283998775057e-05, + "loss": 1.0831, + "step": 13545 + }, + { + "epoch": 0.74, + "learning_rate": 2.3454810475224076e-05, + "loss": 1.046, + "step": 13550 + }, + { + "epoch": 0.74, + "learning_rate": 2.3396843144388093e-05, + "loss": 1.0966, + "step": 13555 + }, + { + "epoch": 0.74, + "learning_rate": 2.3338938042332523e-05, + "loss": 1.1668, + "step": 13560 + }, + { + "epoch": 0.74, + "learning_rate": 2.3281095216096715e-05, + "loss": 1.1433, + "step": 13565 + }, + { + "epoch": 0.74, + "learning_rate": 2.32233147126695e-05, + "loss": 1.2075, + "step": 13570 + }, + { + "epoch": 0.74, + "learning_rate": 2.316559657898896e-05, + "loss": 1.0886, + "step": 13575 + }, + { + "epoch": 0.74, + "learning_rate": 2.3107940861942667e-05, + "loss": 1.1546, + "step": 13580 + }, + { + "epoch": 0.74, + "learning_rate": 2.30503476083673e-05, + "loss": 1.0657, + "step": 13585 + }, + { + "epoch": 0.74, + "learning_rate": 2.2992816865048957e-05, + "loss": 1.0969, + "step": 13590 + }, + { + "epoch": 0.74, + "learning_rate": 2.293534867872283e-05, + "loss": 1.077, + "step": 13595 + }, + { + "epoch": 0.74, + "learning_rate": 2.287794309607346e-05, + "loss": 1.1192, + "step": 13600 + }, + { + "epoch": 0.75, + "learning_rate": 2.2820600163734308e-05, + "loss": 1.0944, + "step": 13605 + }, + { + "epoch": 0.75, + "learning_rate": 2.2763319928288128e-05, + "loss": 1.1207, + "step": 13610 + }, + { + "epoch": 0.75, + "learning_rate": 2.270610243626664e-05, + "loss": 1.1778, + "step": 13615 + }, + { + "epoch": 0.75, + "learning_rate": 2.2648947734150673e-05, + "loss": 1.0632, + "step": 13620 + }, + { + "epoch": 0.75, + "learning_rate": 2.259185586836997e-05, + "loss": 1.1142, + "step": 13625 + }, + { + "epoch": 0.75, + "learning_rate": 2.253482688530324e-05, + "loss": 1.1039, + "step": 13630 + }, + { + "epoch": 0.75, + "learning_rate": 2.247786083127818e-05, + "loss": 1.1981, + "step": 13635 + }, + { + "epoch": 0.75, + "learning_rate": 2.2420957752571277e-05, + "loss": 1.0875, + "step": 13640 + }, + { + "epoch": 0.75, + "learning_rate": 2.2364117695407966e-05, + "loss": 1.1427, + "step": 13645 + }, + { + "epoch": 0.75, + "learning_rate": 2.230734070596232e-05, + "loss": 1.1005, + "step": 13650 + }, + { + "epoch": 0.75, + "learning_rate": 2.225062683035735e-05, + "loss": 1.1464, + "step": 13655 + }, + { + "epoch": 0.75, + "learning_rate": 2.219397611466468e-05, + "loss": 1.1583, + "step": 13660 + }, + { + "epoch": 0.75, + "learning_rate": 2.2137388604904757e-05, + "loss": 1.0324, + "step": 13665 + }, + { + "epoch": 0.75, + "learning_rate": 2.208086434704649e-05, + "loss": 1.1044, + "step": 13670 + }, + { + "epoch": 0.75, + "learning_rate": 2.202440338700757e-05, + "loss": 1.1663, + "step": 13675 + }, + { + "epoch": 0.75, + "learning_rate": 2.1968005770654178e-05, + "loss": 1.0342, + "step": 13680 + }, + { + "epoch": 0.75, + "learning_rate": 2.1911671543801118e-05, + "loss": 1.1859, + "step": 13685 + }, + { + "epoch": 0.75, + "learning_rate": 2.185540075221162e-05, + "loss": 1.1405, + "step": 13690 + }, + { + "epoch": 0.75, + "learning_rate": 2.1799193441597412e-05, + "loss": 1.1567, + "step": 13695 + }, + { + "epoch": 0.75, + "learning_rate": 2.174304965761864e-05, + "loss": 1.0746, + "step": 13700 + }, + { + "epoch": 0.75, + "learning_rate": 2.1686969445883893e-05, + "loss": 1.103, + "step": 13705 + }, + { + "epoch": 0.75, + "learning_rate": 2.163095285195006e-05, + "loss": 1.0605, + "step": 13710 + }, + { + "epoch": 0.75, + "learning_rate": 2.1574999921322382e-05, + "loss": 1.0985, + "step": 13715 + }, + { + "epoch": 0.75, + "learning_rate": 2.151911069945434e-05, + "loss": 1.0536, + "step": 13720 + }, + { + "epoch": 0.75, + "learning_rate": 2.146328523174774e-05, + "loss": 1.0509, + "step": 13725 + }, + { + "epoch": 0.75, + "learning_rate": 2.1407523563552542e-05, + "loss": 1.1405, + "step": 13730 + }, + { + "epoch": 0.75, + "learning_rate": 2.135182574016684e-05, + "loss": 1.139, + "step": 13735 + }, + { + "epoch": 0.75, + "learning_rate": 2.129619180683697e-05, + "loss": 1.158, + "step": 13740 + }, + { + "epoch": 0.75, + "learning_rate": 2.1240621808757265e-05, + "loss": 1.1466, + "step": 13745 + }, + { + "epoch": 0.75, + "learning_rate": 2.118511579107024e-05, + "loss": 1.1264, + "step": 13750 + }, + { + "epoch": 0.75, + "learning_rate": 2.1129673798866227e-05, + "loss": 1.1122, + "step": 13755 + }, + { + "epoch": 0.75, + "learning_rate": 2.1074295877183792e-05, + "loss": 1.1042, + "step": 13760 + }, + { + "epoch": 0.75, + "learning_rate": 2.1018982071009263e-05, + "loss": 1.0984, + "step": 13765 + }, + { + "epoch": 0.75, + "learning_rate": 2.0963732425277016e-05, + "loss": 1.1177, + "step": 13770 + }, + { + "epoch": 0.75, + "learning_rate": 2.0908546984869227e-05, + "loss": 1.0712, + "step": 13775 + }, + { + "epoch": 0.75, + "learning_rate": 2.085342579461593e-05, + "loss": 1.0738, + "step": 13780 + }, + { + "epoch": 0.76, + "learning_rate": 2.0798368899294965e-05, + "loss": 1.1011, + "step": 13785 + }, + { + "epoch": 0.76, + "learning_rate": 2.0743376343631973e-05, + "loss": 1.0885, + "step": 13790 + }, + { + "epoch": 0.76, + "learning_rate": 2.068844817230029e-05, + "loss": 1.1315, + "step": 13795 + }, + { + "epoch": 0.76, + "learning_rate": 2.063358442992097e-05, + "loss": 1.1061, + "step": 13800 + }, + { + "epoch": 0.76, + "learning_rate": 2.0578785161062694e-05, + "loss": 1.1283, + "step": 13805 + }, + { + "epoch": 0.76, + "learning_rate": 2.0524050410241843e-05, + "loss": 1.0953, + "step": 13810 + }, + { + "epoch": 0.76, + "learning_rate": 2.0469380221922317e-05, + "loss": 1.1648, + "step": 13815 + }, + { + "epoch": 0.76, + "learning_rate": 2.0414774640515587e-05, + "loss": 1.0729, + "step": 13820 + }, + { + "epoch": 0.76, + "learning_rate": 2.036023371038061e-05, + "loss": 1.1419, + "step": 13825 + }, + { + "epoch": 0.76, + "learning_rate": 2.030575747582393e-05, + "loss": 1.1052, + "step": 13830 + }, + { + "epoch": 0.76, + "learning_rate": 2.0251345981099388e-05, + "loss": 1.0747, + "step": 13835 + }, + { + "epoch": 0.76, + "learning_rate": 2.0196999270408374e-05, + "loss": 1.1237, + "step": 13840 + }, + { + "epoch": 0.76, + "learning_rate": 2.0142717387899564e-05, + "loss": 1.1346, + "step": 13845 + }, + { + "epoch": 0.76, + "learning_rate": 2.0088500377668944e-05, + "loss": 1.0706, + "step": 13850 + }, + { + "epoch": 0.76, + "learning_rate": 2.003434828375992e-05, + "loss": 1.1789, + "step": 13855 + }, + { + "epoch": 0.76, + "learning_rate": 1.998026115016306e-05, + "loss": 1.0862, + "step": 13860 + }, + { + "epoch": 0.76, + "learning_rate": 1.9926239020816193e-05, + "loss": 1.1025, + "step": 13865 + }, + { + "epoch": 0.76, + "learning_rate": 1.9872281939604308e-05, + "loss": 1.0944, + "step": 13870 + }, + { + "epoch": 0.76, + "learning_rate": 1.981838995035964e-05, + "loss": 1.1507, + "step": 13875 + }, + { + "epoch": 0.76, + "learning_rate": 1.9764563096861463e-05, + "loss": 1.1189, + "step": 13880 + }, + { + "epoch": 0.76, + "learning_rate": 1.9710801422836175e-05, + "loss": 1.0884, + "step": 13885 + }, + { + "epoch": 0.76, + "learning_rate": 1.965710497195719e-05, + "loss": 1.1377, + "step": 13890 + }, + { + "epoch": 0.76, + "learning_rate": 1.9603473787844994e-05, + "loss": 1.1438, + "step": 13895 + }, + { + "epoch": 0.76, + "learning_rate": 1.9549907914067013e-05, + "loss": 1.1214, + "step": 13900 + }, + { + "epoch": 0.76, + "learning_rate": 1.9496407394137676e-05, + "loss": 1.1477, + "step": 13905 + }, + { + "epoch": 0.76, + "learning_rate": 1.944297227151817e-05, + "loss": 1.0481, + "step": 13910 + }, + { + "epoch": 0.76, + "learning_rate": 1.9389602589616738e-05, + "loss": 1.184, + "step": 13915 + }, + { + "epoch": 0.76, + "learning_rate": 1.933629839178835e-05, + "loss": 1.1532, + "step": 13920 + }, + { + "epoch": 0.76, + "learning_rate": 1.9283059721334877e-05, + "loss": 1.0982, + "step": 13925 + }, + { + "epoch": 0.76, + "learning_rate": 1.9229886621504803e-05, + "loss": 1.1322, + "step": 13930 + }, + { + "epoch": 0.76, + "learning_rate": 1.917677913549351e-05, + "loss": 1.156, + "step": 13935 + }, + { + "epoch": 0.76, + "learning_rate": 1.9123737306442957e-05, + "loss": 1.1052, + "step": 13940 + }, + { + "epoch": 0.76, + "learning_rate": 1.9070761177441863e-05, + "loss": 1.0726, + "step": 13945 + }, + { + "epoch": 0.76, + "learning_rate": 1.901785079152551e-05, + "loss": 1.1467, + "step": 13950 + }, + { + "epoch": 0.76, + "learning_rate": 1.8965006191675762e-05, + "loss": 1.1082, + "step": 13955 + }, + { + "epoch": 0.76, + "learning_rate": 1.891222742082113e-05, + "loss": 1.1113, + "step": 13960 + }, + { + "epoch": 0.76, + "learning_rate": 1.8859514521836554e-05, + "loss": 1.121, + "step": 13965 + }, + { + "epoch": 0.77, + "learning_rate": 1.8806867537543508e-05, + "loss": 1.0504, + "step": 13970 + }, + { + "epoch": 0.77, + "learning_rate": 1.875428651070989e-05, + "loss": 1.0968, + "step": 13975 + }, + { + "epoch": 0.77, + "learning_rate": 1.8701771484050075e-05, + "loss": 1.0976, + "step": 13980 + }, + { + "epoch": 0.77, + "learning_rate": 1.8649322500224738e-05, + "loss": 1.1125, + "step": 13985 + }, + { + "epoch": 0.77, + "learning_rate": 1.8596939601841044e-05, + "loss": 1.1515, + "step": 13990 + }, + { + "epoch": 0.77, + "learning_rate": 1.8544622831452263e-05, + "loss": 1.1904, + "step": 13995 + }, + { + "epoch": 0.77, + "learning_rate": 1.849237223155814e-05, + "loss": 1.1123, + "step": 14000 + }, + { + "epoch": 0.77, + "learning_rate": 1.8440187844604552e-05, + "loss": 1.1667, + "step": 14005 + }, + { + "epoch": 0.77, + "learning_rate": 1.838806971298369e-05, + "loss": 1.1128, + "step": 14010 + }, + { + "epoch": 0.77, + "learning_rate": 1.8336017879033766e-05, + "loss": 1.1282, + "step": 14015 + }, + { + "epoch": 0.77, + "learning_rate": 1.8284032385039298e-05, + "loss": 1.1173, + "step": 14020 + }, + { + "epoch": 0.77, + "learning_rate": 1.823211327323079e-05, + "loss": 1.0989, + "step": 14025 + }, + { + "epoch": 0.77, + "learning_rate": 1.818026058578493e-05, + "loss": 1.0901, + "step": 14030 + }, + { + "epoch": 0.77, + "learning_rate": 1.8128474364824348e-05, + "loss": 1.1538, + "step": 14035 + }, + { + "epoch": 0.77, + "learning_rate": 1.807675465241774e-05, + "loss": 1.1019, + "step": 14040 + }, + { + "epoch": 0.77, + "learning_rate": 1.8025101490579708e-05, + "loss": 1.0927, + "step": 14045 + }, + { + "epoch": 0.77, + "learning_rate": 1.7973514921270904e-05, + "loss": 1.1338, + "step": 14050 + }, + { + "epoch": 0.77, + "learning_rate": 1.7921994986397788e-05, + "loss": 1.0442, + "step": 14055 + }, + { + "epoch": 0.77, + "learning_rate": 1.787054172781269e-05, + "loss": 1.1012, + "step": 14060 + }, + { + "epoch": 0.77, + "learning_rate": 1.7819155187313846e-05, + "loss": 1.1339, + "step": 14065 + }, + { + "epoch": 0.77, + "learning_rate": 1.776783540664521e-05, + "loss": 1.1106, + "step": 14070 + }, + { + "epoch": 0.77, + "learning_rate": 1.7716582427496617e-05, + "loss": 1.184, + "step": 14075 + }, + { + "epoch": 0.77, + "learning_rate": 1.7665396291503452e-05, + "loss": 1.156, + "step": 14080 + }, + { + "epoch": 0.77, + "learning_rate": 1.7614277040247003e-05, + "loss": 1.1387, + "step": 14085 + }, + { + "epoch": 0.77, + "learning_rate": 1.756322471525406e-05, + "loss": 1.1431, + "step": 14090 + }, + { + "epoch": 0.77, + "learning_rate": 1.751223935799719e-05, + "loss": 1.1367, + "step": 14095 + }, + { + "epoch": 0.77, + "learning_rate": 1.7461321009894452e-05, + "loss": 1.187, + "step": 14100 + }, + { + "epoch": 0.77, + "learning_rate": 1.7410469712309488e-05, + "loss": 1.0791, + "step": 14105 + }, + { + "epoch": 0.77, + "learning_rate": 1.7359685506551474e-05, + "loss": 1.0798, + "step": 14110 + }, + { + "epoch": 0.77, + "learning_rate": 1.730896843387515e-05, + "loss": 1.1235, + "step": 14115 + }, + { + "epoch": 0.77, + "learning_rate": 1.7258318535480632e-05, + "loss": 1.076, + "step": 14120 + }, + { + "epoch": 0.77, + "learning_rate": 1.7207735852513505e-05, + "loss": 1.0844, + "step": 14125 + }, + { + "epoch": 0.77, + "learning_rate": 1.7157220426064732e-05, + "loss": 1.1181, + "step": 14130 + }, + { + "epoch": 0.77, + "learning_rate": 1.7106772297170688e-05, + "loss": 1.1591, + "step": 14135 + }, + { + "epoch": 0.77, + "learning_rate": 1.7056391506813026e-05, + "loss": 1.1628, + "step": 14140 + }, + { + "epoch": 0.77, + "learning_rate": 1.700607809591873e-05, + "loss": 1.0763, + "step": 14145 + }, + { + "epoch": 0.78, + "learning_rate": 1.695583210535999e-05, + "loss": 1.1178, + "step": 14150 + }, + { + "epoch": 0.78, + "learning_rate": 1.6905653575954345e-05, + "loss": 1.1503, + "step": 14155 + }, + { + "epoch": 0.78, + "learning_rate": 1.6855542548464414e-05, + "loss": 1.096, + "step": 14160 + }, + { + "epoch": 0.78, + "learning_rate": 1.6805499063598006e-05, + "loss": 1.0913, + "step": 14165 + }, + { + "epoch": 0.78, + "learning_rate": 1.6755523162008128e-05, + "loss": 1.1116, + "step": 14170 + }, + { + "epoch": 0.78, + "learning_rate": 1.670561488429279e-05, + "loss": 1.1686, + "step": 14175 + }, + { + "epoch": 0.78, + "learning_rate": 1.665577427099517e-05, + "loss": 1.0464, + "step": 14180 + }, + { + "epoch": 0.78, + "learning_rate": 1.660600136260338e-05, + "loss": 1.1262, + "step": 14185 + }, + { + "epoch": 0.78, + "learning_rate": 1.6556296199550593e-05, + "loss": 1.1405, + "step": 14190 + }, + { + "epoch": 0.78, + "learning_rate": 1.6506658822214906e-05, + "loss": 1.0876, + "step": 14195 + }, + { + "epoch": 0.78, + "learning_rate": 1.6457089270919424e-05, + "loss": 1.0455, + "step": 14200 + }, + { + "epoch": 0.78, + "learning_rate": 1.6407587585932083e-05, + "loss": 1.0927, + "step": 14205 + }, + { + "epoch": 0.78, + "learning_rate": 1.6358153807465704e-05, + "loss": 1.1686, + "step": 14210 + }, + { + "epoch": 0.78, + "learning_rate": 1.6308787975677943e-05, + "loss": 1.1128, + "step": 14215 + }, + { + "epoch": 0.78, + "learning_rate": 1.6259490130671283e-05, + "loss": 1.1565, + "step": 14220 + }, + { + "epoch": 0.78, + "learning_rate": 1.6210260312492962e-05, + "loss": 1.1049, + "step": 14225 + }, + { + "epoch": 0.78, + "learning_rate": 1.616109856113494e-05, + "loss": 1.1708, + "step": 14230 + }, + { + "epoch": 0.78, + "learning_rate": 1.611200491653389e-05, + "loss": 1.1054, + "step": 14235 + }, + { + "epoch": 0.78, + "learning_rate": 1.606297941857121e-05, + "loss": 1.1213, + "step": 14240 + }, + { + "epoch": 0.78, + "learning_rate": 1.601402210707283e-05, + "loss": 1.1641, + "step": 14245 + }, + { + "epoch": 0.78, + "learning_rate": 1.596513302180945e-05, + "loss": 1.0863, + "step": 14250 + }, + { + "epoch": 0.78, + "learning_rate": 1.591631220249613e-05, + "loss": 1.0899, + "step": 14255 + }, + { + "epoch": 0.78, + "learning_rate": 1.5867559688792686e-05, + "loss": 1.156, + "step": 14260 + }, + { + "epoch": 0.78, + "learning_rate": 1.5818875520303297e-05, + "loss": 1.0327, + "step": 14265 + }, + { + "epoch": 0.78, + "learning_rate": 1.5770259736576746e-05, + "loss": 1.0896, + "step": 14270 + }, + { + "epoch": 0.78, + "learning_rate": 1.5721712377106107e-05, + "loss": 1.0675, + "step": 14275 + }, + { + "epoch": 0.78, + "learning_rate": 1.5673233481329e-05, + "loss": 1.0433, + "step": 14280 + }, + { + "epoch": 0.78, + "learning_rate": 1.5624823088627418e-05, + "loss": 1.1567, + "step": 14285 + }, + { + "epoch": 0.78, + "learning_rate": 1.5576481238327648e-05, + "loss": 1.1344, + "step": 14290 + }, + { + "epoch": 0.78, + "learning_rate": 1.5528207969700317e-05, + "loss": 1.1423, + "step": 14295 + }, + { + "epoch": 0.78, + "learning_rate": 1.5480003321960316e-05, + "loss": 1.0844, + "step": 14300 + }, + { + "epoch": 0.78, + "learning_rate": 1.5431867334266886e-05, + "loss": 1.1175, + "step": 14305 + }, + { + "epoch": 0.78, + "learning_rate": 1.5383800045723364e-05, + "loss": 1.094, + "step": 14310 + }, + { + "epoch": 0.78, + "learning_rate": 1.5335801495377417e-05, + "loss": 1.1264, + "step": 14315 + }, + { + "epoch": 0.78, + "learning_rate": 1.5287871722220698e-05, + "loss": 1.1052, + "step": 14320 + }, + { + "epoch": 0.78, + "learning_rate": 1.5240010765189155e-05, + "loss": 1.1341, + "step": 14325 + }, + { + "epoch": 0.78, + "learning_rate": 1.5192218663162729e-05, + "loss": 1.0309, + "step": 14330 + }, + { + "epoch": 0.79, + "learning_rate": 1.5144495454965528e-05, + "loss": 1.1309, + "step": 14335 + }, + { + "epoch": 0.79, + "learning_rate": 1.509684117936553e-05, + "loss": 1.1607, + "step": 14340 + }, + { + "epoch": 0.79, + "learning_rate": 1.5049255875074885e-05, + "loss": 1.1157, + "step": 14345 + }, + { + "epoch": 0.79, + "learning_rate": 1.5001739580749606e-05, + "loss": 1.2165, + "step": 14350 + }, + { + "epoch": 0.79, + "learning_rate": 1.4954292334989738e-05, + "loss": 1.1538, + "step": 14355 + }, + { + "epoch": 0.79, + "learning_rate": 1.4906914176339105e-05, + "loss": 1.06, + "step": 14360 + }, + { + "epoch": 0.79, + "learning_rate": 1.4859605143285526e-05, + "loss": 1.1644, + "step": 14365 + }, + { + "epoch": 0.79, + "learning_rate": 1.4812365274260596e-05, + "loss": 1.0295, + "step": 14370 + }, + { + "epoch": 0.79, + "learning_rate": 1.4765194607639782e-05, + "loss": 1.0812, + "step": 14375 + }, + { + "epoch": 0.79, + "learning_rate": 1.4718093181742291e-05, + "loss": 1.0472, + "step": 14380 + }, + { + "epoch": 0.79, + "learning_rate": 1.4671061034831063e-05, + "loss": 1.0802, + "step": 14385 + }, + { + "epoch": 0.79, + "learning_rate": 1.462409820511284e-05, + "loss": 1.1038, + "step": 14390 + }, + { + "epoch": 0.79, + "learning_rate": 1.4577204730737952e-05, + "loss": 1.1691, + "step": 14395 + }, + { + "epoch": 0.79, + "learning_rate": 1.4530380649800507e-05, + "loss": 1.129, + "step": 14400 + }, + { + "epoch": 0.79, + "learning_rate": 1.4483626000338084e-05, + "loss": 1.1195, + "step": 14405 + }, + { + "epoch": 0.79, + "learning_rate": 1.4436940820332006e-05, + "loss": 1.0641, + "step": 14410 + }, + { + "epoch": 0.79, + "learning_rate": 1.4390325147707078e-05, + "loss": 1.1007, + "step": 14415 + }, + { + "epoch": 0.79, + "learning_rate": 1.4343779020331729e-05, + "loss": 1.1644, + "step": 14420 + }, + { + "epoch": 0.79, + "learning_rate": 1.4297302476017729e-05, + "loss": 1.1147, + "step": 14425 + }, + { + "epoch": 0.79, + "learning_rate": 1.4250895552520505e-05, + "loss": 1.0384, + "step": 14430 + }, + { + "epoch": 0.79, + "learning_rate": 1.4204558287538793e-05, + "loss": 1.13, + "step": 14435 + }, + { + "epoch": 0.79, + "learning_rate": 1.4158290718714839e-05, + "loss": 1.0701, + "step": 14440 + }, + { + "epoch": 0.79, + "learning_rate": 1.4112092883634211e-05, + "loss": 1.0848, + "step": 14445 + }, + { + "epoch": 0.79, + "learning_rate": 1.4065964819825853e-05, + "loss": 1.0651, + "step": 14450 + }, + { + "epoch": 0.79, + "learning_rate": 1.4019906564761966e-05, + "loss": 1.0654, + "step": 14455 + }, + { + "epoch": 0.79, + "learning_rate": 1.3973918155858178e-05, + "loss": 1.1258, + "step": 14460 + }, + { + "epoch": 0.79, + "learning_rate": 1.3927999630473253e-05, + "loss": 1.0602, + "step": 14465 + }, + { + "epoch": 0.79, + "learning_rate": 1.3882151025909237e-05, + "loss": 1.1346, + "step": 14470 + }, + { + "epoch": 0.79, + "learning_rate": 1.383637237941135e-05, + "loss": 1.1683, + "step": 14475 + }, + { + "epoch": 0.79, + "learning_rate": 1.3790663728168021e-05, + "loss": 1.0821, + "step": 14480 + }, + { + "epoch": 0.79, + "learning_rate": 1.37450251093108e-05, + "loss": 1.069, + "step": 14485 + }, + { + "epoch": 0.79, + "learning_rate": 1.3699456559914309e-05, + "loss": 1.0604, + "step": 14490 + }, + { + "epoch": 0.79, + "learning_rate": 1.365395811699633e-05, + "loss": 1.1048, + "step": 14495 + }, + { + "epoch": 0.79, + "learning_rate": 1.3608529817517584e-05, + "loss": 1.121, + "step": 14500 + }, + { + "epoch": 0.79, + "learning_rate": 1.3563171698381941e-05, + "loss": 1.0915, + "step": 14505 + }, + { + "epoch": 0.79, + "learning_rate": 1.3517883796436152e-05, + "loss": 1.1266, + "step": 14510 + }, + { + "epoch": 0.8, + "learning_rate": 1.3472666148469976e-05, + "loss": 1.1148, + "step": 14515 + }, + { + "epoch": 0.8, + "learning_rate": 1.3427518791216048e-05, + "loss": 1.1668, + "step": 14520 + }, + { + "epoch": 0.8, + "learning_rate": 1.3382441761350023e-05, + "loss": 1.0674, + "step": 14525 + }, + { + "epoch": 0.8, + "learning_rate": 1.3337435095490296e-05, + "loss": 1.1686, + "step": 14530 + }, + { + "epoch": 0.8, + "learning_rate": 1.3292498830198169e-05, + "loss": 1.1115, + "step": 14535 + }, + { + "epoch": 0.8, + "learning_rate": 1.3247633001977705e-05, + "loss": 1.1087, + "step": 14540 + }, + { + "epoch": 0.8, + "learning_rate": 1.3202837647275842e-05, + "loss": 1.1455, + "step": 14545 + }, + { + "epoch": 0.8, + "learning_rate": 1.3158112802482167e-05, + "loss": 1.1759, + "step": 14550 + }, + { + "epoch": 0.8, + "learning_rate": 1.3113458503929029e-05, + "loss": 1.1278, + "step": 14555 + }, + { + "epoch": 0.8, + "learning_rate": 1.3068874787891457e-05, + "loss": 1.0648, + "step": 14560 + }, + { + "epoch": 0.8, + "learning_rate": 1.302436169058719e-05, + "loss": 1.1783, + "step": 14565 + }, + { + "epoch": 0.8, + "learning_rate": 1.2979919248176514e-05, + "loss": 1.0736, + "step": 14570 + }, + { + "epoch": 0.8, + "learning_rate": 1.2935547496762438e-05, + "loss": 1.1644, + "step": 14575 + }, + { + "epoch": 0.8, + "learning_rate": 1.2891246472390373e-05, + "loss": 1.0614, + "step": 14580 + }, + { + "epoch": 0.8, + "learning_rate": 1.284701621104847e-05, + "loss": 1.1576, + "step": 14585 + }, + { + "epoch": 0.8, + "learning_rate": 1.2802856748667237e-05, + "loss": 1.1394, + "step": 14590 + }, + { + "epoch": 0.8, + "learning_rate": 1.2758768121119792e-05, + "loss": 1.0738, + "step": 14595 + }, + { + "epoch": 0.8, + "learning_rate": 1.2714750364221584e-05, + "loss": 1.0264, + "step": 14600 + }, + { + "epoch": 0.8, + "learning_rate": 1.2670803513730577e-05, + "loss": 1.0791, + "step": 14605 + }, + { + "epoch": 0.8, + "learning_rate": 1.2626927605347147e-05, + "loss": 1.0826, + "step": 14610 + }, + { + "epoch": 0.8, + "learning_rate": 1.2583122674713974e-05, + "loss": 1.1423, + "step": 14615 + }, + { + "epoch": 0.8, + "learning_rate": 1.2539388757416136e-05, + "loss": 1.0908, + "step": 14620 + }, + { + "epoch": 0.8, + "learning_rate": 1.2495725888980948e-05, + "loss": 1.1082, + "step": 14625 + }, + { + "epoch": 0.8, + "learning_rate": 1.2452134104878124e-05, + "loss": 1.1416, + "step": 14630 + }, + { + "epoch": 0.8, + "learning_rate": 1.2408613440519524e-05, + "loss": 1.1877, + "step": 14635 + }, + { + "epoch": 0.8, + "learning_rate": 1.2365163931259293e-05, + "loss": 1.1972, + "step": 14640 + }, + { + "epoch": 0.8, + "learning_rate": 1.2321785612393721e-05, + "loss": 1.2007, + "step": 14645 + }, + { + "epoch": 0.8, + "learning_rate": 1.2278478519161363e-05, + "loss": 1.1861, + "step": 14650 + }, + { + "epoch": 0.8, + "learning_rate": 1.2235242686742796e-05, + "loss": 1.1312, + "step": 14655 + }, + { + "epoch": 0.8, + "learning_rate": 1.2192078150260844e-05, + "loss": 1.1126, + "step": 14660 + }, + { + "epoch": 0.8, + "learning_rate": 1.2148984944780229e-05, + "loss": 1.152, + "step": 14665 + }, + { + "epoch": 0.8, + "learning_rate": 1.2105963105307906e-05, + "loss": 1.0607, + "step": 14670 + }, + { + "epoch": 0.8, + "learning_rate": 1.206301266679274e-05, + "loss": 1.1657, + "step": 14675 + }, + { + "epoch": 0.8, + "learning_rate": 1.2020133664125698e-05, + "loss": 1.1544, + "step": 14680 + }, + { + "epoch": 0.8, + "learning_rate": 1.1977326132139577e-05, + "loss": 1.0904, + "step": 14685 + }, + { + "epoch": 0.8, + "learning_rate": 1.1934590105609232e-05, + "loss": 1.0232, + "step": 14690 + }, + { + "epoch": 0.8, + "learning_rate": 1.1891925619251365e-05, + "loss": 1.1031, + "step": 14695 + }, + { + "epoch": 0.81, + "learning_rate": 1.1849332707724626e-05, + "loss": 1.1519, + "step": 14700 + }, + { + "epoch": 0.81, + "learning_rate": 1.180681140562946e-05, + "loss": 1.0821, + "step": 14705 + }, + { + "epoch": 0.81, + "learning_rate": 1.1764361747508125e-05, + "loss": 1.1709, + "step": 14710 + }, + { + "epoch": 0.81, + "learning_rate": 1.172198376784478e-05, + "loss": 1.0843, + "step": 14715 + }, + { + "epoch": 0.81, + "learning_rate": 1.1679677501065233e-05, + "loss": 1.1104, + "step": 14720 + }, + { + "epoch": 0.81, + "learning_rate": 1.1637442981537173e-05, + "loss": 1.1851, + "step": 14725 + }, + { + "epoch": 0.81, + "learning_rate": 1.1595280243569817e-05, + "loss": 1.149, + "step": 14730 + }, + { + "epoch": 0.81, + "learning_rate": 1.155318932141426e-05, + "loss": 1.1588, + "step": 14735 + }, + { + "epoch": 0.81, + "learning_rate": 1.1511170249263138e-05, + "loss": 1.0147, + "step": 14740 + }, + { + "epoch": 0.81, + "learning_rate": 1.1469223061250799e-05, + "loss": 1.1911, + "step": 14745 + }, + { + "epoch": 0.81, + "learning_rate": 1.142734779145308e-05, + "loss": 1.1319, + "step": 14750 + }, + { + "epoch": 0.81, + "learning_rate": 1.1385544473887543e-05, + "loss": 1.076, + "step": 14755 + }, + { + "epoch": 0.81, + "learning_rate": 1.134381314251317e-05, + "loss": 1.0629, + "step": 14760 + }, + { + "epoch": 0.81, + "learning_rate": 1.1302153831230565e-05, + "loss": 1.0782, + "step": 14765 + }, + { + "epoch": 0.81, + "learning_rate": 1.126056657388177e-05, + "loss": 1.0539, + "step": 14770 + }, + { + "epoch": 0.81, + "learning_rate": 1.121905140425029e-05, + "loss": 1.1337, + "step": 14775 + }, + { + "epoch": 0.81, + "learning_rate": 1.1177608356061075e-05, + "loss": 1.129, + "step": 14780 + }, + { + "epoch": 0.81, + "learning_rate": 1.1136237462980521e-05, + "loss": 1.0913, + "step": 14785 + }, + { + "epoch": 0.81, + "learning_rate": 1.1094938758616386e-05, + "loss": 1.0456, + "step": 14790 + }, + { + "epoch": 0.81, + "learning_rate": 1.1053712276517785e-05, + "loss": 1.1827, + "step": 14795 + }, + { + "epoch": 0.81, + "learning_rate": 1.101255805017512e-05, + "loss": 1.1398, + "step": 14800 + }, + { + "epoch": 0.81, + "learning_rate": 1.0971476113020192e-05, + "loss": 1.1424, + "step": 14805 + }, + { + "epoch": 0.81, + "learning_rate": 1.0930466498426e-05, + "loss": 1.1412, + "step": 14810 + }, + { + "epoch": 0.81, + "learning_rate": 1.0889529239706811e-05, + "loss": 1.177, + "step": 14815 + }, + { + "epoch": 0.81, + "learning_rate": 1.0848664370118102e-05, + "loss": 1.1508, + "step": 14820 + }, + { + "epoch": 0.81, + "learning_rate": 1.0807871922856573e-05, + "loss": 1.151, + "step": 14825 + }, + { + "epoch": 0.81, + "learning_rate": 1.0767151931060129e-05, + "loss": 1.1165, + "step": 14830 + }, + { + "epoch": 0.81, + "learning_rate": 1.0726504427807686e-05, + "loss": 1.047, + "step": 14835 + }, + { + "epoch": 0.81, + "learning_rate": 1.0685929446119395e-05, + "loss": 1.1675, + "step": 14840 + }, + { + "epoch": 0.81, + "learning_rate": 1.0645427018956423e-05, + "loss": 1.1338, + "step": 14845 + }, + { + "epoch": 0.81, + "learning_rate": 1.0604997179221066e-05, + "loss": 1.1181, + "step": 14850 + }, + { + "epoch": 0.81, + "learning_rate": 1.0564639959756595e-05, + "loss": 1.1002, + "step": 14855 + }, + { + "epoch": 0.81, + "learning_rate": 1.0524355393347297e-05, + "loss": 1.132, + "step": 14860 + }, + { + "epoch": 0.81, + "learning_rate": 1.0484143512718438e-05, + "loss": 1.1145, + "step": 14865 + }, + { + "epoch": 0.81, + "learning_rate": 1.0444004350536286e-05, + "loss": 1.0696, + "step": 14870 + }, + { + "epoch": 0.81, + "learning_rate": 1.0403937939407958e-05, + "loss": 1.0861, + "step": 14875 + }, + { + "epoch": 0.82, + "learning_rate": 1.0363944311881546e-05, + "loss": 1.0521, + "step": 14880 + }, + { + "epoch": 0.82, + "learning_rate": 1.0324023500445933e-05, + "loss": 1.188, + "step": 14885 + }, + { + "epoch": 0.82, + "learning_rate": 1.0284175537530948e-05, + "loss": 1.0528, + "step": 14890 + }, + { + "epoch": 0.82, + "learning_rate": 1.0244400455507161e-05, + "loss": 1.1431, + "step": 14895 + }, + { + "epoch": 0.82, + "learning_rate": 1.0204698286685984e-05, + "loss": 1.0853, + "step": 14900 + }, + { + "epoch": 0.82, + "learning_rate": 1.016506906331954e-05, + "loss": 1.1475, + "step": 14905 + }, + { + "epoch": 0.82, + "learning_rate": 1.0125512817600798e-05, + "loss": 1.177, + "step": 14910 + }, + { + "epoch": 0.82, + "learning_rate": 1.0086029581663333e-05, + "loss": 1.1124, + "step": 14915 + }, + { + "epoch": 0.82, + "learning_rate": 1.0046619387581497e-05, + "loss": 1.1108, + "step": 14920 + }, + { + "epoch": 0.82, + "learning_rate": 1.0007282267370218e-05, + "loss": 1.0483, + "step": 14925 + }, + { + "epoch": 0.82, + "learning_rate": 9.96801825298513e-06, + "loss": 1.1628, + "step": 14930 + }, + { + "epoch": 0.82, + "learning_rate": 9.9288273763225e-06, + "loss": 1.1352, + "step": 14935 + }, + { + "epoch": 0.82, + "learning_rate": 9.88970966921909e-06, + "loss": 1.0802, + "step": 14940 + }, + { + "epoch": 0.82, + "learning_rate": 9.850665163452308e-06, + "loss": 1.119, + "step": 14945 + }, + { + "epoch": 0.82, + "learning_rate": 9.811693890740034e-06, + "loss": 1.156, + "step": 14950 + }, + { + "epoch": 0.82, + "learning_rate": 9.772795882740712e-06, + "loss": 1.163, + "step": 14955 + }, + { + "epoch": 0.82, + "learning_rate": 9.733971171053233e-06, + "loss": 1.1857, + "step": 14960 + }, + { + "epoch": 0.82, + "learning_rate": 9.695219787216966e-06, + "loss": 1.0886, + "step": 14965 + }, + { + "epoch": 0.82, + "learning_rate": 9.656541762711666e-06, + "loss": 1.1179, + "step": 14970 + }, + { + "epoch": 0.82, + "learning_rate": 9.617937128957576e-06, + "loss": 1.0918, + "step": 14975 + }, + { + "epoch": 0.82, + "learning_rate": 9.57940591731523e-06, + "loss": 1.1592, + "step": 14980 + }, + { + "epoch": 0.82, + "learning_rate": 9.540948159085627e-06, + "loss": 1.1356, + "step": 14985 + }, + { + "epoch": 0.82, + "learning_rate": 9.502563885509961e-06, + "loss": 1.1696, + "step": 14990 + }, + { + "epoch": 0.82, + "learning_rate": 9.464253127769838e-06, + "loss": 1.1137, + "step": 14995 + }, + { + "epoch": 0.82, + "learning_rate": 9.426015916987071e-06, + "loss": 1.1584, + "step": 15000 + }, + { + "epoch": 0.82, + "learning_rate": 9.387852284223826e-06, + "loss": 1.1233, + "step": 15005 + }, + { + "epoch": 0.82, + "learning_rate": 9.34976226048236e-06, + "loss": 1.0998, + "step": 15010 + }, + { + "epoch": 0.82, + "learning_rate": 9.311745876705242e-06, + "loss": 1.1126, + "step": 15015 + }, + { + "epoch": 0.82, + "learning_rate": 9.27380316377518e-06, + "loss": 1.0879, + "step": 15020 + }, + { + "epoch": 0.82, + "learning_rate": 9.235934152515064e-06, + "loss": 1.1092, + "step": 15025 + }, + { + "epoch": 0.82, + "learning_rate": 9.198138873687829e-06, + "loss": 1.1679, + "step": 15030 + }, + { + "epoch": 0.82, + "learning_rate": 9.160417357996594e-06, + "loss": 1.1006, + "step": 15035 + }, + { + "epoch": 0.82, + "learning_rate": 9.122769636084569e-06, + "loss": 1.0688, + "step": 15040 + }, + { + "epoch": 0.82, + "learning_rate": 9.085195738534952e-06, + "loss": 1.1219, + "step": 15045 + }, + { + "epoch": 0.82, + "learning_rate": 9.047695695871012e-06, + "loss": 1.1509, + "step": 15050 + }, + { + "epoch": 0.82, + "learning_rate": 9.010269538555977e-06, + "loss": 1.1096, + "step": 15055 + }, + { + "epoch": 0.82, + "learning_rate": 8.972917296993138e-06, + "loss": 1.0901, + "step": 15060 + }, + { + "epoch": 0.83, + "learning_rate": 8.935639001525636e-06, + "loss": 1.0876, + "step": 15065 + }, + { + "epoch": 0.83, + "learning_rate": 8.898434682436659e-06, + "loss": 1.1571, + "step": 15070 + }, + { + "epoch": 0.83, + "learning_rate": 8.861304369949158e-06, + "loss": 1.1578, + "step": 15075 + }, + { + "epoch": 0.83, + "learning_rate": 8.8242480942261e-06, + "loss": 1.0897, + "step": 15080 + }, + { + "epoch": 0.83, + "learning_rate": 8.787265885370216e-06, + "loss": 1.1255, + "step": 15085 + }, + { + "epoch": 0.83, + "learning_rate": 8.750357773424152e-06, + "loss": 1.1242, + "step": 15090 + }, + { + "epoch": 0.83, + "learning_rate": 8.713523788370244e-06, + "loss": 1.0924, + "step": 15095 + }, + { + "epoch": 0.83, + "learning_rate": 8.676763960130736e-06, + "loss": 1.0899, + "step": 15100 + }, + { + "epoch": 0.83, + "learning_rate": 8.64007831856752e-06, + "loss": 1.1433, + "step": 15105 + }, + { + "epoch": 0.83, + "learning_rate": 8.603466893482332e-06, + "loss": 1.1505, + "step": 15110 + }, + { + "epoch": 0.83, + "learning_rate": 8.566929714616527e-06, + "loss": 1.0991, + "step": 15115 + }, + { + "epoch": 0.83, + "learning_rate": 8.530466811651198e-06, + "loss": 1.1318, + "step": 15120 + }, + { + "epoch": 0.83, + "learning_rate": 8.494078214207062e-06, + "loss": 1.1226, + "step": 15125 + }, + { + "epoch": 0.83, + "learning_rate": 8.457763951844514e-06, + "loss": 1.1172, + "step": 15130 + }, + { + "epoch": 0.83, + "learning_rate": 8.421524054063524e-06, + "loss": 1.0945, + "step": 15135 + }, + { + "epoch": 0.83, + "learning_rate": 8.385358550303693e-06, + "loss": 1.0909, + "step": 15140 + }, + { + "epoch": 0.83, + "learning_rate": 8.349267469944135e-06, + "loss": 1.1227, + "step": 15145 + }, + { + "epoch": 0.83, + "learning_rate": 8.313250842303544e-06, + "loss": 1.1964, + "step": 15150 + }, + { + "epoch": 0.83, + "learning_rate": 8.277308696640186e-06, + "loss": 1.1688, + "step": 15155 + }, + { + "epoch": 0.83, + "learning_rate": 8.241441062151657e-06, + "loss": 1.0771, + "step": 15160 + }, + { + "epoch": 0.83, + "learning_rate": 8.205647967975205e-06, + "loss": 1.1096, + "step": 15165 + }, + { + "epoch": 0.83, + "learning_rate": 8.169929443187396e-06, + "loss": 1.1335, + "step": 15170 + }, + { + "epoch": 0.83, + "learning_rate": 8.13428551680431e-06, + "loss": 1.1183, + "step": 15175 + }, + { + "epoch": 0.83, + "learning_rate": 8.09871621778137e-06, + "loss": 1.1953, + "step": 15180 + }, + { + "epoch": 0.83, + "learning_rate": 8.063221575013382e-06, + "loss": 1.0554, + "step": 15185 + }, + { + "epoch": 0.83, + "learning_rate": 8.027801617334511e-06, + "loss": 1.0197, + "step": 15190 + }, + { + "epoch": 0.83, + "learning_rate": 7.992456373518278e-06, + "loss": 1.044, + "step": 15195 + }, + { + "epoch": 0.83, + "learning_rate": 7.957185872277487e-06, + "loss": 1.069, + "step": 15200 + }, + { + "epoch": 0.83, + "learning_rate": 7.921990142264202e-06, + "loss": 1.0493, + "step": 15205 + }, + { + "epoch": 0.83, + "learning_rate": 7.886869212069771e-06, + "loss": 1.1269, + "step": 15210 + }, + { + "epoch": 0.83, + "learning_rate": 7.851823110224788e-06, + "loss": 1.0462, + "step": 15215 + }, + { + "epoch": 0.83, + "learning_rate": 7.81685186519906e-06, + "loss": 1.0593, + "step": 15220 + }, + { + "epoch": 0.83, + "learning_rate": 7.78195550540155e-06, + "loss": 1.1268, + "step": 15225 + }, + { + "epoch": 0.83, + "learning_rate": 7.747134059180393e-06, + "loss": 1.0949, + "step": 15230 + }, + { + "epoch": 0.83, + "learning_rate": 7.71238755482293e-06, + "loss": 1.0858, + "step": 15235 + }, + { + "epoch": 0.83, + "learning_rate": 7.677716020555556e-06, + "loss": 1.1019, + "step": 15240 + }, + { + "epoch": 0.84, + "learning_rate": 7.643119484543788e-06, + "loss": 1.0684, + "step": 15245 + }, + { + "epoch": 0.84, + "learning_rate": 7.608597974892178e-06, + "loss": 1.0584, + "step": 15250 + }, + { + "epoch": 0.84, + "learning_rate": 7.574151519644415e-06, + "loss": 1.1031, + "step": 15255 + }, + { + "epoch": 0.84, + "learning_rate": 7.539780146783173e-06, + "loss": 1.0976, + "step": 15260 + }, + { + "epoch": 0.84, + "learning_rate": 7.505483884230105e-06, + "loss": 1.0825, + "step": 15265 + }, + { + "epoch": 0.84, + "learning_rate": 7.471262759845878e-06, + "loss": 1.0622, + "step": 15270 + }, + { + "epoch": 0.84, + "learning_rate": 7.4371168014301e-06, + "loss": 1.1313, + "step": 15275 + }, + { + "epoch": 0.84, + "learning_rate": 7.403046036721362e-06, + "loss": 1.1014, + "step": 15280 + }, + { + "epoch": 0.84, + "learning_rate": 7.369050493397123e-06, + "loss": 1.1541, + "step": 15285 + }, + { + "epoch": 0.84, + "learning_rate": 7.335130199073759e-06, + "loss": 1.0662, + "step": 15290 + }, + { + "epoch": 0.84, + "learning_rate": 7.301285181306505e-06, + "loss": 1.1495, + "step": 15295 + }, + { + "epoch": 0.84, + "learning_rate": 7.267515467589458e-06, + "loss": 1.2118, + "step": 15300 + }, + { + "epoch": 0.84, + "learning_rate": 7.233821085355541e-06, + "loss": 1.0682, + "step": 15305 + }, + { + "epoch": 0.84, + "learning_rate": 7.200202061976469e-06, + "loss": 1.0419, + "step": 15310 + }, + { + "epoch": 0.84, + "learning_rate": 7.16665842476274e-06, + "loss": 1.1222, + "step": 15315 + }, + { + "epoch": 0.84, + "learning_rate": 7.133190200963624e-06, + "loss": 1.087, + "step": 15320 + }, + { + "epoch": 0.84, + "learning_rate": 7.0997974177671175e-06, + "loss": 1.1532, + "step": 15325 + }, + { + "epoch": 0.84, + "learning_rate": 7.066480102299978e-06, + "loss": 1.2219, + "step": 15330 + }, + { + "epoch": 0.84, + "learning_rate": 7.0332382816275524e-06, + "loss": 1.0025, + "step": 15335 + }, + { + "epoch": 0.84, + "learning_rate": 7.000071982753964e-06, + "loss": 1.1892, + "step": 15340 + }, + { + "epoch": 0.84, + "learning_rate": 6.966981232621916e-06, + "loss": 1.0898, + "step": 15345 + }, + { + "epoch": 0.84, + "learning_rate": 6.9339660581128175e-06, + "loss": 1.0582, + "step": 15350 + }, + { + "epoch": 0.84, + "learning_rate": 6.901026486046568e-06, + "loss": 1.0936, + "step": 15355 + }, + { + "epoch": 0.84, + "learning_rate": 6.868162543181776e-06, + "loss": 1.0732, + "step": 15360 + }, + { + "epoch": 0.84, + "learning_rate": 6.8353742562155085e-06, + "loss": 1.1495, + "step": 15365 + }, + { + "epoch": 0.84, + "learning_rate": 6.802661651783457e-06, + "loss": 1.1067, + "step": 15370 + }, + { + "epoch": 0.84, + "learning_rate": 6.770024756459777e-06, + "loss": 1.1011, + "step": 15375 + }, + { + "epoch": 0.84, + "learning_rate": 6.737463596757115e-06, + "loss": 1.1788, + "step": 15380 + }, + { + "epoch": 0.84, + "learning_rate": 6.70497819912666e-06, + "loss": 1.0994, + "step": 15385 + }, + { + "epoch": 0.84, + "learning_rate": 6.672568589957978e-06, + "loss": 1.0815, + "step": 15390 + }, + { + "epoch": 0.84, + "learning_rate": 6.6402347955791525e-06, + "loss": 1.1746, + "step": 15395 + }, + { + "epoch": 0.84, + "learning_rate": 6.607976842256551e-06, + "loss": 1.0857, + "step": 15400 + }, + { + "epoch": 0.84, + "learning_rate": 6.575794756195067e-06, + "loss": 1.1396, + "step": 15405 + }, + { + "epoch": 0.84, + "learning_rate": 6.543688563537878e-06, + "loss": 1.099, + "step": 15410 + }, + { + "epoch": 0.84, + "learning_rate": 6.5116582903665645e-06, + "loss": 1.0743, + "step": 15415 + }, + { + "epoch": 0.84, + "learning_rate": 6.479703962700958e-06, + "loss": 1.081, + "step": 15420 + }, + { + "epoch": 0.84, + "learning_rate": 6.447825606499293e-06, + "loss": 1.1219, + "step": 15425 + }, + { + "epoch": 0.85, + "learning_rate": 6.416023247657989e-06, + "loss": 1.1589, + "step": 15430 + }, + { + "epoch": 0.85, + "learning_rate": 6.384296912011835e-06, + "loss": 1.0749, + "step": 15435 + }, + { + "epoch": 0.85, + "learning_rate": 6.352646625333748e-06, + "loss": 1.0635, + "step": 15440 + }, + { + "epoch": 0.85, + "learning_rate": 6.321072413334962e-06, + "loss": 1.0475, + "step": 15445 + }, + { + "epoch": 0.85, + "learning_rate": 6.289574301664848e-06, + "loss": 1.0372, + "step": 15450 + }, + { + "epoch": 0.85, + "learning_rate": 6.258152315911004e-06, + "loss": 1.1169, + "step": 15455 + }, + { + "epoch": 0.85, + "learning_rate": 6.226806481599146e-06, + "loss": 1.1027, + "step": 15460 + }, + { + "epoch": 0.85, + "learning_rate": 6.195536824193171e-06, + "loss": 1.0746, + "step": 15465 + }, + { + "epoch": 0.85, + "learning_rate": 6.1643433690950274e-06, + "loss": 1.1372, + "step": 15470 + }, + { + "epoch": 0.85, + "learning_rate": 6.1332261416448325e-06, + "loss": 1.1275, + "step": 15475 + }, + { + "epoch": 0.85, + "learning_rate": 6.1021851671207775e-06, + "loss": 1.1642, + "step": 15480 + }, + { + "epoch": 0.85, + "learning_rate": 6.0712204707390146e-06, + "loss": 1.0729, + "step": 15485 + }, + { + "epoch": 0.85, + "learning_rate": 6.040332077653843e-06, + "loss": 1.1197, + "step": 15490 + }, + { + "epoch": 0.85, + "learning_rate": 6.009520012957515e-06, + "loss": 1.0777, + "step": 15495 + }, + { + "epoch": 0.85, + "learning_rate": 5.978784301680329e-06, + "loss": 1.1061, + "step": 15500 + }, + { + "epoch": 0.85, + "learning_rate": 5.9481249687904586e-06, + "loss": 1.0993, + "step": 15505 + }, + { + "epoch": 0.85, + "learning_rate": 5.917542039194157e-06, + "loss": 1.0466, + "step": 15510 + }, + { + "epoch": 0.85, + "learning_rate": 5.887035537735497e-06, + "loss": 1.1489, + "step": 15515 + }, + { + "epoch": 0.85, + "learning_rate": 5.856605489196563e-06, + "loss": 1.0831, + "step": 15520 + }, + { + "epoch": 0.85, + "learning_rate": 5.8262519182972746e-06, + "loss": 1.0411, + "step": 15525 + }, + { + "epoch": 0.85, + "learning_rate": 5.795974849695416e-06, + "loss": 1.0776, + "step": 15530 + }, + { + "epoch": 0.85, + "learning_rate": 5.765774307986659e-06, + "loss": 1.1137, + "step": 15535 + }, + { + "epoch": 0.85, + "learning_rate": 5.7356503177044986e-06, + "loss": 1.1409, + "step": 15540 + }, + { + "epoch": 0.85, + "learning_rate": 5.705602903320238e-06, + "loss": 1.057, + "step": 15545 + }, + { + "epoch": 0.85, + "learning_rate": 5.675632089242975e-06, + "loss": 1.1074, + "step": 15550 + }, + { + "epoch": 0.85, + "learning_rate": 5.645737899819556e-06, + "loss": 1.094, + "step": 15555 + }, + { + "epoch": 0.85, + "learning_rate": 5.615920359334656e-06, + "loss": 1.1088, + "step": 15560 + }, + { + "epoch": 0.85, + "learning_rate": 5.5861794920105995e-06, + "loss": 1.1397, + "step": 15565 + }, + { + "epoch": 0.85, + "learning_rate": 5.5565153220074585e-06, + "loss": 1.1354, + "step": 15570 + }, + { + "epoch": 0.85, + "learning_rate": 5.5269278734230025e-06, + "loss": 1.1178, + "step": 15575 + }, + { + "epoch": 0.85, + "learning_rate": 5.497417170292683e-06, + "loss": 1.1433, + "step": 15580 + }, + { + "epoch": 0.85, + "learning_rate": 5.467983236589624e-06, + "loss": 1.0541, + "step": 15585 + }, + { + "epoch": 0.85, + "learning_rate": 5.4386260962245185e-06, + "loss": 1.1599, + "step": 15590 + }, + { + "epoch": 0.85, + "learning_rate": 5.4093457730457355e-06, + "loss": 1.1396, + "step": 15595 + }, + { + "epoch": 0.85, + "learning_rate": 5.380142290839215e-06, + "loss": 1.0739, + "step": 15600 + }, + { + "epoch": 0.85, + "learning_rate": 5.3510156733285045e-06, + "loss": 1.1661, + "step": 15605 + }, + { + "epoch": 0.86, + "learning_rate": 5.321965944174686e-06, + "loss": 1.151, + "step": 15610 + }, + { + "epoch": 0.86, + "learning_rate": 5.292993126976373e-06, + "loss": 1.0758, + "step": 15615 + }, + { + "epoch": 0.86, + "learning_rate": 5.264097245269703e-06, + "loss": 1.0851, + "step": 15620 + }, + { + "epoch": 0.86, + "learning_rate": 5.235278322528345e-06, + "loss": 1.1199, + "step": 15625 + }, + { + "epoch": 0.86, + "learning_rate": 5.206536382163425e-06, + "loss": 1.0436, + "step": 15630 + }, + { + "epoch": 0.86, + "learning_rate": 5.177871447523525e-06, + "loss": 1.1559, + "step": 15635 + }, + { + "epoch": 0.86, + "learning_rate": 5.149283541894672e-06, + "loss": 1.1605, + "step": 15640 + }, + { + "epoch": 0.86, + "learning_rate": 5.120772688500369e-06, + "loss": 1.1924, + "step": 15645 + }, + { + "epoch": 0.86, + "learning_rate": 5.092338910501429e-06, + "loss": 1.1025, + "step": 15650 + }, + { + "epoch": 0.86, + "learning_rate": 5.06398223099617e-06, + "loss": 1.1805, + "step": 15655 + }, + { + "epoch": 0.86, + "learning_rate": 5.0357026730201625e-06, + "loss": 1.1369, + "step": 15660 + }, + { + "epoch": 0.86, + "learning_rate": 5.0075002595464075e-06, + "loss": 1.0946, + "step": 15665 + }, + { + "epoch": 0.86, + "learning_rate": 4.979375013485201e-06, + "loss": 1.1092, + "step": 15670 + }, + { + "epoch": 0.86, + "learning_rate": 4.951326957684189e-06, + "loss": 1.1243, + "step": 15675 + }, + { + "epoch": 0.86, + "learning_rate": 4.923356114928245e-06, + "loss": 1.1355, + "step": 15680 + }, + { + "epoch": 0.86, + "learning_rate": 4.8954625079395844e-06, + "loss": 1.1517, + "step": 15685 + }, + { + "epoch": 0.86, + "learning_rate": 4.867646159377636e-06, + "loss": 1.121, + "step": 15690 + }, + { + "epoch": 0.86, + "learning_rate": 4.839907091839112e-06, + "loss": 1.1251, + "step": 15695 + }, + { + "epoch": 0.86, + "learning_rate": 4.812245327857899e-06, + "loss": 1.16, + "step": 15700 + }, + { + "epoch": 0.86, + "learning_rate": 4.784660889905099e-06, + "loss": 1.1407, + "step": 15705 + }, + { + "epoch": 0.86, + "learning_rate": 4.757153800389022e-06, + "loss": 1.1172, + "step": 15710 + }, + { + "epoch": 0.86, + "learning_rate": 4.729724081655126e-06, + "loss": 1.1653, + "step": 15715 + }, + { + "epoch": 0.86, + "learning_rate": 4.702371755986008e-06, + "loss": 1.1363, + "step": 15720 + }, + { + "epoch": 0.86, + "learning_rate": 4.675096845601386e-06, + "loss": 1.1129, + "step": 15725 + }, + { + "epoch": 0.86, + "learning_rate": 4.647899372658144e-06, + "loss": 1.1746, + "step": 15730 + }, + { + "epoch": 0.86, + "learning_rate": 4.620779359250189e-06, + "loss": 1.0906, + "step": 15735 + }, + { + "epoch": 0.86, + "learning_rate": 4.593736827408574e-06, + "loss": 1.0549, + "step": 15740 + }, + { + "epoch": 0.86, + "learning_rate": 4.566771799101328e-06, + "loss": 1.1265, + "step": 15745 + }, + { + "epoch": 0.86, + "learning_rate": 4.539884296233606e-06, + "loss": 1.146, + "step": 15750 + }, + { + "epoch": 0.86, + "learning_rate": 4.513074340647501e-06, + "loss": 1.1519, + "step": 15755 + }, + { + "epoch": 0.86, + "learning_rate": 4.486341954122204e-06, + "loss": 1.0767, + "step": 15760 + }, + { + "epoch": 0.86, + "learning_rate": 4.459687158373789e-06, + "loss": 1.0886, + "step": 15765 + }, + { + "epoch": 0.86, + "learning_rate": 4.433109975055379e-06, + "loss": 1.1667, + "step": 15770 + }, + { + "epoch": 0.86, + "learning_rate": 4.406610425757003e-06, + "loss": 1.0441, + "step": 15775 + }, + { + "epoch": 0.86, + "learning_rate": 4.3801885320056755e-06, + "loss": 1.0611, + "step": 15780 + }, + { + "epoch": 0.86, + "learning_rate": 4.35384431526526e-06, + "loss": 1.0406, + "step": 15785 + }, + { + "epoch": 0.86, + "learning_rate": 4.327577796936572e-06, + "loss": 1.0848, + "step": 15790 + }, + { + "epoch": 0.87, + "learning_rate": 4.301388998357265e-06, + "loss": 1.0957, + "step": 15795 + }, + { + "epoch": 0.87, + "learning_rate": 4.2752779408018865e-06, + "loss": 1.1554, + "step": 15800 + }, + { + "epoch": 0.87, + "learning_rate": 4.24924464548186e-06, + "loss": 1.0796, + "step": 15805 + }, + { + "epoch": 0.87, + "learning_rate": 4.2232891335453555e-06, + "loss": 1.1156, + "step": 15810 + }, + { + "epoch": 0.87, + "learning_rate": 4.197411426077429e-06, + "loss": 1.0907, + "step": 15815 + }, + { + "epoch": 0.87, + "learning_rate": 4.171611544099889e-06, + "loss": 1.0705, + "step": 15820 + }, + { + "epoch": 0.87, + "learning_rate": 4.1458895085713695e-06, + "loss": 1.0766, + "step": 15825 + }, + { + "epoch": 0.87, + "learning_rate": 4.120245340387189e-06, + "loss": 1.0711, + "step": 15830 + }, + { + "epoch": 0.87, + "learning_rate": 4.094679060379506e-06, + "loss": 1.1613, + "step": 15835 + }, + { + "epoch": 0.87, + "learning_rate": 4.069190689317107e-06, + "loss": 1.0675, + "step": 15840 + }, + { + "epoch": 0.87, + "learning_rate": 4.043780247905584e-06, + "loss": 1.0787, + "step": 15845 + }, + { + "epoch": 0.87, + "learning_rate": 4.01844775678715e-06, + "loss": 1.1046, + "step": 15850 + }, + { + "epoch": 0.87, + "learning_rate": 3.99319323654074e-06, + "loss": 1.1347, + "step": 15855 + }, + { + "epoch": 0.87, + "learning_rate": 3.968016707681899e-06, + "loss": 1.0645, + "step": 15860 + }, + { + "epoch": 0.87, + "learning_rate": 3.942918190662893e-06, + "loss": 1.0998, + "step": 15865 + }, + { + "epoch": 0.87, + "learning_rate": 3.917897705872543e-06, + "loss": 1.0556, + "step": 15870 + }, + { + "epoch": 0.87, + "learning_rate": 3.892955273636313e-06, + "loss": 1.1649, + "step": 15875 + }, + { + "epoch": 0.87, + "learning_rate": 3.868090914216239e-06, + "loss": 1.0941, + "step": 15880 + }, + { + "epoch": 0.87, + "learning_rate": 3.843304647810975e-06, + "loss": 1.0874, + "step": 15885 + }, + { + "epoch": 0.87, + "learning_rate": 3.818596494555704e-06, + "loss": 1.0992, + "step": 15890 + }, + { + "epoch": 0.87, + "learning_rate": 3.793966474522159e-06, + "loss": 1.1281, + "step": 15895 + }, + { + "epoch": 0.87, + "learning_rate": 3.7694146077185796e-06, + "loss": 1.033, + "step": 15900 + }, + { + "epoch": 0.87, + "learning_rate": 3.7449409140897778e-06, + "loss": 1.1218, + "step": 15905 + }, + { + "epoch": 0.87, + "learning_rate": 3.7205454135170163e-06, + "loss": 1.14, + "step": 15910 + }, + { + "epoch": 0.87, + "learning_rate": 3.696228125818022e-06, + "loss": 1.1457, + "step": 15915 + }, + { + "epoch": 0.87, + "learning_rate": 3.6719890707470366e-06, + "loss": 1.0548, + "step": 15920 + }, + { + "epoch": 0.87, + "learning_rate": 3.6478282679947095e-06, + "loss": 1.1437, + "step": 15925 + }, + { + "epoch": 0.87, + "learning_rate": 3.6237457371881293e-06, + "loss": 1.0699, + "step": 15930 + }, + { + "epoch": 0.87, + "learning_rate": 3.599741497890825e-06, + "loss": 1.1399, + "step": 15935 + }, + { + "epoch": 0.87, + "learning_rate": 3.575815569602676e-06, + "loss": 1.0962, + "step": 15940 + }, + { + "epoch": 0.87, + "learning_rate": 3.5519679717599906e-06, + "loss": 1.0773, + "step": 15945 + }, + { + "epoch": 0.87, + "learning_rate": 3.5281987237354267e-06, + "loss": 1.1114, + "step": 15950 + }, + { + "epoch": 0.87, + "learning_rate": 3.5045078448380067e-06, + "loss": 1.1133, + "step": 15955 + }, + { + "epoch": 0.87, + "learning_rate": 3.4808953543130573e-06, + "loss": 1.0785, + "step": 15960 + }, + { + "epoch": 0.87, + "learning_rate": 3.457361271342257e-06, + "loss": 1.0664, + "step": 15965 + }, + { + "epoch": 0.87, + "learning_rate": 3.4339056150436027e-06, + "loss": 1.0698, + "step": 15970 + }, + { + "epoch": 0.88, + "learning_rate": 3.4105284044713407e-06, + "loss": 1.2109, + "step": 15975 + }, + { + "epoch": 0.88, + "learning_rate": 3.3872296586160133e-06, + "loss": 1.0391, + "step": 15980 + }, + { + "epoch": 0.88, + "learning_rate": 3.3640093964044033e-06, + "loss": 1.1354, + "step": 15985 + }, + { + "epoch": 0.88, + "learning_rate": 3.340867636699577e-06, + "loss": 1.0609, + "step": 15990 + }, + { + "epoch": 0.88, + "learning_rate": 3.3178043983007974e-06, + "loss": 1.1171, + "step": 15995 + }, + { + "epoch": 0.88, + "learning_rate": 3.2948196999435657e-06, + "loss": 1.0477, + "step": 16000 + }, + { + "epoch": 0.88, + "learning_rate": 3.2719135602995126e-06, + "loss": 1.0792, + "step": 16005 + }, + { + "epoch": 0.88, + "learning_rate": 3.249085997976555e-06, + "loss": 1.0743, + "step": 16010 + }, + { + "epoch": 0.88, + "learning_rate": 3.226337031518702e-06, + "loss": 1.0793, + "step": 16015 + }, + { + "epoch": 0.88, + "learning_rate": 3.2036666794061378e-06, + "loss": 1.1765, + "step": 16020 + }, + { + "epoch": 0.88, + "learning_rate": 3.1810749600551983e-06, + "loss": 1.1106, + "step": 16025 + }, + { + "epoch": 0.88, + "learning_rate": 3.1585618918183025e-06, + "loss": 1.1193, + "step": 16030 + }, + { + "epoch": 0.88, + "learning_rate": 3.136127492984021e-06, + "loss": 1.139, + "step": 16035 + }, + { + "epoch": 0.88, + "learning_rate": 3.113771781776987e-06, + "loss": 1.1757, + "step": 16040 + }, + { + "epoch": 0.88, + "learning_rate": 3.09149477635794e-06, + "loss": 1.1187, + "step": 16045 + }, + { + "epoch": 0.88, + "learning_rate": 3.0692964948236377e-06, + "loss": 1.1375, + "step": 16050 + }, + { + "epoch": 0.88, + "learning_rate": 3.0471769552069324e-06, + "loss": 1.0921, + "step": 16055 + }, + { + "epoch": 0.88, + "learning_rate": 3.025136175476684e-06, + "loss": 1.0905, + "step": 16060 + }, + { + "epoch": 0.88, + "learning_rate": 3.003174173537804e-06, + "loss": 1.1505, + "step": 16065 + }, + { + "epoch": 0.88, + "learning_rate": 2.9812909672311435e-06, + "loss": 1.1504, + "step": 16070 + }, + { + "epoch": 0.88, + "learning_rate": 2.9594865743336145e-06, + "loss": 1.1149, + "step": 16075 + }, + { + "epoch": 0.88, + "learning_rate": 2.9377610125580487e-06, + "loss": 1.1724, + "step": 16080 + }, + { + "epoch": 0.88, + "learning_rate": 2.9161142995533067e-06, + "loss": 1.0924, + "step": 16085 + }, + { + "epoch": 0.88, + "learning_rate": 2.8945464529041e-06, + "loss": 1.1057, + "step": 16090 + }, + { + "epoch": 0.88, + "learning_rate": 2.8730574901311692e-06, + "loss": 1.0862, + "step": 16095 + }, + { + "epoch": 0.88, + "learning_rate": 2.8516474286910942e-06, + "loss": 1.0754, + "step": 16100 + }, + { + "epoch": 0.88, + "learning_rate": 2.830316285976452e-06, + "loss": 1.1125, + "step": 16105 + }, + { + "epoch": 0.88, + "learning_rate": 2.809064079315591e-06, + "loss": 1.0761, + "step": 16110 + }, + { + "epoch": 0.88, + "learning_rate": 2.787890825972836e-06, + "loss": 1.1002, + "step": 16115 + }, + { + "epoch": 0.88, + "learning_rate": 2.766796543148306e-06, + "loss": 1.1259, + "step": 16120 + }, + { + "epoch": 0.88, + "learning_rate": 2.7457812479780493e-06, + "loss": 1.1162, + "step": 16125 + }, + { + "epoch": 0.88, + "learning_rate": 2.7248449575338536e-06, + "loss": 1.112, + "step": 16130 + }, + { + "epoch": 0.88, + "learning_rate": 2.7039876888233695e-06, + "loss": 1.146, + "step": 16135 + }, + { + "epoch": 0.88, + "learning_rate": 2.6832094587900884e-06, + "loss": 1.1394, + "step": 16140 + }, + { + "epoch": 0.88, + "learning_rate": 2.662510284313231e-06, + "loss": 1.0629, + "step": 16145 + }, + { + "epoch": 0.88, + "learning_rate": 2.6418901822078575e-06, + "loss": 1.0954, + "step": 16150 + }, + { + "epoch": 0.88, + "learning_rate": 2.6213491692247248e-06, + "loss": 1.1435, + "step": 16155 + }, + { + "epoch": 0.89, + "learning_rate": 2.6008872620504172e-06, + "loss": 1.1263, + "step": 16160 + }, + { + "epoch": 0.89, + "learning_rate": 2.580504477307183e-06, + "loss": 1.1138, + "step": 16165 + }, + { + "epoch": 0.89, + "learning_rate": 2.5602008315530776e-06, + "loss": 1.1343, + "step": 16170 + }, + { + "epoch": 0.89, + "learning_rate": 2.5399763412817743e-06, + "loss": 1.1663, + "step": 16175 + }, + { + "epoch": 0.89, + "learning_rate": 2.51983102292273e-06, + "loss": 1.0963, + "step": 16180 + }, + { + "epoch": 0.89, + "learning_rate": 2.499764892841011e-06, + "loss": 1.0759, + "step": 16185 + }, + { + "epoch": 0.89, + "learning_rate": 2.479777967337432e-06, + "loss": 1.0689, + "step": 16190 + }, + { + "epoch": 0.89, + "learning_rate": 2.459870262648405e-06, + "loss": 1.1193, + "step": 16195 + }, + { + "epoch": 0.89, + "learning_rate": 2.440041794946017e-06, + "loss": 1.0101, + "step": 16200 + }, + { + "epoch": 0.89, + "learning_rate": 2.4202925803379493e-06, + "loss": 1.229, + "step": 16205 + }, + { + "epoch": 0.89, + "learning_rate": 2.4006226348675694e-06, + "loss": 1.0983, + "step": 16210 + }, + { + "epoch": 0.89, + "learning_rate": 2.381031974513784e-06, + "loss": 1.2061, + "step": 16215 + }, + { + "epoch": 0.89, + "learning_rate": 2.361520615191129e-06, + "loss": 1.0888, + "step": 16220 + }, + { + "epoch": 0.89, + "learning_rate": 2.3420885727496924e-06, + "loss": 1.0964, + "step": 16225 + }, + { + "epoch": 0.89, + "learning_rate": 2.3227358629751696e-06, + "loss": 1.0691, + "step": 16230 + }, + { + "epoch": 0.89, + "learning_rate": 2.3034625015887848e-06, + "loss": 1.0945, + "step": 16235 + }, + { + "epoch": 0.89, + "learning_rate": 2.2842685042472802e-06, + "loss": 1.117, + "step": 16240 + }, + { + "epoch": 0.89, + "learning_rate": 2.265153886542983e-06, + "loss": 1.121, + "step": 16245 + }, + { + "epoch": 0.89, + "learning_rate": 2.246118664003673e-06, + "loss": 1.096, + "step": 16250 + }, + { + "epoch": 0.89, + "learning_rate": 2.2271628520926916e-06, + "loss": 1.0896, + "step": 16255 + }, + { + "epoch": 0.89, + "learning_rate": 2.2082864662088325e-06, + "loss": 1.1378, + "step": 16260 + }, + { + "epoch": 0.89, + "learning_rate": 2.189489521686384e-06, + "loss": 1.1348, + "step": 16265 + }, + { + "epoch": 0.89, + "learning_rate": 2.1707720337950773e-06, + "loss": 1.1282, + "step": 16270 + }, + { + "epoch": 0.89, + "learning_rate": 2.152134017740139e-06, + "loss": 1.1035, + "step": 16275 + }, + { + "epoch": 0.89, + "learning_rate": 2.1335754886621886e-06, + "loss": 1.0911, + "step": 16280 + }, + { + "epoch": 0.89, + "learning_rate": 2.115096461637313e-06, + "loss": 1.1321, + "step": 16285 + }, + { + "epoch": 0.89, + "learning_rate": 2.0966969516769816e-06, + "loss": 1.1175, + "step": 16290 + }, + { + "epoch": 0.89, + "learning_rate": 2.0783769737281155e-06, + "loss": 1.0464, + "step": 16295 + }, + { + "epoch": 0.89, + "learning_rate": 2.0601365426729763e-06, + "loss": 1.0491, + "step": 16300 + }, + { + "epoch": 0.89, + "learning_rate": 2.0419756733292233e-06, + "loss": 1.1104, + "step": 16305 + }, + { + "epoch": 0.89, + "learning_rate": 2.0238943804498886e-06, + "loss": 1.1716, + "step": 16310 + }, + { + "epoch": 0.89, + "learning_rate": 2.005892678723377e-06, + "loss": 1.1465, + "step": 16315 + }, + { + "epoch": 0.89, + "learning_rate": 1.987970582773414e-06, + "loss": 1.1245, + "step": 16320 + }, + { + "epoch": 0.89, + "learning_rate": 1.9701281071590427e-06, + "loss": 1.1121, + "step": 16325 + }, + { + "epoch": 0.89, + "learning_rate": 1.9523652663746682e-06, + "loss": 1.0917, + "step": 16330 + }, + { + "epoch": 0.89, + "learning_rate": 1.9346820748499707e-06, + "loss": 1.1602, + "step": 16335 + }, + { + "epoch": 0.89, + "learning_rate": 1.917078546949936e-06, + "loss": 1.104, + "step": 16340 + }, + { + "epoch": 0.9, + "learning_rate": 1.899554696974849e-06, + "loss": 1.1388, + "step": 16345 + }, + { + "epoch": 0.9, + "learning_rate": 1.8821105391602541e-06, + "loss": 1.1104, + "step": 16350 + }, + { + "epoch": 0.9, + "learning_rate": 1.8647460876769391e-06, + "loss": 1.1324, + "step": 16355 + }, + { + "epoch": 0.9, + "learning_rate": 1.8474613566309977e-06, + "loss": 1.121, + "step": 16360 + }, + { + "epoch": 0.9, + "learning_rate": 1.8302563600637091e-06, + "loss": 1.0966, + "step": 16365 + }, + { + "epoch": 0.9, + "learning_rate": 1.8131311119515934e-06, + "loss": 1.1196, + "step": 16370 + }, + { + "epoch": 0.9, + "learning_rate": 1.796085626206389e-06, + "loss": 1.1594, + "step": 16375 + }, + { + "epoch": 0.9, + "learning_rate": 1.779119916675053e-06, + "loss": 1.1135, + "step": 16380 + }, + { + "epoch": 0.9, + "learning_rate": 1.7622339971397384e-06, + "loss": 1.085, + "step": 16385 + }, + { + "epoch": 0.9, + "learning_rate": 1.7454278813177395e-06, + "loss": 1.0639, + "step": 16390 + }, + { + "epoch": 0.9, + "learning_rate": 1.7287015828615571e-06, + "loss": 1.1301, + "step": 16395 + }, + { + "epoch": 0.9, + "learning_rate": 1.7120551153588557e-06, + "loss": 1.1418, + "step": 16400 + }, + { + "epoch": 0.9, + "learning_rate": 1.6954884923324298e-06, + "loss": 1.1234, + "step": 16405 + }, + { + "epoch": 0.9, + "learning_rate": 1.6790017272402356e-06, + "loss": 1.1029, + "step": 16410 + }, + { + "epoch": 0.9, + "learning_rate": 1.6625948334753047e-06, + "loss": 1.0818, + "step": 16415 + }, + { + "epoch": 0.9, + "learning_rate": 1.6462678243658534e-06, + "loss": 1.1578, + "step": 16420 + }, + { + "epoch": 0.9, + "learning_rate": 1.6300207131751606e-06, + "loss": 1.1124, + "step": 16425 + }, + { + "epoch": 0.9, + "learning_rate": 1.6138535131016246e-06, + "loss": 1.1645, + "step": 16430 + }, + { + "epoch": 0.9, + "learning_rate": 1.5977662372786842e-06, + "loss": 1.1968, + "step": 16435 + }, + { + "epoch": 0.9, + "learning_rate": 1.5817588987749081e-06, + "loss": 1.1123, + "step": 16440 + }, + { + "epoch": 0.9, + "learning_rate": 1.5658315105938826e-06, + "loss": 1.1023, + "step": 16445 + }, + { + "epoch": 0.9, + "learning_rate": 1.5499840856742805e-06, + "loss": 1.1495, + "step": 16450 + }, + { + "epoch": 0.9, + "learning_rate": 1.534216636889793e-06, + "loss": 1.0852, + "step": 16455 + }, + { + "epoch": 0.9, + "learning_rate": 1.5185291770491406e-06, + "loss": 1.1549, + "step": 16460 + }, + { + "epoch": 0.9, + "learning_rate": 1.5029217188960855e-06, + "loss": 1.1739, + "step": 16465 + }, + { + "epoch": 0.9, + "learning_rate": 1.487394275109385e-06, + "loss": 1.0818, + "step": 16470 + }, + { + "epoch": 0.9, + "learning_rate": 1.4719468583028047e-06, + "loss": 1.0607, + "step": 16475 + }, + { + "epoch": 0.9, + "learning_rate": 1.4565794810250846e-06, + "loss": 1.1059, + "step": 16480 + }, + { + "epoch": 0.9, + "learning_rate": 1.4412921557599614e-06, + "loss": 1.0518, + "step": 16485 + }, + { + "epoch": 0.9, + "learning_rate": 1.4260848949261344e-06, + "loss": 1.0182, + "step": 16490 + }, + { + "epoch": 0.9, + "learning_rate": 1.4109577108772653e-06, + "loss": 1.1413, + "step": 16495 + }, + { + "epoch": 0.9, + "learning_rate": 1.3959106159019473e-06, + "loss": 1.1225, + "step": 16500 + }, + { + "epoch": 0.9, + "learning_rate": 1.3809436222237576e-06, + "loss": 1.1613, + "step": 16505 + }, + { + "epoch": 0.9, + "learning_rate": 1.3660567420011375e-06, + "loss": 1.1519, + "step": 16510 + }, + { + "epoch": 0.9, + "learning_rate": 1.3512499873275244e-06, + "loss": 1.1568, + "step": 16515 + }, + { + "epoch": 0.9, + "learning_rate": 1.3365233702311965e-06, + "loss": 1.1024, + "step": 16520 + }, + { + "epoch": 0.91, + "learning_rate": 1.3218769026753619e-06, + "loss": 1.1473, + "step": 16525 + }, + { + "epoch": 0.91, + "learning_rate": 1.3073105965581255e-06, + "loss": 1.1397, + "step": 16530 + }, + { + "epoch": 0.91, + "learning_rate": 1.2928244637124661e-06, + "loss": 1.14, + "step": 16535 + }, + { + "epoch": 0.91, + "learning_rate": 1.2784185159062376e-06, + "loss": 1.1294, + "step": 16540 + }, + { + "epoch": 0.91, + "learning_rate": 1.264092764842145e-06, + "loss": 1.1437, + "step": 16545 + }, + { + "epoch": 0.91, + "learning_rate": 1.249847222157735e-06, + "loss": 1.0628, + "step": 16550 + }, + { + "epoch": 0.91, + "learning_rate": 1.2356818994254386e-06, + "loss": 1.1149, + "step": 16555 + }, + { + "epoch": 0.91, + "learning_rate": 1.2215968081524741e-06, + "loss": 1.1076, + "step": 16560 + }, + { + "epoch": 0.91, + "learning_rate": 1.2075919597809113e-06, + "loss": 1.0568, + "step": 16565 + }, + { + "epoch": 0.91, + "learning_rate": 1.193667365687638e-06, + "loss": 1.085, + "step": 16570 + }, + { + "epoch": 0.91, + "learning_rate": 1.1798230371843058e-06, + "loss": 1.1119, + "step": 16575 + }, + { + "epoch": 0.91, + "learning_rate": 1.1660589855174297e-06, + "loss": 1.1855, + "step": 16580 + }, + { + "epoch": 0.91, + "learning_rate": 1.1523752218682435e-06, + "loss": 1.0604, + "step": 16585 + }, + { + "epoch": 0.91, + "learning_rate": 1.1387717573528108e-06, + "loss": 1.1328, + "step": 16590 + }, + { + "epoch": 0.91, + "learning_rate": 1.1252486030219139e-06, + "loss": 1.0743, + "step": 16595 + }, + { + "epoch": 0.91, + "learning_rate": 1.1118057698611544e-06, + "loss": 1.1038, + "step": 16600 + }, + { + "epoch": 0.91, + "learning_rate": 1.0984432687908412e-06, + "loss": 1.1044, + "step": 16605 + }, + { + "epoch": 0.91, + "learning_rate": 1.0851611106660467e-06, + "loss": 1.0952, + "step": 16610 + }, + { + "epoch": 0.91, + "learning_rate": 1.0719593062765512e-06, + "loss": 1.1749, + "step": 16615 + }, + { + "epoch": 0.91, + "learning_rate": 1.058837866346887e-06, + "loss": 1.1268, + "step": 16620 + }, + { + "epoch": 0.91, + "learning_rate": 1.045796801536283e-06, + "loss": 1.0945, + "step": 16625 + }, + { + "epoch": 0.91, + "learning_rate": 1.032836122438698e-06, + "loss": 1.0407, + "step": 16630 + }, + { + "epoch": 0.91, + "learning_rate": 1.019955839582744e-06, + "loss": 1.1344, + "step": 16635 + }, + { + "epoch": 0.91, + "learning_rate": 1.0071559634317957e-06, + "loss": 1.1925, + "step": 16640 + }, + { + "epoch": 0.91, + "learning_rate": 9.944365043838461e-07, + "loss": 1.1355, + "step": 16645 + }, + { + "epoch": 0.91, + "learning_rate": 9.817974727715744e-07, + "loss": 1.1649, + "step": 16650 + }, + { + "epoch": 0.91, + "learning_rate": 9.692388788623341e-07, + "loss": 1.1269, + "step": 16655 + }, + { + "epoch": 0.91, + "learning_rate": 9.567607328581428e-07, + "loss": 1.0955, + "step": 16660 + }, + { + "epoch": 0.91, + "learning_rate": 9.443630448956471e-07, + "loss": 1.1368, + "step": 16665 + }, + { + "epoch": 0.91, + "learning_rate": 9.320458250461577e-07, + "loss": 1.1313, + "step": 16670 + }, + { + "epoch": 0.91, + "learning_rate": 9.198090833155817e-07, + "loss": 1.0815, + "step": 16675 + }, + { + "epoch": 0.91, + "learning_rate": 9.076528296444788e-07, + "loss": 1.1637, + "step": 16680 + }, + { + "epoch": 0.91, + "learning_rate": 8.955770739080159e-07, + "loss": 1.1099, + "step": 16685 + }, + { + "epoch": 0.91, + "learning_rate": 8.835818259159576e-07, + "loss": 1.0788, + "step": 16690 + }, + { + "epoch": 0.91, + "learning_rate": 8.716670954126869e-07, + "loss": 1.1443, + "step": 16695 + }, + { + "epoch": 0.91, + "learning_rate": 8.5983289207715e-07, + "loss": 1.0945, + "step": 16700 + }, + { + "epoch": 0.91, + "learning_rate": 8.480792255229126e-07, + "loss": 1.0568, + "step": 16705 + }, + { + "epoch": 0.92, + "learning_rate": 8.364061052980813e-07, + "loss": 1.1136, + "step": 16710 + }, + { + "epoch": 0.92, + "learning_rate": 8.248135408853598e-07, + "loss": 1.1255, + "step": 16715 + }, + { + "epoch": 0.92, + "learning_rate": 8.133015417019929e-07, + "loss": 1.1145, + "step": 16720 + }, + { + "epoch": 0.92, + "learning_rate": 8.018701170997888e-07, + "loss": 1.0732, + "step": 16725 + }, + { + "epoch": 0.92, + "learning_rate": 7.905192763650856e-07, + "loss": 1.0835, + "step": 16730 + }, + { + "epoch": 0.92, + "learning_rate": 7.792490287187856e-07, + "loss": 1.0513, + "step": 16735 + }, + { + "epoch": 0.92, + "learning_rate": 7.680593833162869e-07, + "loss": 1.1288, + "step": 16740 + }, + { + "epoch": 0.92, + "learning_rate": 7.56950349247541e-07, + "loss": 1.1648, + "step": 16745 + }, + { + "epoch": 0.92, + "learning_rate": 7.459219355369955e-07, + "loss": 1.1551, + "step": 16750 + }, + { + "epoch": 0.92, + "learning_rate": 7.349741511436173e-07, + "loss": 1.1016, + "step": 16755 + }, + { + "epoch": 0.92, + "learning_rate": 7.241070049608591e-07, + "loss": 1.0629, + "step": 16760 + }, + { + "epoch": 0.92, + "learning_rate": 7.133205058166926e-07, + "loss": 1.1038, + "step": 16765 + }, + { + "epoch": 0.92, + "learning_rate": 7.026146624735308e-07, + "loss": 1.1666, + "step": 16770 + }, + { + "epoch": 0.92, + "learning_rate": 6.919894836283277e-07, + "loss": 1.0821, + "step": 16775 + }, + { + "epoch": 0.92, + "learning_rate": 6.814449779124462e-07, + "loss": 1.2119, + "step": 16780 + }, + { + "epoch": 0.92, + "learning_rate": 6.70981153891767e-07, + "loss": 1.1009, + "step": 16785 + }, + { + "epoch": 0.92, + "learning_rate": 6.605980200665911e-07, + "loss": 1.1142, + "step": 16790 + }, + { + "epoch": 0.92, + "learning_rate": 6.502955848716933e-07, + "loss": 1.0964, + "step": 16795 + }, + { + "epoch": 0.92, + "learning_rate": 6.4007385667629e-07, + "loss": 1.0647, + "step": 16800 + }, + { + "epoch": 0.92, + "learning_rate": 6.299328437840058e-07, + "loss": 1.1979, + "step": 16805 + }, + { + "epoch": 0.92, + "learning_rate": 6.198725544329508e-07, + "loss": 1.0791, + "step": 16810 + }, + { + "epoch": 0.92, + "learning_rate": 6.09892996795599e-07, + "loss": 1.0483, + "step": 16815 + }, + { + "epoch": 0.92, + "learning_rate": 5.999941789788985e-07, + "loss": 1.103, + "step": 16820 + }, + { + "epoch": 0.92, + "learning_rate": 5.901761090241506e-07, + "loss": 1.1139, + "step": 16825 + }, + { + "epoch": 0.92, + "learning_rate": 5.804387949071194e-07, + "loss": 1.1437, + "step": 16830 + }, + { + "epoch": 0.92, + "learning_rate": 5.707822445379223e-07, + "loss": 1.0156, + "step": 16835 + }, + { + "epoch": 0.92, + "learning_rate": 5.612064657610954e-07, + "loss": 1.0878, + "step": 16840 + }, + { + "epoch": 0.92, + "learning_rate": 5.517114663555271e-07, + "loss": 1.0785, + "step": 16845 + }, + { + "epoch": 0.92, + "learning_rate": 5.422972540345254e-07, + "loss": 1.0821, + "step": 16850 + }, + { + "epoch": 0.92, + "learning_rate": 5.329638364457391e-07, + "loss": 1.1105, + "step": 16855 + }, + { + "epoch": 0.92, + "learning_rate": 5.237112211712036e-07, + "loss": 1.1316, + "step": 16860 + }, + { + "epoch": 0.92, + "learning_rate": 5.145394157273064e-07, + "loss": 1.0996, + "step": 16865 + }, + { + "epoch": 0.92, + "learning_rate": 5.054484275647874e-07, + "loss": 1.0521, + "step": 16870 + }, + { + "epoch": 0.92, + "learning_rate": 4.964382640687169e-07, + "loss": 1.1272, + "step": 16875 + }, + { + "epoch": 0.92, + "learning_rate": 4.875089325585622e-07, + "loss": 1.1102, + "step": 16880 + }, + { + "epoch": 0.92, + "learning_rate": 4.786604402880541e-07, + "loss": 1.1545, + "step": 16885 + }, + { + "epoch": 0.93, + "learning_rate": 4.698927944452985e-07, + "loss": 1.1228, + "step": 16890 + }, + { + "epoch": 0.93, + "learning_rate": 4.612060021527314e-07, + "loss": 1.1967, + "step": 16895 + }, + { + "epoch": 0.93, + "learning_rate": 4.526000704670855e-07, + "loss": 1.1296, + "step": 16900 + }, + { + "epoch": 0.93, + "learning_rate": 4.440750063794022e-07, + "loss": 1.1456, + "step": 16905 + }, + { + "epoch": 0.93, + "learning_rate": 4.356308168150415e-07, + "loss": 1.0568, + "step": 16910 + }, + { + "epoch": 0.93, + "learning_rate": 4.272675086336831e-07, + "loss": 1.1082, + "step": 16915 + }, + { + "epoch": 0.93, + "learning_rate": 4.1898508862925897e-07, + "loss": 1.1298, + "step": 16920 + }, + { + "epoch": 0.93, + "learning_rate": 4.107835635300205e-07, + "loss": 1.0705, + "step": 16925 + }, + { + "epoch": 0.93, + "learning_rate": 4.026629399985158e-07, + "loss": 1.1249, + "step": 16930 + }, + { + "epoch": 0.93, + "learning_rate": 3.946232246315348e-07, + "loss": 1.0854, + "step": 16935 + }, + { + "epoch": 0.93, + "learning_rate": 3.8666442396016426e-07, + "loss": 1.1833, + "step": 16940 + }, + { + "epoch": 0.93, + "learning_rate": 3.787865444497658e-07, + "loss": 1.1123, + "step": 16945 + }, + { + "epoch": 0.93, + "learning_rate": 3.709895924999535e-07, + "loss": 1.0506, + "step": 16950 + }, + { + "epoch": 0.93, + "learning_rate": 3.6327357444460526e-07, + "loss": 1.0753, + "step": 16955 + }, + { + "epoch": 0.93, + "learning_rate": 3.5563849655184035e-07, + "loss": 1.0932, + "step": 16960 + }, + { + "epoch": 0.93, + "learning_rate": 3.4808436502404195e-07, + "loss": 1.1486, + "step": 16965 + }, + { + "epoch": 0.93, + "learning_rate": 3.4061118599783455e-07, + "loss": 1.0935, + "step": 16970 + }, + { + "epoch": 0.93, + "learning_rate": 3.33218965544051e-07, + "loss": 1.0516, + "step": 16975 + }, + { + "epoch": 0.93, + "learning_rate": 3.259077096678097e-07, + "loss": 1.1001, + "step": 16980 + }, + { + "epoch": 0.93, + "learning_rate": 3.1867742430841546e-07, + "loss": 1.0619, + "step": 16985 + }, + { + "epoch": 0.93, + "learning_rate": 3.115281153394143e-07, + "loss": 1.0998, + "step": 16990 + }, + { + "epoch": 0.93, + "learning_rate": 3.0445978856856037e-07, + "loss": 1.0915, + "step": 16995 + }, + { + "epoch": 0.93, + "learning_rate": 2.9747244973782734e-07, + "loss": 1.1161, + "step": 17000 + }, + { + "epoch": 0.93, + "learning_rate": 2.9056610452340806e-07, + "loss": 1.068, + "step": 17005 + }, + { + "epoch": 0.93, + "learning_rate": 2.8374075853568145e-07, + "loss": 1.0757, + "step": 17010 + }, + { + "epoch": 0.93, + "learning_rate": 2.7699641731924553e-07, + "loss": 1.0625, + "step": 17015 + }, + { + "epoch": 0.93, + "learning_rate": 2.703330863528844e-07, + "loss": 1.0942, + "step": 17020 + }, + { + "epoch": 0.93, + "learning_rate": 2.6375077104955705e-07, + "loss": 1.1192, + "step": 17025 + }, + { + "epoch": 0.93, + "learning_rate": 2.5724947675643055e-07, + "loss": 1.1503, + "step": 17030 + }, + { + "epoch": 0.93, + "learning_rate": 2.508292087548693e-07, + "loss": 1.0777, + "step": 17035 + }, + { + "epoch": 0.93, + "learning_rate": 2.4448997226036786e-07, + "loss": 1.1206, + "step": 17040 + }, + { + "epoch": 0.93, + "learning_rate": 2.382317724226291e-07, + "loss": 1.1276, + "step": 17045 + }, + { + "epoch": 0.93, + "learning_rate": 2.3205461432553085e-07, + "loss": 0.985, + "step": 17050 + }, + { + "epoch": 0.93, + "learning_rate": 2.259585029870924e-07, + "loss": 1.1301, + "step": 17055 + }, + { + "epoch": 0.93, + "learning_rate": 2.1994344335950796e-07, + "loss": 1.1161, + "step": 17060 + }, + { + "epoch": 0.93, + "learning_rate": 2.140094403291356e-07, + "loss": 1.0812, + "step": 17065 + }, + { + "epoch": 0.93, + "learning_rate": 2.081564987164637e-07, + "loss": 1.0704, + "step": 17070 + }, + { + "epoch": 0.94, + "learning_rate": 2.0238462327615564e-07, + "loss": 1.0955, + "step": 17075 + }, + { + "epoch": 0.94, + "learning_rate": 1.966938186970163e-07, + "loss": 1.0662, + "step": 17080 + }, + { + "epoch": 0.94, + "learning_rate": 1.910840896019699e-07, + "loss": 1.1334, + "step": 17085 + }, + { + "epoch": 0.94, + "learning_rate": 1.8555544054810452e-07, + "loss": 1.1211, + "step": 17090 + }, + { + "epoch": 0.94, + "learning_rate": 1.8010787602662758e-07, + "loss": 1.0898, + "step": 17095 + }, + { + "epoch": 0.94, + "learning_rate": 1.7474140046289915e-07, + "loss": 1.1766, + "step": 17100 + }, + { + "epoch": 0.94, + "learning_rate": 1.6945601821636532e-07, + "loss": 1.0579, + "step": 17105 + }, + { + "epoch": 0.94, + "learning_rate": 1.6425173358063605e-07, + "loss": 1.1526, + "step": 17110 + }, + { + "epoch": 0.94, + "learning_rate": 1.591285507834184e-07, + "loss": 1.1208, + "step": 17115 + }, + { + "epoch": 0.94, + "learning_rate": 1.5408647398654996e-07, + "loss": 1.1807, + "step": 17120 + }, + { + "epoch": 0.94, + "learning_rate": 1.4912550728597645e-07, + "loss": 1.1635, + "step": 17125 + }, + { + "epoch": 0.94, + "learning_rate": 1.4424565471175196e-07, + "loss": 1.0432, + "step": 17130 + }, + { + "epoch": 0.94, + "learning_rate": 1.3944692022802775e-07, + "loss": 1.1401, + "step": 17135 + }, + { + "epoch": 0.94, + "learning_rate": 1.347293077330855e-07, + "loss": 1.1203, + "step": 17140 + }, + { + "epoch": 0.94, + "learning_rate": 1.3009282105927082e-07, + "loss": 1.0678, + "step": 17145 + }, + { + "epoch": 0.94, + "learning_rate": 1.2553746397305962e-07, + "loss": 1.1245, + "step": 17150 + }, + { + "epoch": 0.94, + "learning_rate": 1.2106324017500294e-07, + "loss": 1.146, + "step": 17155 + }, + { + "epoch": 0.94, + "learning_rate": 1.1667015329976e-07, + "loss": 1.1014, + "step": 17160 + }, + { + "epoch": 0.94, + "learning_rate": 1.1235820691605392e-07, + "loss": 1.1257, + "step": 17165 + }, + { + "epoch": 0.94, + "learning_rate": 1.0812740452670492e-07, + "loss": 1.0855, + "step": 17170 + }, + { + "epoch": 0.94, + "learning_rate": 1.0397774956861934e-07, + "loss": 1.1447, + "step": 17175 + }, + { + "epoch": 0.94, + "learning_rate": 9.99092454127896e-08, + "loss": 1.0901, + "step": 17180 + }, + { + "epoch": 0.94, + "learning_rate": 9.592189536426074e-08, + "loss": 1.0761, + "step": 17185 + }, + { + "epoch": 0.94, + "learning_rate": 9.201570266218619e-08, + "loss": 1.0526, + "step": 17190 + }, + { + "epoch": 0.94, + "learning_rate": 8.819067047974989e-08, + "loss": 1.1825, + "step": 17195 + }, + { + "epoch": 0.94, + "learning_rate": 8.444680192424406e-08, + "loss": 1.1301, + "step": 17200 + }, + { + "epoch": 0.94, + "learning_rate": 8.078410003700251e-08, + "loss": 1.1353, + "step": 17205 + }, + { + "epoch": 0.94, + "learning_rate": 7.720256779343409e-08, + "loss": 1.0628, + "step": 17210 + }, + { + "epoch": 0.94, + "learning_rate": 7.370220810301142e-08, + "loss": 1.1315, + "step": 17215 + }, + { + "epoch": 0.94, + "learning_rate": 7.028302380924884e-08, + "loss": 1.106, + "step": 17220 + }, + { + "epoch": 0.94, + "learning_rate": 6.694501768973561e-08, + "loss": 1.1035, + "step": 17225 + }, + { + "epoch": 0.94, + "learning_rate": 6.368819245611376e-08, + "loss": 1.1582, + "step": 17230 + }, + { + "epoch": 0.94, + "learning_rate": 6.051255075406694e-08, + "loss": 1.166, + "step": 17235 + }, + { + "epoch": 0.94, + "learning_rate": 5.7418095163342706e-08, + "loss": 1.0483, + "step": 17240 + }, + { + "epoch": 0.94, + "learning_rate": 5.4404828197719124e-08, + "loss": 1.1408, + "step": 17245 + }, + { + "epoch": 0.94, + "learning_rate": 5.1472752305049216e-08, + "loss": 1.1449, + "step": 17250 + }, + { + "epoch": 0.95, + "learning_rate": 4.8621869867205465e-08, + "loss": 1.1723, + "step": 17255 + }, + { + "epoch": 0.95, + "learning_rate": 4.5852183200102e-08, + "loss": 1.0561, + "step": 17260 + }, + { + "epoch": 0.95, + "learning_rate": 4.316369455371681e-08, + "loss": 1.0003, + "step": 17265 + }, + { + "epoch": 0.95, + "learning_rate": 4.05564061120362e-08, + "loss": 1.1429, + "step": 17270 + }, + { + "epoch": 0.95, + "learning_rate": 3.803031999309926e-08, + "loss": 1.1214, + "step": 17275 + }, + { + "epoch": 0.95, + "learning_rate": 3.5585438248986716e-08, + "loss": 1.1354, + "step": 17280 + }, + { + "epoch": 0.95, + "learning_rate": 3.322176286579875e-08, + "loss": 1.0623, + "step": 17285 + }, + { + "epoch": 0.95, + "learning_rate": 3.0939295763677164e-08, + "loss": 1.144, + "step": 17290 + }, + { + "epoch": 0.95, + "learning_rate": 2.873803879678325e-08, + "loss": 1.1006, + "step": 17295 + }, + { + "epoch": 0.95, + "learning_rate": 2.661799375331992e-08, + "loss": 1.2163, + "step": 17300 + }, + { + "epoch": 0.95, + "learning_rate": 2.4579162355520632e-08, + "loss": 1.2416, + "step": 17305 + }, + { + "epoch": 0.95, + "learning_rate": 2.26215462596161e-08, + "loss": 1.1587, + "step": 17310 + }, + { + "epoch": 0.95, + "learning_rate": 2.0745147055900888e-08, + "loss": 1.078, + "step": 17315 + }, + { + "epoch": 0.95, + "learning_rate": 1.8949966268655696e-08, + "loss": 1.091, + "step": 17320 + }, + { + "epoch": 0.95, + "learning_rate": 1.723600535620288e-08, + "loss": 1.1071, + "step": 17325 + }, + { + "epoch": 0.95, + "learning_rate": 1.5603265710895336e-08, + "loss": 1.0908, + "step": 17330 + }, + { + "epoch": 0.95, + "learning_rate": 1.4051748659083202e-08, + "loss": 1.0811, + "step": 17335 + }, + { + "epoch": 0.95, + "learning_rate": 1.2581455461147166e-08, + "loss": 1.137, + "step": 17340 + }, + { + "epoch": 0.95, + "learning_rate": 1.1192387311487366e-08, + "loss": 1.0535, + "step": 17345 + }, + { + "epoch": 0.95, + "learning_rate": 9.88454533851228e-09, + "loss": 1.1146, + "step": 17350 + }, + { + "epoch": 0.95, + "learning_rate": 8.657930604660933e-09, + "loss": 1.1813, + "step": 17355 + }, + { + "epoch": 0.95, + "learning_rate": 7.51254410635849e-09, + "loss": 1.1633, + "step": 17360 + }, + { + "epoch": 0.95, + "learning_rate": 6.448386774082868e-09, + "loss": 1.1483, + "step": 17365 + }, + { + "epoch": 0.95, + "learning_rate": 5.465459472298123e-09, + "loss": 1.1346, + "step": 17370 + }, + { + "epoch": 0.95, + "learning_rate": 4.563762999476653e-09, + "loss": 1.0953, + "step": 17375 + }, + { + "epoch": 0.95, + "learning_rate": 3.743298088121406e-09, + "loss": 1.1749, + "step": 17380 + }, + { + "epoch": 0.95, + "learning_rate": 3.0040654047547757e-09, + "loss": 1.2187, + "step": 17385 + }, + { + "epoch": 0.95, + "learning_rate": 2.3460655498630877e-09, + "loss": 1.1367, + "step": 17390 + }, + { + "epoch": 0.95, + "learning_rate": 1.7692990580076276e-09, + "loss": 1.1282, + "step": 17395 + }, + { + "epoch": 0.95, + "learning_rate": 1.273766397702511e-09, + "loss": 1.1553, + "step": 17400 + }, + { + "epoch": 0.95, + "learning_rate": 8.594679715035046e-10, + "loss": 1.0399, + "step": 17405 + }, + { + "epoch": 0.95, + "learning_rate": 5.264041159636159e-10, + "loss": 1.1995, + "step": 17410 + }, + { + "epoch": 0.95, + "learning_rate": 2.745751016552985e-10, + "loss": 1.0904, + "step": 17415 + }, + { + "epoch": 0.95, + "learning_rate": 1.0398113314824695e-10, + "loss": 1.0648, + "step": 17420 + }, + { + "epoch": 0.95, + "learning_rate": 1.4622349031601304e-11, + "loss": 1.1043, + "step": 17425 + }, + { + "epoch": 0.95, + "learning_rate": 6.498821880640549e-12, + "loss": 1.0461, + "step": 17430 + }, + { + "epoch": 0.95, + "learning_rate": 7.961055830119079e-11, + "loss": 1.1392, + "step": 17435 + }, + { + "epoch": 0.96, + "learning_rate": 2.3395749890742223e-10, + "loss": 1.0029, + "step": 17440 + }, + { + "epoch": 0.96, + "learning_rate": 4.695395183107465e-10, + "loss": 1.1768, + "step": 17445 + }, + { + "epoch": 0.96, + "learning_rate": 7.863564251309187e-10, + "loss": 1.0712, + "step": 17450 + }, + { + "epoch": 0.96, + "learning_rate": 1.1844079620182416e-09, + "loss": 1.0349, + "step": 17455 + }, + { + "epoch": 0.96, + "learning_rate": 1.6636938055869524e-09, + "loss": 1.1187, + "step": 17460 + }, + { + "epoch": 0.96, + "learning_rate": 2.224213566515143e-09, + "loss": 1.0561, + "step": 17465 + }, + { + "epoch": 0.96, + "learning_rate": 2.8659667894337388e-09, + "loss": 1.1697, + "step": 17470 + }, + { + "epoch": 0.96, + "learning_rate": 3.5889529530486194e-09, + "loss": 1.069, + "step": 17475 + }, + { + "epoch": 0.96, + "learning_rate": 4.3931714700073956e-09, + "loss": 1.1701, + "step": 17480 + }, + { + "epoch": 0.96, + "learning_rate": 5.278621687021535e-09, + "loss": 1.0673, + "step": 17485 + }, + { + "epoch": 0.96, + "learning_rate": 6.245302884788639e-09, + "loss": 1.159, + "step": 17490 + }, + { + "epoch": 0.96, + "learning_rate": 7.293214278014658e-09, + "loss": 1.1931, + "step": 17495 + }, + { + "epoch": 0.96, + "learning_rate": 8.422355015424986e-09, + "loss": 1.0897, + "step": 17500 + }, + { + "epoch": 0.96, + "learning_rate": 9.632724179775562e-09, + "loss": 1.1737, + "step": 17505 + }, + { + "epoch": 0.96, + "learning_rate": 1.0924320787808472e-08, + "loss": 1.1856, + "step": 17510 + }, + { + "epoch": 0.96, + "learning_rate": 1.2297143790296336e-08, + "loss": 1.1193, + "step": 17515 + }, + { + "epoch": 0.96, + "learning_rate": 1.3751192072009026e-08, + "loss": 1.0823, + "step": 17520 + }, + { + "epoch": 0.96, + "learning_rate": 1.5286464451769163e-08, + "loss": 1.0698, + "step": 17525 + }, + { + "epoch": 0.96, + "learning_rate": 1.6902959682374414e-08, + "loss": 1.0934, + "step": 17530 + }, + { + "epoch": 0.96, + "learning_rate": 1.860067645067518e-08, + "loss": 1.0701, + "step": 17535 + }, + { + "epoch": 0.96, + "learning_rate": 2.0379613377508023e-08, + "loss": 1.091, + "step": 17540 + }, + { + "epoch": 0.96, + "learning_rate": 2.2239769017773337e-08, + "loss": 1.1573, + "step": 17545 + }, + { + "epoch": 0.96, + "learning_rate": 2.4181141860346563e-08, + "loss": 1.0598, + "step": 17550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6203730328155897e-08, + "loss": 1.1075, + "step": 17555 + }, + { + "epoch": 0.96, + "learning_rate": 2.8307532778160077e-08, + "loss": 1.1165, + "step": 17560 + }, + { + "epoch": 0.96, + "learning_rate": 3.0492547501303994e-08, + "loss": 1.0897, + "step": 17565 + }, + { + "epoch": 0.96, + "learning_rate": 3.2758772722607476e-08, + "loss": 1.0659, + "step": 17570 + }, + { + "epoch": 0.96, + "learning_rate": 3.5106206601076506e-08, + "loss": 1.0895, + "step": 17575 + }, + { + "epoch": 0.96, + "learning_rate": 3.753484722978091e-08, + "loss": 1.0961, + "step": 17580 + }, + { + "epoch": 0.96, + "learning_rate": 4.004469263580996e-08, + "loss": 1.1173, + "step": 17585 + }, + { + "epoch": 0.96, + "learning_rate": 4.26357407802724e-08, + "loss": 1.0851, + "step": 17590 + }, + { + "epoch": 0.96, + "learning_rate": 4.5307989558329686e-08, + "loss": 1.1711, + "step": 17595 + }, + { + "epoch": 0.96, + "learning_rate": 4.806143679917382e-08, + "loss": 1.0771, + "step": 17600 + }, + { + "epoch": 0.96, + "learning_rate": 5.089608026603849e-08, + "loss": 1.046, + "step": 17605 + }, + { + "epoch": 0.96, + "learning_rate": 5.381191765618794e-08, + "loss": 1.1066, + "step": 17610 + }, + { + "epoch": 0.96, + "learning_rate": 5.680894660092802e-08, + "loss": 1.0681, + "step": 17615 + }, + { + "epoch": 0.97, + "learning_rate": 5.988716466561736e-08, + "loss": 1.0917, + "step": 17620 + }, + { + "epoch": 0.97, + "learning_rate": 6.304656934966734e-08, + "loss": 1.1276, + "step": 17625 + }, + { + "epoch": 0.97, + "learning_rate": 6.628715808653093e-08, + "loss": 1.1136, + "step": 17630 + }, + { + "epoch": 0.97, + "learning_rate": 6.960892824368069e-08, + "loss": 1.1298, + "step": 17635 + }, + { + "epoch": 0.97, + "learning_rate": 7.301187712268617e-08, + "loss": 1.1391, + "step": 17640 + }, + { + "epoch": 0.97, + "learning_rate": 7.64960019591587e-08, + "loss": 1.1331, + "step": 17645 + }, + { + "epoch": 0.97, + "learning_rate": 8.006129992274011e-08, + "loss": 1.0241, + "step": 17650 + }, + { + "epoch": 0.97, + "learning_rate": 8.37077681171694e-08, + "loss": 1.1279, + "step": 17655 + }, + { + "epoch": 0.97, + "learning_rate": 8.743540358021604e-08, + "loss": 1.0849, + "step": 17660 + }, + { + "epoch": 0.97, + "learning_rate": 9.124420328373574e-08, + "loss": 1.096, + "step": 17665 + }, + { + "epoch": 0.97, + "learning_rate": 9.513416413363673e-08, + "loss": 1.1706, + "step": 17670 + }, + { + "epoch": 0.97, + "learning_rate": 9.910528296989132e-08, + "loss": 1.0976, + "step": 17675 + }, + { + "epoch": 0.97, + "learning_rate": 1.0315755656655769e-07, + "loss": 1.089, + "step": 17680 + }, + { + "epoch": 0.97, + "learning_rate": 1.07290981631758e-07, + "loss": 1.1047, + "step": 17685 + }, + { + "epoch": 0.97, + "learning_rate": 1.1150555480770042e-07, + "loss": 1.0895, + "step": 17690 + }, + { + "epoch": 0.97, + "learning_rate": 1.1580127267064585e-07, + "loss": 1.1052, + "step": 17695 + }, + { + "epoch": 0.97, + "learning_rate": 1.2017813173098576e-07, + "loss": 1.1536, + "step": 17700 + }, + { + "epoch": 0.97, + "learning_rate": 1.2463612843313095e-07, + "loss": 1.1482, + "step": 17705 + }, + { + "epoch": 0.97, + "learning_rate": 1.2917525915564495e-07, + "loss": 1.1544, + "step": 17710 + }, + { + "epoch": 0.97, + "learning_rate": 1.337955202111551e-07, + "loss": 1.0969, + "step": 17715 + }, + { + "epoch": 0.97, + "learning_rate": 1.3849690784635265e-07, + "loss": 1.1826, + "step": 17720 + }, + { + "epoch": 0.97, + "learning_rate": 1.4327941824207048e-07, + "loss": 1.0546, + "step": 17725 + }, + { + "epoch": 0.97, + "learning_rate": 1.4814304751321617e-07, + "loss": 1.1012, + "step": 17730 + }, + { + "epoch": 0.97, + "learning_rate": 1.5308779170882804e-07, + "loss": 1.0676, + "step": 17735 + }, + { + "epoch": 0.97, + "learning_rate": 1.5811364681199704e-07, + "loss": 1.1352, + "step": 17740 + }, + { + "epoch": 0.97, + "learning_rate": 1.6322060873997795e-07, + "loss": 1.0641, + "step": 17745 + }, + { + "epoch": 0.97, + "learning_rate": 1.6840867334411152e-07, + "loss": 1.046, + "step": 17750 + }, + { + "epoch": 0.97, + "learning_rate": 1.736778364098691e-07, + "loss": 1.0918, + "step": 17755 + }, + { + "epoch": 0.97, + "learning_rate": 1.79028093656819e-07, + "loss": 1.1782, + "step": 17760 + }, + { + "epoch": 0.97, + "learning_rate": 1.8445944073866025e-07, + "loss": 1.0212, + "step": 17765 + }, + { + "epoch": 0.97, + "learning_rate": 1.8997187324323326e-07, + "loss": 1.0893, + "step": 17770 + }, + { + "epoch": 0.97, + "learning_rate": 1.9556538669249784e-07, + "loss": 1.1477, + "step": 17775 + }, + { + "epoch": 0.97, + "learning_rate": 2.0123997654255545e-07, + "loss": 1.1655, + "step": 17780 + }, + { + "epoch": 0.97, + "learning_rate": 2.0699563818362692e-07, + "loss": 1.0668, + "step": 17785 + }, + { + "epoch": 0.97, + "learning_rate": 2.1283236694008562e-07, + "loss": 1.1685, + "step": 17790 + }, + { + "epoch": 0.97, + "learning_rate": 2.187501580704466e-07, + "loss": 1.1222, + "step": 17795 + }, + { + "epoch": 0.97, + "learning_rate": 2.247490067673886e-07, + "loss": 1.1557, + "step": 17800 + }, + { + "epoch": 0.98, + "learning_rate": 2.30828908157732e-07, + "loss": 1.1365, + "step": 17805 + }, + { + "epoch": 0.98, + "learning_rate": 2.3698985730242768e-07, + "loss": 1.138, + "step": 17810 + }, + { + "epoch": 0.98, + "learning_rate": 2.432318491966346e-07, + "loss": 1.2195, + "step": 17815 + }, + { + "epoch": 0.98, + "learning_rate": 2.4955487876965333e-07, + "loss": 1.1511, + "step": 17820 + }, + { + "epoch": 0.98, + "learning_rate": 2.559589408849483e-07, + "loss": 1.134, + "step": 17825 + }, + { + "epoch": 0.98, + "learning_rate": 2.6244403034015876e-07, + "loss": 1.1204, + "step": 17830 + }, + { + "epoch": 0.98, + "learning_rate": 2.690101418671098e-07, + "loss": 1.0799, + "step": 17835 + }, + { + "epoch": 0.98, + "learning_rate": 2.756572701318017e-07, + "loss": 1.0545, + "step": 17840 + }, + { + "epoch": 0.98, + "learning_rate": 2.8238540973442034e-07, + "loss": 1.0499, + "step": 17845 + }, + { + "epoch": 0.98, + "learning_rate": 2.8919455520934887e-07, + "loss": 1.0304, + "step": 17850 + }, + { + "epoch": 0.98, + "learning_rate": 2.960847010251455e-07, + "loss": 1.15, + "step": 17855 + }, + { + "epoch": 0.98, + "learning_rate": 3.0305584158460966e-07, + "loss": 1.1098, + "step": 17860 + }, + { + "epoch": 0.98, + "learning_rate": 3.101079712246935e-07, + "loss": 1.1001, + "step": 17865 + }, + { + "epoch": 0.98, + "learning_rate": 3.1724108421657956e-07, + "loss": 1.0966, + "step": 17870 + }, + { + "epoch": 0.98, + "learning_rate": 3.244551747656698e-07, + "loss": 1.0998, + "step": 17875 + }, + { + "epoch": 0.98, + "learning_rate": 3.3175023701158535e-07, + "loss": 1.1206, + "step": 17880 + }, + { + "epoch": 0.98, + "learning_rate": 3.391262650281667e-07, + "loss": 1.1344, + "step": 17885 + }, + { + "epoch": 0.98, + "learning_rate": 3.4658325282345135e-07, + "loss": 1.0534, + "step": 17890 + }, + { + "epoch": 0.98, + "learning_rate": 3.541211943397626e-07, + "loss": 1.1075, + "step": 17895 + }, + { + "epoch": 0.98, + "learning_rate": 3.6174008345360996e-07, + "loss": 1.168, + "step": 17900 + }, + { + "epoch": 0.98, + "learning_rate": 3.694399139757997e-07, + "loss": 1.0723, + "step": 17905 + }, + { + "epoch": 0.98, + "learning_rate": 3.7722067965132436e-07, + "loss": 1.1394, + "step": 17910 + }, + { + "epoch": 0.98, + "learning_rate": 3.850823741594733e-07, + "loss": 1.2, + "step": 17915 + }, + { + "epoch": 0.98, + "learning_rate": 3.9302499111375516e-07, + "loss": 1.105, + "step": 17920 + }, + { + "epoch": 0.98, + "learning_rate": 4.010485240620088e-07, + "loss": 1.1311, + "step": 17925 + }, + { + "epoch": 0.98, + "learning_rate": 4.091529664862482e-07, + "loss": 1.1579, + "step": 17930 + }, + { + "epoch": 0.98, + "learning_rate": 4.173383118028395e-07, + "loss": 1.1507, + "step": 17935 + }, + { + "epoch": 0.98, + "learning_rate": 4.2560455336237975e-07, + "loss": 1.0406, + "step": 17940 + }, + { + "epoch": 0.98, + "learning_rate": 4.339516844497736e-07, + "loss": 1.0647, + "step": 17945 + }, + { + "epoch": 0.98, + "learning_rate": 4.423796982842009e-07, + "loss": 1.1317, + "step": 17950 + }, + { + "epoch": 0.98, + "learning_rate": 4.5088858801914935e-07, + "loss": 1.1455, + "step": 17955 + }, + { + "epoch": 0.98, + "learning_rate": 4.594783467423813e-07, + "loss": 1.0541, + "step": 17960 + }, + { + "epoch": 0.98, + "learning_rate": 4.681489674760008e-07, + "loss": 1.1335, + "step": 17965 + }, + { + "epoch": 0.98, + "learning_rate": 4.769004431764089e-07, + "loss": 1.0877, + "step": 17970 + }, + { + "epoch": 0.98, + "learning_rate": 4.857327667343145e-07, + "loss": 1.0963, + "step": 17975 + }, + { + "epoch": 0.98, + "learning_rate": 4.946459309747462e-07, + "loss": 1.071, + "step": 17980 + }, + { + "epoch": 0.99, + "learning_rate": 5.036399286570733e-07, + "loss": 1.1313, + "step": 17985 + }, + { + "epoch": 0.99, + "learning_rate": 5.127147524750183e-07, + "loss": 1.1129, + "step": 17990 + }, + { + "epoch": 0.99, + "learning_rate": 5.218703950566002e-07, + "loss": 1.1666, + "step": 17995 + }, + { + "epoch": 0.99, + "learning_rate": 5.311068489642132e-07, + "loss": 1.1177, + "step": 18000 + }, + { + "epoch": 0.99, + "learning_rate": 5.404241066946146e-07, + "loss": 1.0978, + "step": 18005 + }, + { + "epoch": 0.99, + "learning_rate": 5.498221606788811e-07, + "loss": 1.1234, + "step": 18010 + }, + { + "epoch": 0.99, + "learning_rate": 5.593010032824975e-07, + "loss": 1.106, + "step": 18015 + }, + { + "epoch": 0.99, + "learning_rate": 5.6886062680529e-07, + "loss": 1.0697, + "step": 18020 + }, + { + "epoch": 0.99, + "learning_rate": 5.785010234814815e-07, + "loss": 1.0271, + "step": 18025 + }, + { + "epoch": 0.99, + "learning_rate": 5.882221854796587e-07, + "loss": 1.156, + "step": 18030 + }, + { + "epoch": 0.99, + "learning_rate": 5.980241049028056e-07, + "loss": 1.0948, + "step": 18035 + }, + { + "epoch": 0.99, + "learning_rate": 6.079067737883358e-07, + "loss": 1.1019, + "step": 18040 + }, + { + "epoch": 0.99, + "learning_rate": 6.17870184108005e-07, + "loss": 1.1376, + "step": 18045 + }, + { + "epoch": 0.99, + "learning_rate": 6.279143277680201e-07, + "loss": 1.1264, + "step": 18050 + }, + { + "epoch": 0.99, + "learning_rate": 6.380391966090085e-07, + "loss": 1.1145, + "step": 18055 + }, + { + "epoch": 0.99, + "learning_rate": 6.482447824059825e-07, + "loss": 1.1042, + "step": 18060 + }, + { + "epoch": 0.99, + "learning_rate": 6.585310768684184e-07, + "loss": 1.1158, + "step": 18065 + }, + { + "epoch": 0.99, + "learning_rate": 6.688980716402227e-07, + "loss": 1.1082, + "step": 18070 + }, + { + "epoch": 0.99, + "learning_rate": 6.79345758299721e-07, + "loss": 1.1302, + "step": 18075 + }, + { + "epoch": 0.99, + "learning_rate": 6.898741283597353e-07, + "loss": 1.1616, + "step": 18080 + }, + { + "epoch": 0.99, + "learning_rate": 7.004831732674963e-07, + "loss": 1.1661, + "step": 18085 + }, + { + "epoch": 0.99, + "learning_rate": 7.1117288440472e-07, + "loss": 1.1061, + "step": 18090 + }, + { + "epoch": 0.99, + "learning_rate": 7.219432530875864e-07, + "loss": 1.1504, + "step": 18095 + }, + { + "epoch": 0.99, + "learning_rate": 7.327942705667606e-07, + "loss": 1.0058, + "step": 18100 + }, + { + "epoch": 0.99, + "learning_rate": 7.43725928027405e-07, + "loss": 1.1764, + "step": 18105 + }, + { + "epoch": 0.99, + "learning_rate": 7.547382165891342e-07, + "loss": 1.1138, + "step": 18110 + }, + { + "epoch": 0.99, + "learning_rate": 7.65831127306104e-07, + "loss": 1.0429, + "step": 18115 + }, + { + "epoch": 0.99, + "learning_rate": 7.770046511669453e-07, + "loss": 1.1505, + "step": 18120 + }, + { + "epoch": 0.99, + "learning_rate": 7.882587790948297e-07, + "loss": 1.1248, + "step": 18125 + }, + { + "epoch": 0.99, + "learning_rate": 7.995935019474259e-07, + "loss": 1.1732, + "step": 18130 + }, + { + "epoch": 0.99, + "learning_rate": 8.110088105169438e-07, + "loss": 1.1216, + "step": 18135 + }, + { + "epoch": 0.99, + "learning_rate": 8.225046955301236e-07, + "loss": 1.1376, + "step": 18140 + }, + { + "epoch": 0.99, + "learning_rate": 8.340811476482684e-07, + "loss": 1.17, + "step": 18145 + }, + { + "epoch": 0.99, + "learning_rate": 8.457381574671901e-07, + "loss": 1.1374, + "step": 18150 + }, + { + "epoch": 0.99, + "learning_rate": 8.574757155173083e-07, + "loss": 1.1066, + "step": 18155 + }, + { + "epoch": 0.99, + "learning_rate": 8.692938122635603e-07, + "loss": 1.1494, + "step": 18160 + }, + { + "epoch": 0.99, + "learning_rate": 8.811924381055153e-07, + "loss": 1.1208, + "step": 18165 + }, + { + "epoch": 1.0, + "learning_rate": 8.931715833772719e-07, + "loss": 1.0973, + "step": 18170 + }, + { + "epoch": 1.0, + "learning_rate": 9.052312383475475e-07, + "loss": 1.1403, + "step": 18175 + }, + { + "epoch": 1.0, + "learning_rate": 9.173713932196343e-07, + "loss": 1.0775, + "step": 18180 + }, + { + "epoch": 1.0, + "learning_rate": 9.295920381314763e-07, + "loss": 1.034, + "step": 18185 + }, + { + "epoch": 1.0, + "learning_rate": 9.41893163155571e-07, + "loss": 1.1289, + "step": 18190 + }, + { + "epoch": 1.0, + "learning_rate": 9.542747582990776e-07, + "loss": 1.1602, + "step": 18195 + }, + { + "epoch": 1.0, + "learning_rate": 9.667368135037646e-07, + "loss": 1.0516, + "step": 18200 + }, + { + "epoch": 1.0, + "learning_rate": 9.792793186460524e-07, + "loss": 1.0955, + "step": 18205 + }, + { + "epoch": 1.0, + "learning_rate": 9.919022635370244e-07, + "loss": 1.0446, + "step": 18210 + }, + { + "epoch": 1.0, + "learning_rate": 1.0046056379223612e-06, + "loss": 1.1256, + "step": 18215 + }, + { + "epoch": 1.0, + "learning_rate": 1.0173894314824724e-06, + "loss": 1.1325, + "step": 18220 + }, + { + "epoch": 1.0, + "learning_rate": 1.0302536338323771e-06, + "loss": 1.1127, + "step": 18225 + }, + { + "epoch": 1.0, + "learning_rate": 1.0431982345218455e-06, + "loss": 1.1377, + "step": 18230 + }, + { + "epoch": 1.0, + "learning_rate": 1.0562232230352669e-06, + "loss": 1.1232, + "step": 18235 + }, + { + "epoch": 1.0, + "learning_rate": 1.0693285887917827e-06, + "loss": 1.1032, + "step": 18240 + }, + { + "epoch": 1.0, + "learning_rate": 1.082514321145176e-06, + "loss": 1.0565, + "step": 18245 + }, + { + "epoch": 1.0, + "learning_rate": 1.095780409384015e-06, + "loss": 1.1019, + "step": 18250 + }, + { + "epoch": 1.0, + "learning_rate": 1.1091268427315424e-06, + "loss": 1.1256, + "step": 18255 + }, + { + "epoch": 1.0, + "eval_loss": 1.1316999197006226, + "eval_runtime": 1030.3828, + "eval_samples_per_second": 15.689, + "eval_steps_per_second": 15.689, + "step": 18257 + }, + { + "epoch": 1.0, + "step": 18257, + "total_flos": 1.8793185671034962e+18, + "train_loss": 1.1159586102780843, + "train_runtime": 26065.8571, + "train_samples_per_second": 5.603, + "train_steps_per_second": 0.7 + } + ], + "logging_steps": 5, + "max_steps": 18257, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "total_flos": 1.8793185671034962e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}