{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2774720060732587, "global_step": 24000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019998102106661607, "loss": 9.952, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.00019996204213323213, "loss": 8.9779, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00019994306319984817, "loss": 8.4733, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.00019992408426646423, "loss": 8.3471, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.0001999051053330803, "loss": 8.3499, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.00019988612639969635, "loss": 8.3611, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0001998671474663124, "loss": 8.2644, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.00019984816853292845, "loss": 8.2485, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.0001998291895995445, "loss": 8.1744, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.00019981021066616057, "loss": 8.1581, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.00019979123173277663, "loss": 8.2968, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0001997722527993927, "loss": 8.2153, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.00019975327386600875, "loss": 8.081, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.00019973429493262479, "loss": 8.1176, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00019971531599924085, "loss": 8.1398, "step": 150 }, { "epoch": 0.02, "learning_rate": 0.0001996963370658569, "loss": 8.1933, "step": 160 }, { "epoch": 0.02, "learning_rate": 0.00019967735813247297, "loss": 8.1381, "step": 170 }, { "epoch": 0.02, "learning_rate": 0.00019965837919908903, "loss": 8.1712, "step": 180 }, { "epoch": 0.02, "learning_rate": 0.00019963940026570507, "loss": 8.1477, "step": 190 }, { "epoch": 0.02, "learning_rate": 0.00019962042133232113, "loss": 8.1891, "step": 200 }, { "epoch": 0.02, "learning_rate": 0.0001996014423989372, "loss": 8.0788, "step": 210 }, { "epoch": 0.02, "learning_rate": 0.00019958246346555325, "loss": 8.1196, "step": 220 }, { "epoch": 0.02, "learning_rate": 0.0001995634845321693, "loss": 8.0288, "step": 230 }, { "epoch": 0.02, "learning_rate": 0.00019954450559878534, "loss": 8.1236, "step": 240 }, { "epoch": 0.02, "learning_rate": 0.0001995255266654014, "loss": 8.0957, "step": 250 }, { "epoch": 0.02, "learning_rate": 0.00019950654773201747, "loss": 7.9825, "step": 260 }, { "epoch": 0.03, "learning_rate": 0.00019948756879863353, "loss": 8.1757, "step": 270 }, { "epoch": 0.03, "learning_rate": 0.0001994685898652496, "loss": 8.1677, "step": 280 }, { "epoch": 0.03, "learning_rate": 0.00019944961093186565, "loss": 8.0789, "step": 290 }, { "epoch": 0.03, "learning_rate": 0.00019943063199848169, "loss": 8.0367, "step": 300 }, { "epoch": 0.03, "learning_rate": 0.00019941165306509775, "loss": 8.1172, "step": 310 }, { "epoch": 0.03, "learning_rate": 0.0001993926741317138, "loss": 8.1456, "step": 320 }, { "epoch": 0.03, "learning_rate": 0.00019937369519832987, "loss": 8.0392, "step": 330 }, { "epoch": 0.03, "learning_rate": 0.00019935471626494593, "loss": 8.2246, "step": 340 }, { "epoch": 0.03, "learning_rate": 0.00019933573733156196, "loss": 8.0326, "step": 350 }, { "epoch": 0.03, "learning_rate": 0.00019931675839817803, "loss": 8.092, "step": 360 }, { "epoch": 0.04, "learning_rate": 0.0001992977794647941, "loss": 8.0419, "step": 370 }, { "epoch": 0.04, "learning_rate": 0.00019927880053141015, "loss": 8.1022, "step": 380 }, { "epoch": 0.04, "learning_rate": 0.0001992598215980262, "loss": 8.0943, "step": 390 }, { "epoch": 0.04, "learning_rate": 0.00019924084266464224, "loss": 8.0907, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.0001992218637312583, "loss": 8.0365, "step": 410 }, { "epoch": 0.04, "learning_rate": 0.00019920288479787437, "loss": 7.9948, "step": 420 }, { "epoch": 0.04, "learning_rate": 0.00019918390586449043, "loss": 8.033, "step": 430 }, { "epoch": 0.04, "learning_rate": 0.0001991649269311065, "loss": 8.1033, "step": 440 }, { "epoch": 0.04, "learning_rate": 0.00019914594799772255, "loss": 8.1076, "step": 450 }, { "epoch": 0.04, "learning_rate": 0.00019912696906433858, "loss": 8.0816, "step": 460 }, { "epoch": 0.04, "learning_rate": 0.00019910799013095465, "loss": 8.0745, "step": 470 }, { "epoch": 0.05, "learning_rate": 0.0001990890111975707, "loss": 8.1358, "step": 480 }, { "epoch": 0.05, "learning_rate": 0.00019907003226418677, "loss": 7.9812, "step": 490 }, { "epoch": 0.05, "learning_rate": 0.00019905105333080283, "loss": 8.0595, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.00019903207439741886, "loss": 8.0713, "step": 510 }, { "epoch": 0.05, "learning_rate": 0.00019901309546403492, "loss": 8.1596, "step": 520 }, { "epoch": 0.05, "learning_rate": 0.00019899411653065099, "loss": 8.0495, "step": 530 }, { "epoch": 0.05, "learning_rate": 0.00019897513759726705, "loss": 8.0031, "step": 540 }, { "epoch": 0.05, "learning_rate": 0.0001989561586638831, "loss": 8.1362, "step": 550 }, { "epoch": 0.05, "learning_rate": 0.00019893717973049914, "loss": 8.0954, "step": 560 }, { "epoch": 0.05, "learning_rate": 0.0001989182007971152, "loss": 8.1174, "step": 570 }, { "epoch": 0.06, "learning_rate": 0.00019889922186373127, "loss": 8.0747, "step": 580 }, { "epoch": 0.06, "learning_rate": 0.00019888024293034733, "loss": 8.0865, "step": 590 }, { "epoch": 0.06, "learning_rate": 0.0001988612639969634, "loss": 8.0401, "step": 600 }, { "epoch": 0.06, "learning_rate": 0.00019884228506357942, "loss": 8.0427, "step": 610 }, { "epoch": 0.06, "learning_rate": 0.00019882330613019548, "loss": 7.9791, "step": 620 }, { "epoch": 0.06, "learning_rate": 0.00019880432719681154, "loss": 8.0075, "step": 630 }, { "epoch": 0.06, "learning_rate": 0.0001987853482634276, "loss": 7.999, "step": 640 }, { "epoch": 0.06, "learning_rate": 0.00019876636933004367, "loss": 8.0756, "step": 650 }, { "epoch": 0.06, "learning_rate": 0.00019874739039665973, "loss": 8.0046, "step": 660 }, { "epoch": 0.06, "learning_rate": 0.00019872841146327576, "loss": 7.9885, "step": 670 }, { "epoch": 0.06, "learning_rate": 0.00019870943252989182, "loss": 8.065, "step": 680 }, { "epoch": 0.07, "learning_rate": 0.00019869045359650789, "loss": 8.0558, "step": 690 }, { "epoch": 0.07, "learning_rate": 0.00019867147466312395, "loss": 8.085, "step": 700 }, { "epoch": 0.07, "learning_rate": 0.00019865249572974, "loss": 8.0773, "step": 710 }, { "epoch": 0.07, "learning_rate": 0.00019863351679635604, "loss": 8.0463, "step": 720 }, { "epoch": 0.07, "learning_rate": 0.0001986145378629721, "loss": 8.0125, "step": 730 }, { "epoch": 0.07, "learning_rate": 0.00019859555892958816, "loss": 8.0906, "step": 740 }, { "epoch": 0.07, "learning_rate": 0.00019857657999620423, "loss": 8.038, "step": 750 }, { "epoch": 0.07, "learning_rate": 0.0001985576010628203, "loss": 8.0052, "step": 760 }, { "epoch": 0.07, "learning_rate": 0.00019853862212943632, "loss": 8.0353, "step": 770 }, { "epoch": 0.07, "learning_rate": 0.00019851964319605238, "loss": 8.0852, "step": 780 }, { "epoch": 0.07, "learning_rate": 0.00019850066426266844, "loss": 8.0317, "step": 790 }, { "epoch": 0.08, "learning_rate": 0.0001984816853292845, "loss": 7.9571, "step": 800 }, { "epoch": 0.08, "learning_rate": 0.00019846270639590057, "loss": 8.086, "step": 810 }, { "epoch": 0.08, "learning_rate": 0.00019844372746251663, "loss": 7.9966, "step": 820 }, { "epoch": 0.08, "learning_rate": 0.00019842474852913266, "loss": 8.0486, "step": 830 }, { "epoch": 0.08, "learning_rate": 0.00019840576959574872, "loss": 7.9632, "step": 840 }, { "epoch": 0.08, "learning_rate": 0.00019838679066236478, "loss": 8.0173, "step": 850 }, { "epoch": 0.08, "learning_rate": 0.00019836781172898085, "loss": 7.9798, "step": 860 }, { "epoch": 0.08, "learning_rate": 0.0001983488327955969, "loss": 7.8961, "step": 870 }, { "epoch": 0.08, "learning_rate": 0.00019832985386221294, "loss": 8.1219, "step": 880 }, { "epoch": 0.08, "learning_rate": 0.000198310874928829, "loss": 8.0091, "step": 890 }, { "epoch": 0.09, "learning_rate": 0.00019829189599544506, "loss": 8.0785, "step": 900 }, { "epoch": 0.09, "learning_rate": 0.00019827291706206113, "loss": 8.0095, "step": 910 }, { "epoch": 0.09, "learning_rate": 0.00019825393812867719, "loss": 7.9793, "step": 920 }, { "epoch": 0.09, "learning_rate": 0.00019823495919529322, "loss": 8.006, "step": 930 }, { "epoch": 0.09, "learning_rate": 0.00019821598026190928, "loss": 7.969, "step": 940 }, { "epoch": 0.09, "learning_rate": 0.00019819700132852534, "loss": 7.9929, "step": 950 }, { "epoch": 0.09, "learning_rate": 0.0001981780223951414, "loss": 8.1714, "step": 960 }, { "epoch": 0.09, "learning_rate": 0.00019815904346175747, "loss": 8.0096, "step": 970 }, { "epoch": 0.09, "learning_rate": 0.00019814006452837353, "loss": 7.9708, "step": 980 }, { "epoch": 0.09, "learning_rate": 0.00019812108559498956, "loss": 8.0766, "step": 990 }, { "epoch": 0.09, "learning_rate": 0.00019810210666160562, "loss": 7.9056, "step": 1000 }, { "epoch": 0.1, "learning_rate": 0.00019808312772822168, "loss": 8.0285, "step": 1010 }, { "epoch": 0.1, "learning_rate": 0.00019806414879483774, "loss": 8.0732, "step": 1020 }, { "epoch": 0.1, "learning_rate": 0.0001980451698614538, "loss": 7.8647, "step": 1030 }, { "epoch": 0.1, "learning_rate": 0.00019802619092806984, "loss": 8.0618, "step": 1040 }, { "epoch": 0.1, "learning_rate": 0.0001980072119946859, "loss": 7.8339, "step": 1050 }, { "epoch": 0.1, "learning_rate": 0.00019798823306130196, "loss": 7.9132, "step": 1060 }, { "epoch": 0.1, "learning_rate": 0.00019796925412791802, "loss": 7.913, "step": 1070 }, { "epoch": 0.1, "learning_rate": 0.00019795027519453409, "loss": 7.9198, "step": 1080 }, { "epoch": 0.1, "learning_rate": 0.00019793129626115012, "loss": 8.0008, "step": 1090 }, { "epoch": 0.1, "learning_rate": 0.00019791231732776618, "loss": 8.0339, "step": 1100 }, { "epoch": 0.11, "learning_rate": 0.00019789333839438224, "loss": 8.0553, "step": 1110 }, { "epoch": 0.11, "learning_rate": 0.0001978743594609983, "loss": 7.9269, "step": 1120 }, { "epoch": 0.11, "learning_rate": 0.00019785538052761436, "loss": 7.9504, "step": 1130 }, { "epoch": 0.11, "learning_rate": 0.0001978364015942304, "loss": 8.0064, "step": 1140 }, { "epoch": 0.11, "learning_rate": 0.00019781742266084646, "loss": 8.1134, "step": 1150 }, { "epoch": 0.11, "learning_rate": 0.00019779844372746252, "loss": 8.0186, "step": 1160 }, { "epoch": 0.11, "learning_rate": 0.00019777946479407858, "loss": 8.02, "step": 1170 }, { "epoch": 0.11, "learning_rate": 0.00019776048586069464, "loss": 8.1027, "step": 1180 }, { "epoch": 0.11, "learning_rate": 0.0001977415069273107, "loss": 7.9113, "step": 1190 }, { "epoch": 0.11, "learning_rate": 0.00019772252799392674, "loss": 7.983, "step": 1200 }, { "epoch": 0.11, "learning_rate": 0.0001977035490605428, "loss": 7.9472, "step": 1210 }, { "epoch": 0.12, "learning_rate": 0.00019768457012715886, "loss": 8.0294, "step": 1220 }, { "epoch": 0.12, "learning_rate": 0.00019766559119377492, "loss": 7.9201, "step": 1230 }, { "epoch": 0.12, "learning_rate": 0.00019764661226039098, "loss": 7.9851, "step": 1240 }, { "epoch": 0.12, "learning_rate": 0.00019762763332700702, "loss": 8.0097, "step": 1250 }, { "epoch": 0.12, "learning_rate": 0.00019760865439362308, "loss": 8.0801, "step": 1260 }, { "epoch": 0.12, "learning_rate": 0.00019758967546023914, "loss": 7.9854, "step": 1270 }, { "epoch": 0.12, "learning_rate": 0.0001975706965268552, "loss": 7.9648, "step": 1280 }, { "epoch": 0.12, "learning_rate": 0.00019755171759347126, "loss": 7.9651, "step": 1290 }, { "epoch": 0.12, "learning_rate": 0.0001975327386600873, "loss": 7.9788, "step": 1300 }, { "epoch": 0.12, "learning_rate": 0.00019751375972670336, "loss": 7.9753, "step": 1310 }, { "epoch": 0.13, "learning_rate": 0.00019749478079331942, "loss": 8.0325, "step": 1320 }, { "epoch": 0.13, "learning_rate": 0.00019747580185993548, "loss": 7.937, "step": 1330 }, { "epoch": 0.13, "learning_rate": 0.00019745682292655154, "loss": 8.0093, "step": 1340 }, { "epoch": 0.13, "learning_rate": 0.0001974378439931676, "loss": 8.0437, "step": 1350 }, { "epoch": 0.13, "learning_rate": 0.00019741886505978364, "loss": 8.0538, "step": 1360 }, { "epoch": 0.13, "learning_rate": 0.0001973998861263997, "loss": 7.9591, "step": 1370 }, { "epoch": 0.13, "learning_rate": 0.00019738090719301576, "loss": 8.0154, "step": 1380 }, { "epoch": 0.13, "learning_rate": 0.00019736192825963182, "loss": 7.9782, "step": 1390 }, { "epoch": 0.13, "learning_rate": 0.00019734294932624788, "loss": 7.9924, "step": 1400 }, { "epoch": 0.13, "learning_rate": 0.00019732397039286392, "loss": 7.9091, "step": 1410 }, { "epoch": 0.13, "learning_rate": 0.00019730499145947998, "loss": 7.9687, "step": 1420 }, { "epoch": 0.14, "learning_rate": 0.00019728601252609604, "loss": 8.0328, "step": 1430 }, { "epoch": 0.14, "learning_rate": 0.0001972670335927121, "loss": 7.8584, "step": 1440 }, { "epoch": 0.14, "learning_rate": 0.00019724805465932816, "loss": 8.0146, "step": 1450 }, { "epoch": 0.14, "learning_rate": 0.0001972290757259442, "loss": 7.8941, "step": 1460 }, { "epoch": 0.14, "learning_rate": 0.00019721009679256026, "loss": 7.9312, "step": 1470 }, { "epoch": 0.14, "learning_rate": 0.00019719111785917632, "loss": 7.9333, "step": 1480 }, { "epoch": 0.14, "learning_rate": 0.00019717213892579238, "loss": 7.9736, "step": 1490 }, { "epoch": 0.14, "learning_rate": 0.00019715315999240844, "loss": 7.9074, "step": 1500 }, { "epoch": 0.14, "learning_rate": 0.0001971341810590245, "loss": 7.9985, "step": 1510 }, { "epoch": 0.14, "learning_rate": 0.00019711520212564054, "loss": 7.9647, "step": 1520 }, { "epoch": 0.15, "learning_rate": 0.0001970962231922566, "loss": 7.9233, "step": 1530 }, { "epoch": 0.15, "learning_rate": 0.00019707724425887266, "loss": 7.9757, "step": 1540 }, { "epoch": 0.15, "learning_rate": 0.00019705826532548872, "loss": 8.0475, "step": 1550 }, { "epoch": 0.15, "learning_rate": 0.00019703928639210478, "loss": 7.974, "step": 1560 }, { "epoch": 0.15, "learning_rate": 0.00019702030745872082, "loss": 8.0162, "step": 1570 }, { "epoch": 0.15, "learning_rate": 0.00019700132852533688, "loss": 7.9094, "step": 1580 }, { "epoch": 0.15, "learning_rate": 0.00019698234959195294, "loss": 7.8877, "step": 1590 }, { "epoch": 0.15, "learning_rate": 0.000196963370658569, "loss": 7.964, "step": 1600 }, { "epoch": 0.15, "learning_rate": 0.00019694439172518506, "loss": 8.012, "step": 1610 }, { "epoch": 0.15, "learning_rate": 0.0001969254127918011, "loss": 8.0087, "step": 1620 }, { "epoch": 0.15, "learning_rate": 0.00019690643385841716, "loss": 8.0649, "step": 1630 }, { "epoch": 0.16, "learning_rate": 0.00019688745492503322, "loss": 7.9777, "step": 1640 }, { "epoch": 0.16, "learning_rate": 0.00019686847599164928, "loss": 7.9384, "step": 1650 }, { "epoch": 0.16, "learning_rate": 0.00019684949705826534, "loss": 7.9967, "step": 1660 }, { "epoch": 0.16, "learning_rate": 0.00019683051812488138, "loss": 7.924, "step": 1670 }, { "epoch": 0.16, "learning_rate": 0.00019681153919149744, "loss": 7.9114, "step": 1680 }, { "epoch": 0.16, "learning_rate": 0.0001967925602581135, "loss": 7.9128, "step": 1690 }, { "epoch": 0.16, "learning_rate": 0.00019677358132472956, "loss": 7.8818, "step": 1700 }, { "epoch": 0.16, "learning_rate": 0.00019675460239134562, "loss": 8.0632, "step": 1710 }, { "epoch": 0.16, "learning_rate": 0.00019673562345796168, "loss": 7.9477, "step": 1720 }, { "epoch": 0.16, "learning_rate": 0.00019671664452457772, "loss": 7.9508, "step": 1730 }, { "epoch": 0.17, "learning_rate": 0.00019669766559119378, "loss": 8.0061, "step": 1740 }, { "epoch": 0.17, "learning_rate": 0.00019667868665780984, "loss": 7.9196, "step": 1750 }, { "epoch": 0.17, "learning_rate": 0.0001966597077244259, "loss": 7.9596, "step": 1760 }, { "epoch": 0.17, "learning_rate": 0.00019664072879104196, "loss": 7.8292, "step": 1770 }, { "epoch": 0.17, "learning_rate": 0.000196621749857658, "loss": 7.9823, "step": 1780 }, { "epoch": 0.17, "learning_rate": 0.00019660277092427406, "loss": 7.9388, "step": 1790 }, { "epoch": 0.17, "learning_rate": 0.00019658379199089012, "loss": 8.0311, "step": 1800 }, { "epoch": 0.17, "learning_rate": 0.00019656481305750618, "loss": 7.9965, "step": 1810 }, { "epoch": 0.17, "learning_rate": 0.00019654583412412224, "loss": 7.92, "step": 1820 }, { "epoch": 0.17, "learning_rate": 0.00019652685519073827, "loss": 7.9755, "step": 1830 }, { "epoch": 0.17, "learning_rate": 0.00019650787625735434, "loss": 7.9663, "step": 1840 }, { "epoch": 0.18, "learning_rate": 0.0001964888973239704, "loss": 7.9034, "step": 1850 }, { "epoch": 0.18, "learning_rate": 0.00019646991839058646, "loss": 7.9657, "step": 1860 }, { "epoch": 0.18, "learning_rate": 0.00019645093945720252, "loss": 8.0662, "step": 1870 }, { "epoch": 0.18, "learning_rate": 0.00019643196052381858, "loss": 8.0137, "step": 1880 }, { "epoch": 0.18, "learning_rate": 0.00019641298159043462, "loss": 7.9988, "step": 1890 }, { "epoch": 0.18, "learning_rate": 0.00019639400265705068, "loss": 7.9998, "step": 1900 }, { "epoch": 0.18, "learning_rate": 0.00019637502372366674, "loss": 7.9599, "step": 1910 }, { "epoch": 0.18, "learning_rate": 0.0001963560447902828, "loss": 7.851, "step": 1920 }, { "epoch": 0.18, "learning_rate": 0.00019633706585689886, "loss": 7.8906, "step": 1930 }, { "epoch": 0.18, "learning_rate": 0.0001963180869235149, "loss": 7.9987, "step": 1940 }, { "epoch": 0.19, "learning_rate": 0.00019629910799013096, "loss": 7.949, "step": 1950 }, { "epoch": 0.19, "learning_rate": 0.00019628012905674702, "loss": 8.0121, "step": 1960 }, { "epoch": 0.19, "learning_rate": 0.00019626115012336308, "loss": 7.9445, "step": 1970 }, { "epoch": 0.19, "learning_rate": 0.00019624217118997914, "loss": 7.9355, "step": 1980 }, { "epoch": 0.19, "learning_rate": 0.00019622319225659517, "loss": 7.9748, "step": 1990 }, { "epoch": 0.19, "learning_rate": 0.00019620421332321124, "loss": 7.9165, "step": 2000 }, { "epoch": 0.19, "learning_rate": 0.0001961852343898273, "loss": 7.9001, "step": 2010 }, { "epoch": 0.19, "learning_rate": 0.00019616625545644336, "loss": 7.9579, "step": 2020 }, { "epoch": 0.19, "learning_rate": 0.00019614727652305942, "loss": 8.0491, "step": 2030 }, { "epoch": 0.19, "learning_rate": 0.00019612829758967548, "loss": 7.9823, "step": 2040 }, { "epoch": 0.19, "learning_rate": 0.00019610931865629151, "loss": 7.9317, "step": 2050 }, { "epoch": 0.2, "learning_rate": 0.00019609033972290758, "loss": 7.981, "step": 2060 }, { "epoch": 0.2, "learning_rate": 0.00019607136078952364, "loss": 7.9837, "step": 2070 }, { "epoch": 0.2, "learning_rate": 0.0001960523818561397, "loss": 7.9299, "step": 2080 }, { "epoch": 0.2, "learning_rate": 0.00019603340292275576, "loss": 7.885, "step": 2090 }, { "epoch": 0.2, "learning_rate": 0.0001960144239893718, "loss": 7.9707, "step": 2100 }, { "epoch": 0.2, "learning_rate": 0.00019599544505598785, "loss": 8.0994, "step": 2110 }, { "epoch": 0.2, "learning_rate": 0.00019597646612260392, "loss": 7.9866, "step": 2120 }, { "epoch": 0.2, "learning_rate": 0.00019595748718921998, "loss": 8.0013, "step": 2130 }, { "epoch": 0.2, "learning_rate": 0.00019593850825583604, "loss": 7.9703, "step": 2140 }, { "epoch": 0.2, "learning_rate": 0.00019591952932245207, "loss": 7.8933, "step": 2150 }, { "epoch": 0.2, "learning_rate": 0.00019590055038906813, "loss": 7.9128, "step": 2160 }, { "epoch": 0.21, "learning_rate": 0.0001958815714556842, "loss": 7.9134, "step": 2170 }, { "epoch": 0.21, "learning_rate": 0.00019586259252230026, "loss": 7.9388, "step": 2180 }, { "epoch": 0.21, "learning_rate": 0.00019584361358891632, "loss": 7.8159, "step": 2190 }, { "epoch": 0.21, "learning_rate": 0.00019582463465553235, "loss": 7.966, "step": 2200 }, { "epoch": 0.21, "learning_rate": 0.0001958056557221484, "loss": 7.9638, "step": 2210 }, { "epoch": 0.21, "learning_rate": 0.00019578667678876447, "loss": 7.9076, "step": 2220 }, { "epoch": 0.21, "learning_rate": 0.00019576769785538054, "loss": 7.8966, "step": 2230 }, { "epoch": 0.21, "learning_rate": 0.0001957487189219966, "loss": 8.0228, "step": 2240 }, { "epoch": 0.21, "learning_rate": 0.00019572973998861266, "loss": 7.963, "step": 2250 }, { "epoch": 0.21, "learning_rate": 0.0001957107610552287, "loss": 7.9361, "step": 2260 }, { "epoch": 0.22, "learning_rate": 0.00019569178212184475, "loss": 8.0444, "step": 2270 }, { "epoch": 0.22, "learning_rate": 0.00019567280318846082, "loss": 7.9489, "step": 2280 }, { "epoch": 0.22, "learning_rate": 0.00019565382425507688, "loss": 8.0002, "step": 2290 }, { "epoch": 0.22, "learning_rate": 0.00019563484532169294, "loss": 7.983, "step": 2300 }, { "epoch": 0.22, "learning_rate": 0.00019561586638830897, "loss": 7.9444, "step": 2310 }, { "epoch": 0.22, "learning_rate": 0.00019559688745492503, "loss": 7.9221, "step": 2320 }, { "epoch": 0.22, "learning_rate": 0.0001955779085215411, "loss": 8.0789, "step": 2330 }, { "epoch": 0.22, "learning_rate": 0.00019555892958815716, "loss": 7.9703, "step": 2340 }, { "epoch": 0.22, "learning_rate": 0.00019553995065477322, "loss": 7.8596, "step": 2350 }, { "epoch": 0.22, "learning_rate": 0.00019552097172138925, "loss": 8.0037, "step": 2360 }, { "epoch": 0.22, "learning_rate": 0.0001955019927880053, "loss": 7.919, "step": 2370 }, { "epoch": 0.23, "learning_rate": 0.00019548301385462137, "loss": 7.9221, "step": 2380 }, { "epoch": 0.23, "learning_rate": 0.00019546403492123744, "loss": 7.9257, "step": 2390 }, { "epoch": 0.23, "learning_rate": 0.0001954450559878535, "loss": 7.8881, "step": 2400 }, { "epoch": 0.23, "learning_rate": 0.00019542607705446956, "loss": 7.9372, "step": 2410 }, { "epoch": 0.23, "learning_rate": 0.0001954070981210856, "loss": 7.9647, "step": 2420 }, { "epoch": 0.23, "learning_rate": 0.00019538811918770165, "loss": 8.0126, "step": 2430 }, { "epoch": 0.23, "learning_rate": 0.00019536914025431771, "loss": 7.9594, "step": 2440 }, { "epoch": 0.23, "learning_rate": 0.00019535016132093378, "loss": 7.917, "step": 2450 }, { "epoch": 0.23, "learning_rate": 0.00019533118238754984, "loss": 8.013, "step": 2460 }, { "epoch": 0.23, "learning_rate": 0.00019531220345416587, "loss": 7.9512, "step": 2470 }, { "epoch": 0.24, "learning_rate": 0.00019529322452078193, "loss": 7.9876, "step": 2480 }, { "epoch": 0.24, "learning_rate": 0.000195274245587398, "loss": 7.9838, "step": 2490 }, { "epoch": 0.24, "learning_rate": 0.00019525526665401405, "loss": 7.9647, "step": 2500 }, { "epoch": 0.24, "learning_rate": 0.00019523628772063012, "loss": 7.8661, "step": 2510 }, { "epoch": 0.24, "learning_rate": 0.00019521730878724615, "loss": 7.9554, "step": 2520 }, { "epoch": 0.24, "learning_rate": 0.0001951983298538622, "loss": 7.9141, "step": 2530 }, { "epoch": 0.24, "learning_rate": 0.00019517935092047827, "loss": 7.939, "step": 2540 }, { "epoch": 0.24, "learning_rate": 0.00019516037198709433, "loss": 8.0098, "step": 2550 }, { "epoch": 0.24, "learning_rate": 0.0001951413930537104, "loss": 7.9881, "step": 2560 }, { "epoch": 0.24, "learning_rate": 0.00019512241412032646, "loss": 7.9937, "step": 2570 }, { "epoch": 0.24, "learning_rate": 0.0001951034351869425, "loss": 7.9844, "step": 2580 }, { "epoch": 0.25, "learning_rate": 0.00019508445625355855, "loss": 8.043, "step": 2590 }, { "epoch": 0.25, "learning_rate": 0.0001950654773201746, "loss": 7.9513, "step": 2600 }, { "epoch": 0.25, "learning_rate": 0.00019504649838679067, "loss": 7.9174, "step": 2610 }, { "epoch": 0.25, "learning_rate": 0.00019502751945340674, "loss": 7.9746, "step": 2620 }, { "epoch": 0.25, "learning_rate": 0.00019500854052002277, "loss": 7.9344, "step": 2630 }, { "epoch": 0.25, "learning_rate": 0.00019498956158663883, "loss": 7.8564, "step": 2640 }, { "epoch": 0.25, "learning_rate": 0.0001949705826532549, "loss": 7.8934, "step": 2650 }, { "epoch": 0.25, "learning_rate": 0.00019495160371987095, "loss": 7.8338, "step": 2660 }, { "epoch": 0.25, "learning_rate": 0.00019493262478648702, "loss": 7.9002, "step": 2670 }, { "epoch": 0.25, "learning_rate": 0.00019491364585310305, "loss": 7.957, "step": 2680 }, { "epoch": 0.26, "learning_rate": 0.0001948946669197191, "loss": 7.8777, "step": 2690 }, { "epoch": 0.26, "learning_rate": 0.00019487568798633517, "loss": 8.0054, "step": 2700 }, { "epoch": 0.26, "learning_rate": 0.00019485670905295123, "loss": 7.9225, "step": 2710 }, { "epoch": 0.26, "learning_rate": 0.0001948377301195673, "loss": 7.9895, "step": 2720 }, { "epoch": 0.26, "learning_rate": 0.00019481875118618333, "loss": 7.9711, "step": 2730 }, { "epoch": 0.26, "learning_rate": 0.0001947997722527994, "loss": 7.9268, "step": 2740 }, { "epoch": 0.26, "learning_rate": 0.00019478079331941545, "loss": 8.0199, "step": 2750 }, { "epoch": 0.26, "learning_rate": 0.0001947618143860315, "loss": 7.9409, "step": 2760 }, { "epoch": 0.26, "learning_rate": 0.00019474283545264757, "loss": 7.7781, "step": 2770 }, { "epoch": 0.26, "learning_rate": 0.00019472385651926364, "loss": 7.787, "step": 2780 }, { "epoch": 0.26, "learning_rate": 0.00019470487758587967, "loss": 8.0509, "step": 2790 }, { "epoch": 0.27, "learning_rate": 0.00019468589865249573, "loss": 7.8997, "step": 2800 }, { "epoch": 0.27, "learning_rate": 0.0001946669197191118, "loss": 8.0071, "step": 2810 }, { "epoch": 0.27, "learning_rate": 0.00019464794078572785, "loss": 7.9269, "step": 2820 }, { "epoch": 0.27, "learning_rate": 0.00019462896185234391, "loss": 7.9551, "step": 2830 }, { "epoch": 0.27, "learning_rate": 0.00019460998291895995, "loss": 7.9637, "step": 2840 }, { "epoch": 0.27, "learning_rate": 0.000194591003985576, "loss": 7.9432, "step": 2850 }, { "epoch": 0.27, "learning_rate": 0.00019457202505219207, "loss": 7.8711, "step": 2860 }, { "epoch": 0.27, "learning_rate": 0.00019455304611880813, "loss": 7.8302, "step": 2870 }, { "epoch": 0.27, "learning_rate": 0.0001945340671854242, "loss": 7.8777, "step": 2880 }, { "epoch": 0.27, "learning_rate": 0.00019451508825204023, "loss": 7.9643, "step": 2890 }, { "epoch": 0.28, "learning_rate": 0.0001944961093186563, "loss": 7.8927, "step": 2900 }, { "epoch": 0.28, "learning_rate": 0.00019447713038527235, "loss": 7.9123, "step": 2910 }, { "epoch": 0.28, "learning_rate": 0.0001944581514518884, "loss": 7.9644, "step": 2920 }, { "epoch": 0.28, "learning_rate": 0.00019443917251850447, "loss": 7.891, "step": 2930 }, { "epoch": 0.28, "learning_rate": 0.00019442019358512053, "loss": 8.0191, "step": 2940 }, { "epoch": 0.28, "learning_rate": 0.00019440121465173657, "loss": 7.8945, "step": 2950 }, { "epoch": 0.28, "learning_rate": 0.00019438223571835263, "loss": 7.9478, "step": 2960 }, { "epoch": 0.28, "learning_rate": 0.0001943632567849687, "loss": 7.9027, "step": 2970 }, { "epoch": 0.28, "learning_rate": 0.00019434427785158475, "loss": 7.8986, "step": 2980 }, { "epoch": 0.28, "learning_rate": 0.00019432529891820081, "loss": 8.0644, "step": 2990 }, { "epoch": 0.28, "learning_rate": 0.00019430631998481685, "loss": 7.9844, "step": 3000 }, { "epoch": 0.29, "learning_rate": 0.0001942873410514329, "loss": 7.9241, "step": 3010 }, { "epoch": 0.29, "learning_rate": 0.00019426836211804897, "loss": 7.8227, "step": 3020 }, { "epoch": 0.29, "learning_rate": 0.00019424938318466503, "loss": 7.8803, "step": 3030 }, { "epoch": 0.29, "learning_rate": 0.0001942304042512811, "loss": 7.9341, "step": 3040 }, { "epoch": 0.29, "learning_rate": 0.00019421142531789713, "loss": 7.927, "step": 3050 }, { "epoch": 0.29, "learning_rate": 0.0001941924463845132, "loss": 7.9502, "step": 3060 }, { "epoch": 0.29, "learning_rate": 0.00019417346745112925, "loss": 7.921, "step": 3070 }, { "epoch": 0.29, "learning_rate": 0.0001941544885177453, "loss": 7.9221, "step": 3080 }, { "epoch": 0.29, "learning_rate": 0.00019413550958436137, "loss": 7.8924, "step": 3090 }, { "epoch": 0.29, "learning_rate": 0.00019411653065097743, "loss": 8.0156, "step": 3100 }, { "epoch": 0.3, "learning_rate": 0.00019409755171759347, "loss": 7.9395, "step": 3110 }, { "epoch": 0.3, "learning_rate": 0.00019407857278420953, "loss": 7.9926, "step": 3120 }, { "epoch": 0.3, "learning_rate": 0.0001940595938508256, "loss": 7.9102, "step": 3130 }, { "epoch": 0.3, "learning_rate": 0.00019404061491744165, "loss": 7.8352, "step": 3140 }, { "epoch": 0.3, "learning_rate": 0.0001940216359840577, "loss": 7.8719, "step": 3150 }, { "epoch": 0.3, "learning_rate": 0.00019400265705067375, "loss": 7.9277, "step": 3160 }, { "epoch": 0.3, "learning_rate": 0.0001939836781172898, "loss": 7.9376, "step": 3170 }, { "epoch": 0.3, "learning_rate": 0.00019396469918390587, "loss": 7.9999, "step": 3180 }, { "epoch": 0.3, "learning_rate": 0.00019394572025052193, "loss": 7.8309, "step": 3190 }, { "epoch": 0.3, "learning_rate": 0.000193926741317138, "loss": 7.8541, "step": 3200 }, { "epoch": 0.3, "learning_rate": 0.00019390776238375403, "loss": 7.9434, "step": 3210 }, { "epoch": 0.31, "learning_rate": 0.0001938887834503701, "loss": 7.9548, "step": 3220 }, { "epoch": 0.31, "learning_rate": 0.00019386980451698615, "loss": 7.9358, "step": 3230 }, { "epoch": 0.31, "learning_rate": 0.0001938508255836022, "loss": 7.9035, "step": 3240 }, { "epoch": 0.31, "learning_rate": 0.00019383184665021827, "loss": 7.934, "step": 3250 }, { "epoch": 0.31, "learning_rate": 0.0001938128677168343, "loss": 7.9353, "step": 3260 }, { "epoch": 0.31, "learning_rate": 0.00019379388878345037, "loss": 7.9028, "step": 3270 }, { "epoch": 0.31, "learning_rate": 0.00019377490985006643, "loss": 7.9257, "step": 3280 }, { "epoch": 0.31, "learning_rate": 0.0001937559309166825, "loss": 7.8557, "step": 3290 }, { "epoch": 0.31, "learning_rate": 0.00019373695198329855, "loss": 7.8651, "step": 3300 }, { "epoch": 0.31, "learning_rate": 0.0001937179730499146, "loss": 7.901, "step": 3310 }, { "epoch": 0.32, "learning_rate": 0.00019369899411653065, "loss": 7.9915, "step": 3320 }, { "epoch": 0.32, "learning_rate": 0.0001936800151831467, "loss": 7.9375, "step": 3330 }, { "epoch": 0.32, "learning_rate": 0.00019366103624976277, "loss": 7.8321, "step": 3340 }, { "epoch": 0.32, "learning_rate": 0.00019364205731637883, "loss": 7.8932, "step": 3350 }, { "epoch": 0.32, "learning_rate": 0.0001936230783829949, "loss": 7.9586, "step": 3360 }, { "epoch": 0.32, "learning_rate": 0.00019360409944961093, "loss": 7.8609, "step": 3370 }, { "epoch": 0.32, "learning_rate": 0.000193585120516227, "loss": 7.9284, "step": 3380 }, { "epoch": 0.32, "learning_rate": 0.00019356614158284305, "loss": 7.877, "step": 3390 }, { "epoch": 0.32, "learning_rate": 0.0001935471626494591, "loss": 7.9125, "step": 3400 }, { "epoch": 0.32, "learning_rate": 0.00019352818371607517, "loss": 7.8638, "step": 3410 }, { "epoch": 0.32, "learning_rate": 0.0001935092047826912, "loss": 7.9896, "step": 3420 }, { "epoch": 0.33, "learning_rate": 0.00019349022584930727, "loss": 8.0264, "step": 3430 }, { "epoch": 0.33, "learning_rate": 0.00019347124691592333, "loss": 7.9667, "step": 3440 }, { "epoch": 0.33, "learning_rate": 0.0001934522679825394, "loss": 7.7931, "step": 3450 }, { "epoch": 0.33, "learning_rate": 0.00019343328904915545, "loss": 8.0166, "step": 3460 }, { "epoch": 0.33, "learning_rate": 0.0001934143101157715, "loss": 7.844, "step": 3470 }, { "epoch": 0.33, "learning_rate": 0.00019339533118238755, "loss": 7.9468, "step": 3480 }, { "epoch": 0.33, "learning_rate": 0.0001933763522490036, "loss": 7.9775, "step": 3490 }, { "epoch": 0.33, "learning_rate": 0.00019335737331561967, "loss": 7.8543, "step": 3500 }, { "epoch": 0.33, "learning_rate": 0.00019333839438223573, "loss": 7.8744, "step": 3510 }, { "epoch": 0.33, "learning_rate": 0.0001933194154488518, "loss": 7.8954, "step": 3520 }, { "epoch": 0.33, "learning_rate": 0.00019330043651546782, "loss": 7.9898, "step": 3530 }, { "epoch": 0.34, "learning_rate": 0.00019328145758208389, "loss": 7.8642, "step": 3540 }, { "epoch": 0.34, "learning_rate": 0.00019326247864869995, "loss": 8.0018, "step": 3550 }, { "epoch": 0.34, "learning_rate": 0.000193243499715316, "loss": 8.0016, "step": 3560 }, { "epoch": 0.34, "learning_rate": 0.00019322452078193207, "loss": 8.0801, "step": 3570 }, { "epoch": 0.34, "learning_rate": 0.0001932055418485481, "loss": 8.0127, "step": 3580 }, { "epoch": 0.34, "learning_rate": 0.00019318656291516416, "loss": 7.8582, "step": 3590 }, { "epoch": 0.34, "learning_rate": 0.00019316758398178023, "loss": 7.9344, "step": 3600 }, { "epoch": 0.34, "learning_rate": 0.0001931486050483963, "loss": 7.953, "step": 3610 }, { "epoch": 0.34, "learning_rate": 0.00019312962611501235, "loss": 7.9068, "step": 3620 }, { "epoch": 0.34, "learning_rate": 0.0001931106471816284, "loss": 7.9032, "step": 3630 }, { "epoch": 0.35, "learning_rate": 0.00019309166824824444, "loss": 7.8135, "step": 3640 }, { "epoch": 0.35, "learning_rate": 0.0001930726893148605, "loss": 7.8799, "step": 3650 }, { "epoch": 0.35, "learning_rate": 0.00019305371038147657, "loss": 7.9762, "step": 3660 }, { "epoch": 0.35, "learning_rate": 0.00019303473144809263, "loss": 7.9839, "step": 3670 }, { "epoch": 0.35, "learning_rate": 0.0001930157525147087, "loss": 7.8639, "step": 3680 }, { "epoch": 0.35, "learning_rate": 0.00019299677358132472, "loss": 8.0189, "step": 3690 }, { "epoch": 0.35, "learning_rate": 0.00019297779464794078, "loss": 7.9332, "step": 3700 }, { "epoch": 0.35, "learning_rate": 0.00019295881571455685, "loss": 8.1368, "step": 3710 }, { "epoch": 0.35, "learning_rate": 0.0001929398367811729, "loss": 7.8899, "step": 3720 }, { "epoch": 0.35, "learning_rate": 0.00019292085784778897, "loss": 7.9733, "step": 3730 }, { "epoch": 0.35, "learning_rate": 0.000192901878914405, "loss": 8.0364, "step": 3740 }, { "epoch": 0.36, "learning_rate": 0.00019288289998102106, "loss": 7.9229, "step": 3750 }, { "epoch": 0.36, "learning_rate": 0.00019286392104763713, "loss": 7.9838, "step": 3760 }, { "epoch": 0.36, "learning_rate": 0.0001928449421142532, "loss": 7.7698, "step": 3770 }, { "epoch": 0.36, "learning_rate": 0.00019282596318086925, "loss": 7.8598, "step": 3780 }, { "epoch": 0.36, "learning_rate": 0.0001928069842474853, "loss": 8.0224, "step": 3790 }, { "epoch": 0.36, "learning_rate": 0.00019278800531410134, "loss": 7.899, "step": 3800 }, { "epoch": 0.36, "learning_rate": 0.0001927690263807174, "loss": 7.8593, "step": 3810 }, { "epoch": 0.36, "learning_rate": 0.00019275004744733347, "loss": 7.9592, "step": 3820 }, { "epoch": 0.36, "learning_rate": 0.00019273106851394953, "loss": 7.9444, "step": 3830 }, { "epoch": 0.36, "learning_rate": 0.0001927120895805656, "loss": 7.9079, "step": 3840 }, { "epoch": 0.37, "learning_rate": 0.00019269311064718162, "loss": 7.9192, "step": 3850 }, { "epoch": 0.37, "learning_rate": 0.00019267413171379768, "loss": 7.8443, "step": 3860 }, { "epoch": 0.37, "learning_rate": 0.00019265515278041375, "loss": 7.7676, "step": 3870 }, { "epoch": 0.37, "learning_rate": 0.0001926361738470298, "loss": 7.9781, "step": 3880 }, { "epoch": 0.37, "learning_rate": 0.00019261719491364587, "loss": 7.9639, "step": 3890 }, { "epoch": 0.37, "learning_rate": 0.0001925982159802619, "loss": 7.9449, "step": 3900 }, { "epoch": 0.37, "learning_rate": 0.00019257923704687796, "loss": 7.9286, "step": 3910 }, { "epoch": 0.37, "learning_rate": 0.00019256025811349402, "loss": 7.9555, "step": 3920 }, { "epoch": 0.37, "learning_rate": 0.00019254127918011009, "loss": 7.8571, "step": 3930 }, { "epoch": 0.37, "learning_rate": 0.00019252230024672615, "loss": 7.9362, "step": 3940 }, { "epoch": 0.37, "learning_rate": 0.00019250332131334218, "loss": 7.8598, "step": 3950 }, { "epoch": 0.38, "learning_rate": 0.00019248434237995824, "loss": 7.9286, "step": 3960 }, { "epoch": 0.38, "learning_rate": 0.0001924653634465743, "loss": 7.9102, "step": 3970 }, { "epoch": 0.38, "learning_rate": 0.00019244638451319037, "loss": 8.0461, "step": 3980 }, { "epoch": 0.38, "learning_rate": 0.00019242740557980643, "loss": 8.019, "step": 3990 }, { "epoch": 0.38, "learning_rate": 0.0001924084266464225, "loss": 7.9759, "step": 4000 }, { "epoch": 0.38, "learning_rate": 0.00019238944771303852, "loss": 7.8909, "step": 4010 }, { "epoch": 0.38, "learning_rate": 0.00019237046877965458, "loss": 7.8641, "step": 4020 }, { "epoch": 0.38, "learning_rate": 0.00019235148984627064, "loss": 7.9116, "step": 4030 }, { "epoch": 0.38, "learning_rate": 0.0001923325109128867, "loss": 8.1006, "step": 4040 }, { "epoch": 0.38, "learning_rate": 0.00019231353197950277, "loss": 7.9186, "step": 4050 }, { "epoch": 0.39, "learning_rate": 0.0001922945530461188, "loss": 7.9467, "step": 4060 }, { "epoch": 0.39, "learning_rate": 0.00019227557411273486, "loss": 7.9013, "step": 4070 }, { "epoch": 0.39, "learning_rate": 0.00019225659517935092, "loss": 7.8616, "step": 4080 }, { "epoch": 0.39, "learning_rate": 0.00019223761624596698, "loss": 7.972, "step": 4090 }, { "epoch": 0.39, "learning_rate": 0.00019221863731258305, "loss": 7.8126, "step": 4100 }, { "epoch": 0.39, "learning_rate": 0.00019219965837919908, "loss": 7.9782, "step": 4110 }, { "epoch": 0.39, "learning_rate": 0.00019218067944581514, "loss": 7.8078, "step": 4120 }, { "epoch": 0.39, "learning_rate": 0.0001921617005124312, "loss": 7.9655, "step": 4130 }, { "epoch": 0.39, "learning_rate": 0.00019214272157904726, "loss": 7.914, "step": 4140 }, { "epoch": 0.39, "learning_rate": 0.00019212374264566333, "loss": 7.9165, "step": 4150 }, { "epoch": 0.39, "learning_rate": 0.0001921047637122794, "loss": 7.8859, "step": 4160 }, { "epoch": 0.4, "learning_rate": 0.00019208578477889542, "loss": 7.92, "step": 4170 }, { "epoch": 0.4, "learning_rate": 0.00019206680584551148, "loss": 7.8548, "step": 4180 }, { "epoch": 0.4, "learning_rate": 0.00019204782691212754, "loss": 7.8462, "step": 4190 }, { "epoch": 0.4, "learning_rate": 0.0001920288479787436, "loss": 7.9479, "step": 4200 }, { "epoch": 0.4, "learning_rate": 0.00019200986904535967, "loss": 7.9687, "step": 4210 }, { "epoch": 0.4, "learning_rate": 0.0001919908901119757, "loss": 7.8412, "step": 4220 }, { "epoch": 0.4, "learning_rate": 0.00019197191117859176, "loss": 7.9112, "step": 4230 }, { "epoch": 0.4, "learning_rate": 0.00019195293224520782, "loss": 7.8358, "step": 4240 }, { "epoch": 0.4, "learning_rate": 0.00019193395331182388, "loss": 7.9411, "step": 4250 }, { "epoch": 0.4, "learning_rate": 0.00019191497437843995, "loss": 7.8077, "step": 4260 }, { "epoch": 0.41, "learning_rate": 0.00019189599544505598, "loss": 7.9192, "step": 4270 }, { "epoch": 0.41, "learning_rate": 0.00019187701651167204, "loss": 7.9694, "step": 4280 }, { "epoch": 0.41, "learning_rate": 0.0001918580375782881, "loss": 7.8397, "step": 4290 }, { "epoch": 0.41, "learning_rate": 0.00019183905864490416, "loss": 7.8919, "step": 4300 }, { "epoch": 0.41, "learning_rate": 0.00019182007971152022, "loss": 7.9859, "step": 4310 }, { "epoch": 0.41, "learning_rate": 0.00019180110077813629, "loss": 7.8927, "step": 4320 }, { "epoch": 0.41, "learning_rate": 0.00019178212184475232, "loss": 7.9667, "step": 4330 }, { "epoch": 0.41, "learning_rate": 0.00019176314291136838, "loss": 7.9561, "step": 4340 }, { "epoch": 0.41, "learning_rate": 0.00019174416397798444, "loss": 7.8562, "step": 4350 }, { "epoch": 0.41, "learning_rate": 0.0001917251850446005, "loss": 7.8994, "step": 4360 }, { "epoch": 0.41, "learning_rate": 0.00019170620611121657, "loss": 7.921, "step": 4370 }, { "epoch": 0.42, "learning_rate": 0.0001916872271778326, "loss": 7.8508, "step": 4380 }, { "epoch": 0.42, "learning_rate": 0.00019166824824444866, "loss": 7.9523, "step": 4390 }, { "epoch": 0.42, "learning_rate": 0.00019164926931106472, "loss": 7.8375, "step": 4400 }, { "epoch": 0.42, "learning_rate": 0.00019163029037768078, "loss": 7.9494, "step": 4410 }, { "epoch": 0.42, "learning_rate": 0.00019161131144429684, "loss": 7.9264, "step": 4420 }, { "epoch": 0.42, "learning_rate": 0.00019159233251091288, "loss": 7.8938, "step": 4430 }, { "epoch": 0.42, "learning_rate": 0.00019157335357752894, "loss": 7.898, "step": 4440 }, { "epoch": 0.42, "learning_rate": 0.000191554374644145, "loss": 7.8928, "step": 4450 }, { "epoch": 0.42, "learning_rate": 0.00019153539571076106, "loss": 7.9245, "step": 4460 }, { "epoch": 0.42, "learning_rate": 0.00019151641677737712, "loss": 7.994, "step": 4470 }, { "epoch": 0.43, "learning_rate": 0.00019149743784399316, "loss": 7.9778, "step": 4480 }, { "epoch": 0.43, "learning_rate": 0.00019147845891060922, "loss": 7.8502, "step": 4490 }, { "epoch": 0.43, "learning_rate": 0.00019145947997722528, "loss": 7.8904, "step": 4500 }, { "epoch": 0.43, "learning_rate": 0.00019144050104384134, "loss": 7.9738, "step": 4510 }, { "epoch": 0.43, "learning_rate": 0.0001914215221104574, "loss": 7.8975, "step": 4520 }, { "epoch": 0.43, "learning_rate": 0.00019140254317707346, "loss": 7.8833, "step": 4530 }, { "epoch": 0.43, "learning_rate": 0.0001913835642436895, "loss": 7.9091, "step": 4540 }, { "epoch": 0.43, "learning_rate": 0.00019136458531030556, "loss": 8.0118, "step": 4550 }, { "epoch": 0.43, "learning_rate": 0.00019134560637692162, "loss": 7.9777, "step": 4560 }, { "epoch": 0.43, "learning_rate": 0.00019132662744353768, "loss": 7.8288, "step": 4570 }, { "epoch": 0.43, "learning_rate": 0.00019130764851015374, "loss": 7.864, "step": 4580 }, { "epoch": 0.44, "learning_rate": 0.00019128866957676978, "loss": 7.9451, "step": 4590 }, { "epoch": 0.44, "learning_rate": 0.00019126969064338584, "loss": 7.9331, "step": 4600 }, { "epoch": 0.44, "learning_rate": 0.0001912507117100019, "loss": 7.958, "step": 4610 }, { "epoch": 0.44, "learning_rate": 0.00019123173277661796, "loss": 7.8932, "step": 4620 }, { "epoch": 0.44, "learning_rate": 0.00019121275384323402, "loss": 7.9253, "step": 4630 }, { "epoch": 0.44, "learning_rate": 0.00019119377490985006, "loss": 7.849, "step": 4640 }, { "epoch": 0.44, "learning_rate": 0.00019117479597646612, "loss": 7.9173, "step": 4650 }, { "epoch": 0.44, "learning_rate": 0.00019115581704308218, "loss": 7.8063, "step": 4660 }, { "epoch": 0.44, "learning_rate": 0.00019113683810969824, "loss": 7.8727, "step": 4670 }, { "epoch": 0.44, "learning_rate": 0.0001911178591763143, "loss": 7.9849, "step": 4680 }, { "epoch": 0.45, "learning_rate": 0.00019109888024293036, "loss": 7.917, "step": 4690 }, { "epoch": 0.45, "learning_rate": 0.0001910799013095464, "loss": 7.8493, "step": 4700 }, { "epoch": 0.45, "learning_rate": 0.00019106092237616246, "loss": 7.9853, "step": 4710 }, { "epoch": 0.45, "learning_rate": 0.00019104194344277852, "loss": 7.8777, "step": 4720 }, { "epoch": 0.45, "learning_rate": 0.00019102296450939458, "loss": 8.0427, "step": 4730 }, { "epoch": 0.45, "learning_rate": 0.00019100398557601064, "loss": 7.9599, "step": 4740 }, { "epoch": 0.45, "learning_rate": 0.00019098500664262668, "loss": 8.0004, "step": 4750 }, { "epoch": 0.45, "learning_rate": 0.00019096602770924274, "loss": 7.8401, "step": 4760 }, { "epoch": 0.45, "learning_rate": 0.0001909470487758588, "loss": 7.9153, "step": 4770 }, { "epoch": 0.45, "learning_rate": 0.00019092806984247486, "loss": 7.9396, "step": 4780 }, { "epoch": 0.45, "learning_rate": 0.00019090909090909092, "loss": 7.9395, "step": 4790 }, { "epoch": 0.46, "learning_rate": 0.00019089011197570696, "loss": 7.9248, "step": 4800 }, { "epoch": 0.46, "learning_rate": 0.00019087113304232302, "loss": 7.983, "step": 4810 }, { "epoch": 0.46, "learning_rate": 0.00019085215410893908, "loss": 7.9105, "step": 4820 }, { "epoch": 0.46, "learning_rate": 0.00019083317517555514, "loss": 7.8619, "step": 4830 }, { "epoch": 0.46, "learning_rate": 0.0001908141962421712, "loss": 7.8528, "step": 4840 }, { "epoch": 0.46, "learning_rate": 0.00019079521730878726, "loss": 7.8596, "step": 4850 }, { "epoch": 0.46, "learning_rate": 0.0001907762383754033, "loss": 7.82, "step": 4860 }, { "epoch": 0.46, "learning_rate": 0.00019075725944201936, "loss": 7.8662, "step": 4870 }, { "epoch": 0.46, "learning_rate": 0.00019073828050863542, "loss": 7.8878, "step": 4880 }, { "epoch": 0.46, "learning_rate": 0.00019071930157525148, "loss": 7.9529, "step": 4890 }, { "epoch": 0.46, "learning_rate": 0.00019070032264186754, "loss": 7.8718, "step": 4900 }, { "epoch": 0.47, "learning_rate": 0.00019068134370848358, "loss": 7.9205, "step": 4910 }, { "epoch": 0.47, "learning_rate": 0.00019066236477509964, "loss": 7.8675, "step": 4920 }, { "epoch": 0.47, "learning_rate": 0.0001906433858417157, "loss": 7.9425, "step": 4930 }, { "epoch": 0.47, "learning_rate": 0.00019062440690833176, "loss": 7.8226, "step": 4940 }, { "epoch": 0.47, "learning_rate": 0.00019060542797494782, "loss": 7.9104, "step": 4950 }, { "epoch": 0.47, "learning_rate": 0.00019058644904156386, "loss": 7.9396, "step": 4960 }, { "epoch": 0.47, "learning_rate": 0.00019056747010817992, "loss": 7.9223, "step": 4970 }, { "epoch": 0.47, "learning_rate": 0.00019054849117479598, "loss": 7.8131, "step": 4980 }, { "epoch": 0.47, "learning_rate": 0.00019052951224141204, "loss": 7.8441, "step": 4990 }, { "epoch": 0.47, "learning_rate": 0.0001905105333080281, "loss": 7.8416, "step": 5000 }, { "epoch": 0.48, "learning_rate": 0.00019049155437464413, "loss": 7.977, "step": 5010 }, { "epoch": 0.48, "learning_rate": 0.0001904725754412602, "loss": 7.9648, "step": 5020 }, { "epoch": 0.48, "learning_rate": 0.00019045359650787626, "loss": 7.8183, "step": 5030 }, { "epoch": 0.48, "learning_rate": 0.00019043461757449232, "loss": 7.9049, "step": 5040 }, { "epoch": 0.48, "learning_rate": 0.00019041563864110838, "loss": 7.9121, "step": 5050 }, { "epoch": 0.48, "learning_rate": 0.00019039665970772444, "loss": 7.9363, "step": 5060 }, { "epoch": 0.48, "learning_rate": 0.00019037768077434048, "loss": 7.7851, "step": 5070 }, { "epoch": 0.48, "learning_rate": 0.00019035870184095654, "loss": 7.9085, "step": 5080 }, { "epoch": 0.48, "learning_rate": 0.0001903397229075726, "loss": 7.931, "step": 5090 }, { "epoch": 0.48, "learning_rate": 0.00019032074397418866, "loss": 7.9747, "step": 5100 }, { "epoch": 0.48, "learning_rate": 0.00019030176504080472, "loss": 7.8948, "step": 5110 }, { "epoch": 0.49, "learning_rate": 0.00019028278610742075, "loss": 7.8823, "step": 5120 }, { "epoch": 0.49, "learning_rate": 0.00019026380717403682, "loss": 7.9353, "step": 5130 }, { "epoch": 0.49, "learning_rate": 0.00019024482824065288, "loss": 7.8635, "step": 5140 }, { "epoch": 0.49, "learning_rate": 0.00019022584930726894, "loss": 7.954, "step": 5150 }, { "epoch": 0.49, "learning_rate": 0.000190206870373885, "loss": 7.8627, "step": 5160 }, { "epoch": 0.49, "learning_rate": 0.00019018789144050103, "loss": 7.8782, "step": 5170 }, { "epoch": 0.49, "learning_rate": 0.0001901689125071171, "loss": 7.9441, "step": 5180 }, { "epoch": 0.49, "learning_rate": 0.00019014993357373316, "loss": 7.8237, "step": 5190 }, { "epoch": 0.49, "learning_rate": 0.00019013095464034922, "loss": 7.9041, "step": 5200 }, { "epoch": 0.49, "learning_rate": 0.00019011197570696528, "loss": 7.8748, "step": 5210 }, { "epoch": 0.5, "learning_rate": 0.00019009299677358134, "loss": 7.8806, "step": 5220 }, { "epoch": 0.5, "learning_rate": 0.00019007401784019737, "loss": 7.9769, "step": 5230 }, { "epoch": 0.5, "learning_rate": 0.00019005503890681344, "loss": 7.9438, "step": 5240 }, { "epoch": 0.5, "learning_rate": 0.0001900360599734295, "loss": 7.9547, "step": 5250 }, { "epoch": 0.5, "learning_rate": 0.00019001708104004556, "loss": 7.7906, "step": 5260 }, { "epoch": 0.5, "learning_rate": 0.00018999810210666162, "loss": 7.9561, "step": 5270 }, { "epoch": 0.5, "learning_rate": 0.00018997912317327765, "loss": 7.9207, "step": 5280 }, { "epoch": 0.5, "learning_rate": 0.00018996014423989371, "loss": 7.9381, "step": 5290 }, { "epoch": 0.5, "learning_rate": 0.00018994116530650978, "loss": 7.8762, "step": 5300 }, { "epoch": 0.5, "learning_rate": 0.00018992218637312584, "loss": 8.0111, "step": 5310 }, { "epoch": 0.5, "learning_rate": 0.0001899032074397419, "loss": 7.9814, "step": 5320 }, { "epoch": 0.51, "learning_rate": 0.00018988422850635793, "loss": 7.9152, "step": 5330 }, { "epoch": 0.51, "learning_rate": 0.000189865249572974, "loss": 7.8724, "step": 5340 }, { "epoch": 0.51, "learning_rate": 0.00018984627063959006, "loss": 8.0027, "step": 5350 }, { "epoch": 0.51, "learning_rate": 0.00018982729170620612, "loss": 7.8769, "step": 5360 }, { "epoch": 0.51, "learning_rate": 0.00018980831277282218, "loss": 7.864, "step": 5370 }, { "epoch": 0.51, "learning_rate": 0.00018978933383943824, "loss": 7.8941, "step": 5380 }, { "epoch": 0.51, "learning_rate": 0.00018977035490605427, "loss": 7.9021, "step": 5390 }, { "epoch": 0.51, "learning_rate": 0.00018975137597267033, "loss": 7.7893, "step": 5400 }, { "epoch": 0.51, "learning_rate": 0.0001897323970392864, "loss": 7.8462, "step": 5410 }, { "epoch": 0.51, "learning_rate": 0.00018971341810590246, "loss": 7.8329, "step": 5420 }, { "epoch": 0.52, "learning_rate": 0.00018969443917251852, "loss": 7.9414, "step": 5430 }, { "epoch": 0.52, "learning_rate": 0.00018967546023913455, "loss": 7.9004, "step": 5440 }, { "epoch": 0.52, "learning_rate": 0.00018965648130575061, "loss": 7.911, "step": 5450 }, { "epoch": 0.52, "learning_rate": 0.00018963750237236668, "loss": 7.9274, "step": 5460 }, { "epoch": 0.52, "learning_rate": 0.00018961852343898274, "loss": 7.899, "step": 5470 }, { "epoch": 0.52, "learning_rate": 0.0001895995445055988, "loss": 7.8951, "step": 5480 }, { "epoch": 0.52, "learning_rate": 0.00018958056557221483, "loss": 7.9019, "step": 5490 }, { "epoch": 0.52, "learning_rate": 0.0001895615866388309, "loss": 7.9009, "step": 5500 }, { "epoch": 0.52, "learning_rate": 0.00018954260770544695, "loss": 7.9239, "step": 5510 }, { "epoch": 0.52, "learning_rate": 0.00018952362877206302, "loss": 7.9005, "step": 5520 }, { "epoch": 0.52, "learning_rate": 0.00018950464983867908, "loss": 7.9392, "step": 5530 }, { "epoch": 0.53, "learning_rate": 0.0001894856709052951, "loss": 7.8666, "step": 5540 }, { "epoch": 0.53, "learning_rate": 0.00018946669197191117, "loss": 7.8467, "step": 5550 }, { "epoch": 0.53, "learning_rate": 0.00018944771303852723, "loss": 7.9866, "step": 5560 }, { "epoch": 0.53, "learning_rate": 0.0001894287341051433, "loss": 7.9338, "step": 5570 }, { "epoch": 0.53, "learning_rate": 0.00018940975517175936, "loss": 7.9074, "step": 5580 }, { "epoch": 0.53, "learning_rate": 0.00018939077623837542, "loss": 7.8457, "step": 5590 }, { "epoch": 0.53, "learning_rate": 0.00018937179730499145, "loss": 7.9165, "step": 5600 }, { "epoch": 0.53, "learning_rate": 0.0001893528183716075, "loss": 7.8931, "step": 5610 }, { "epoch": 0.53, "learning_rate": 0.00018933383943822357, "loss": 7.9497, "step": 5620 }, { "epoch": 0.53, "learning_rate": 0.00018931486050483964, "loss": 7.9797, "step": 5630 }, { "epoch": 0.54, "learning_rate": 0.0001892958815714557, "loss": 7.8169, "step": 5640 }, { "epoch": 0.54, "learning_rate": 0.00018927690263807173, "loss": 7.9766, "step": 5650 }, { "epoch": 0.54, "learning_rate": 0.0001892579237046878, "loss": 7.8768, "step": 5660 }, { "epoch": 0.54, "learning_rate": 0.00018923894477130385, "loss": 7.8953, "step": 5670 }, { "epoch": 0.54, "learning_rate": 0.00018921996583791991, "loss": 7.851, "step": 5680 }, { "epoch": 0.54, "learning_rate": 0.00018920098690453598, "loss": 7.9436, "step": 5690 }, { "epoch": 0.54, "learning_rate": 0.000189182007971152, "loss": 7.882, "step": 5700 }, { "epoch": 0.54, "learning_rate": 0.00018916302903776807, "loss": 7.8709, "step": 5710 }, { "epoch": 0.54, "learning_rate": 0.00018914405010438413, "loss": 7.8128, "step": 5720 }, { "epoch": 0.54, "learning_rate": 0.0001891250711710002, "loss": 7.9213, "step": 5730 }, { "epoch": 0.54, "learning_rate": 0.00018910609223761626, "loss": 7.8444, "step": 5740 }, { "epoch": 0.55, "learning_rate": 0.00018908711330423232, "loss": 7.9045, "step": 5750 }, { "epoch": 0.55, "learning_rate": 0.00018906813437084835, "loss": 8.0001, "step": 5760 }, { "epoch": 0.55, "learning_rate": 0.0001890491554374644, "loss": 7.9054, "step": 5770 }, { "epoch": 0.55, "learning_rate": 0.00018903017650408047, "loss": 7.9683, "step": 5780 }, { "epoch": 0.55, "learning_rate": 0.00018901119757069653, "loss": 7.8151, "step": 5790 }, { "epoch": 0.55, "learning_rate": 0.0001889922186373126, "loss": 7.8192, "step": 5800 }, { "epoch": 0.55, "learning_rate": 0.00018897323970392863, "loss": 7.9276, "step": 5810 }, { "epoch": 0.55, "learning_rate": 0.0001889542607705447, "loss": 7.9805, "step": 5820 }, { "epoch": 0.55, "learning_rate": 0.00018893528183716075, "loss": 7.8192, "step": 5830 }, { "epoch": 0.55, "learning_rate": 0.00018891630290377681, "loss": 7.9176, "step": 5840 }, { "epoch": 0.56, "learning_rate": 0.00018889732397039288, "loss": 7.8999, "step": 5850 }, { "epoch": 0.56, "learning_rate": 0.0001888783450370089, "loss": 7.8994, "step": 5860 }, { "epoch": 0.56, "learning_rate": 0.00018885936610362497, "loss": 7.8313, "step": 5870 }, { "epoch": 0.56, "learning_rate": 0.00018884038717024103, "loss": 7.924, "step": 5880 }, { "epoch": 0.56, "learning_rate": 0.0001888214082368571, "loss": 7.8946, "step": 5890 }, { "epoch": 0.56, "learning_rate": 0.00018880242930347315, "loss": 7.9005, "step": 5900 }, { "epoch": 0.56, "learning_rate": 0.00018878345037008922, "loss": 7.8146, "step": 5910 }, { "epoch": 0.56, "learning_rate": 0.00018876447143670525, "loss": 7.916, "step": 5920 }, { "epoch": 0.56, "learning_rate": 0.0001887454925033213, "loss": 7.9194, "step": 5930 }, { "epoch": 0.56, "learning_rate": 0.00018872651356993737, "loss": 7.9068, "step": 5940 }, { "epoch": 0.56, "learning_rate": 0.00018870753463655343, "loss": 7.8296, "step": 5950 }, { "epoch": 0.57, "learning_rate": 0.0001886885557031695, "loss": 7.9821, "step": 5960 }, { "epoch": 0.57, "learning_rate": 0.00018866957676978553, "loss": 7.9248, "step": 5970 }, { "epoch": 0.57, "learning_rate": 0.0001886505978364016, "loss": 7.9169, "step": 5980 }, { "epoch": 0.57, "learning_rate": 0.00018863161890301765, "loss": 7.9041, "step": 5990 }, { "epoch": 0.57, "learning_rate": 0.0001886126399696337, "loss": 7.9649, "step": 6000 }, { "epoch": 0.57, "learning_rate": 0.00018859366103624977, "loss": 7.9127, "step": 6010 }, { "epoch": 0.57, "learning_rate": 0.0001885746821028658, "loss": 8.0069, "step": 6020 }, { "epoch": 0.57, "learning_rate": 0.00018855570316948187, "loss": 7.9172, "step": 6030 }, { "epoch": 0.57, "learning_rate": 0.00018853672423609793, "loss": 7.83, "step": 6040 }, { "epoch": 0.57, "learning_rate": 0.000188517745302714, "loss": 8.0185, "step": 6050 }, { "epoch": 0.58, "learning_rate": 0.00018849876636933005, "loss": 7.96, "step": 6060 }, { "epoch": 0.58, "learning_rate": 0.0001884797874359461, "loss": 7.8452, "step": 6070 }, { "epoch": 0.58, "learning_rate": 0.00018846080850256215, "loss": 7.9495, "step": 6080 }, { "epoch": 0.58, "learning_rate": 0.0001884418295691782, "loss": 7.8226, "step": 6090 }, { "epoch": 0.58, "learning_rate": 0.00018842285063579427, "loss": 7.8803, "step": 6100 }, { "epoch": 0.58, "learning_rate": 0.00018840387170241033, "loss": 7.887, "step": 6110 }, { "epoch": 0.58, "learning_rate": 0.0001883848927690264, "loss": 7.7835, "step": 6120 }, { "epoch": 0.58, "learning_rate": 0.00018836591383564243, "loss": 7.8716, "step": 6130 }, { "epoch": 0.58, "learning_rate": 0.0001883469349022585, "loss": 7.8583, "step": 6140 }, { "epoch": 0.58, "learning_rate": 0.00018832795596887455, "loss": 7.925, "step": 6150 }, { "epoch": 0.58, "learning_rate": 0.0001883089770354906, "loss": 7.885, "step": 6160 }, { "epoch": 0.59, "learning_rate": 0.00018828999810210667, "loss": 7.8988, "step": 6170 }, { "epoch": 0.59, "learning_rate": 0.0001882710191687227, "loss": 7.8836, "step": 6180 }, { "epoch": 0.59, "learning_rate": 0.00018825204023533877, "loss": 7.8954, "step": 6190 }, { "epoch": 0.59, "learning_rate": 0.00018823306130195483, "loss": 7.8208, "step": 6200 }, { "epoch": 0.59, "learning_rate": 0.0001882140823685709, "loss": 7.8342, "step": 6210 }, { "epoch": 0.59, "learning_rate": 0.00018819510343518695, "loss": 7.9022, "step": 6220 }, { "epoch": 0.59, "learning_rate": 0.000188176124501803, "loss": 7.8636, "step": 6230 }, { "epoch": 0.59, "learning_rate": 0.00018815714556841905, "loss": 7.8913, "step": 6240 }, { "epoch": 0.59, "learning_rate": 0.0001881381666350351, "loss": 7.8727, "step": 6250 }, { "epoch": 0.59, "learning_rate": 0.00018811918770165117, "loss": 7.7914, "step": 6260 }, { "epoch": 0.59, "learning_rate": 0.00018810020876826723, "loss": 7.9805, "step": 6270 }, { "epoch": 0.6, "learning_rate": 0.0001880812298348833, "loss": 7.9156, "step": 6280 }, { "epoch": 0.6, "learning_rate": 0.00018806225090149933, "loss": 7.8462, "step": 6290 }, { "epoch": 0.6, "learning_rate": 0.0001880432719681154, "loss": 7.8603, "step": 6300 }, { "epoch": 0.6, "learning_rate": 0.00018802429303473145, "loss": 7.9177, "step": 6310 }, { "epoch": 0.6, "learning_rate": 0.0001880053141013475, "loss": 8.0463, "step": 6320 }, { "epoch": 0.6, "learning_rate": 0.00018798633516796357, "loss": 7.8587, "step": 6330 }, { "epoch": 0.6, "learning_rate": 0.0001879673562345796, "loss": 7.8777, "step": 6340 }, { "epoch": 0.6, "learning_rate": 0.00018794837730119567, "loss": 7.9968, "step": 6350 }, { "epoch": 0.6, "learning_rate": 0.00018792939836781173, "loss": 7.8663, "step": 6360 }, { "epoch": 0.6, "learning_rate": 0.0001879104194344278, "loss": 7.959, "step": 6370 }, { "epoch": 0.61, "learning_rate": 0.00018789144050104385, "loss": 7.8974, "step": 6380 }, { "epoch": 0.61, "learning_rate": 0.00018787246156765989, "loss": 7.9797, "step": 6390 }, { "epoch": 0.61, "learning_rate": 0.00018785348263427595, "loss": 7.9455, "step": 6400 }, { "epoch": 0.61, "learning_rate": 0.000187834503700892, "loss": 7.8779, "step": 6410 }, { "epoch": 0.61, "learning_rate": 0.00018781552476750807, "loss": 7.9653, "step": 6420 }, { "epoch": 0.61, "learning_rate": 0.00018779654583412413, "loss": 7.9065, "step": 6430 }, { "epoch": 0.61, "learning_rate": 0.0001877775669007402, "loss": 7.9129, "step": 6440 }, { "epoch": 0.61, "learning_rate": 0.00018775858796735623, "loss": 7.8326, "step": 6450 }, { "epoch": 0.61, "learning_rate": 0.0001877396090339723, "loss": 7.8568, "step": 6460 }, { "epoch": 0.61, "learning_rate": 0.00018772063010058835, "loss": 7.8862, "step": 6470 }, { "epoch": 0.61, "learning_rate": 0.0001877016511672044, "loss": 7.9105, "step": 6480 }, { "epoch": 0.62, "learning_rate": 0.00018768267223382047, "loss": 7.9036, "step": 6490 }, { "epoch": 0.62, "learning_rate": 0.0001876636933004365, "loss": 7.8985, "step": 6500 }, { "epoch": 0.62, "learning_rate": 0.00018764471436705257, "loss": 7.9402, "step": 6510 }, { "epoch": 0.62, "learning_rate": 0.00018762573543366863, "loss": 7.9015, "step": 6520 }, { "epoch": 0.62, "learning_rate": 0.0001876067565002847, "loss": 7.9234, "step": 6530 }, { "epoch": 0.62, "learning_rate": 0.00018758777756690075, "loss": 7.927, "step": 6540 }, { "epoch": 0.62, "learning_rate": 0.00018756879863351679, "loss": 7.9412, "step": 6550 }, { "epoch": 0.62, "learning_rate": 0.00018754981970013285, "loss": 7.8886, "step": 6560 }, { "epoch": 0.62, "learning_rate": 0.0001875308407667489, "loss": 7.8395, "step": 6570 }, { "epoch": 0.62, "learning_rate": 0.00018751186183336497, "loss": 7.8293, "step": 6580 }, { "epoch": 0.63, "learning_rate": 0.00018749288289998103, "loss": 7.9124, "step": 6590 }, { "epoch": 0.63, "learning_rate": 0.00018747390396659706, "loss": 7.903, "step": 6600 }, { "epoch": 0.63, "learning_rate": 0.00018745492503321313, "loss": 7.9087, "step": 6610 }, { "epoch": 0.63, "learning_rate": 0.0001874359460998292, "loss": 7.9055, "step": 6620 }, { "epoch": 0.63, "learning_rate": 0.00018741696716644525, "loss": 7.9397, "step": 6630 }, { "epoch": 0.63, "learning_rate": 0.0001873979882330613, "loss": 7.8537, "step": 6640 }, { "epoch": 0.63, "learning_rate": 0.00018737900929967737, "loss": 7.9236, "step": 6650 }, { "epoch": 0.63, "learning_rate": 0.0001873600303662934, "loss": 7.8514, "step": 6660 }, { "epoch": 0.63, "learning_rate": 0.00018734105143290947, "loss": 7.9212, "step": 6670 }, { "epoch": 0.63, "learning_rate": 0.00018732207249952553, "loss": 7.8364, "step": 6680 }, { "epoch": 0.63, "learning_rate": 0.0001873030935661416, "loss": 7.8725, "step": 6690 }, { "epoch": 0.64, "learning_rate": 0.00018728411463275765, "loss": 7.8552, "step": 6700 }, { "epoch": 0.64, "learning_rate": 0.00018726513569937368, "loss": 7.8767, "step": 6710 }, { "epoch": 0.64, "learning_rate": 0.00018724615676598975, "loss": 7.8403, "step": 6720 }, { "epoch": 0.64, "learning_rate": 0.0001872271778326058, "loss": 7.9066, "step": 6730 }, { "epoch": 0.64, "learning_rate": 0.00018720819889922187, "loss": 7.8817, "step": 6740 }, { "epoch": 0.64, "learning_rate": 0.00018718921996583793, "loss": 7.9498, "step": 6750 }, { "epoch": 0.64, "learning_rate": 0.00018717024103245396, "loss": 7.9311, "step": 6760 }, { "epoch": 0.64, "learning_rate": 0.00018715126209907002, "loss": 7.7898, "step": 6770 }, { "epoch": 0.64, "learning_rate": 0.00018713228316568609, "loss": 7.9264, "step": 6780 }, { "epoch": 0.64, "learning_rate": 0.00018711330423230215, "loss": 7.827, "step": 6790 }, { "epoch": 0.65, "learning_rate": 0.0001870943252989182, "loss": 7.9209, "step": 6800 }, { "epoch": 0.65, "learning_rate": 0.00018707534636553427, "loss": 7.8619, "step": 6810 }, { "epoch": 0.65, "learning_rate": 0.0001870563674321503, "loss": 7.8741, "step": 6820 }, { "epoch": 0.65, "learning_rate": 0.00018703738849876637, "loss": 7.806, "step": 6830 }, { "epoch": 0.65, "learning_rate": 0.00018701840956538243, "loss": 7.7915, "step": 6840 }, { "epoch": 0.65, "learning_rate": 0.0001869994306319985, "loss": 7.9415, "step": 6850 }, { "epoch": 0.65, "learning_rate": 0.00018698045169861455, "loss": 7.8782, "step": 6860 }, { "epoch": 0.65, "learning_rate": 0.00018696147276523058, "loss": 7.9309, "step": 6870 }, { "epoch": 0.65, "learning_rate": 0.00018694249383184664, "loss": 7.897, "step": 6880 }, { "epoch": 0.65, "learning_rate": 0.0001869235148984627, "loss": 7.7669, "step": 6890 }, { "epoch": 0.65, "learning_rate": 0.00018690453596507877, "loss": 7.8783, "step": 6900 }, { "epoch": 0.66, "learning_rate": 0.00018688555703169483, "loss": 7.9449, "step": 6910 }, { "epoch": 0.66, "learning_rate": 0.00018686657809831086, "loss": 7.839, "step": 6920 }, { "epoch": 0.66, "learning_rate": 0.00018684759916492692, "loss": 7.864, "step": 6930 }, { "epoch": 0.66, "learning_rate": 0.00018682862023154299, "loss": 7.8597, "step": 6940 }, { "epoch": 0.66, "learning_rate": 0.00018680964129815905, "loss": 7.7478, "step": 6950 }, { "epoch": 0.66, "learning_rate": 0.0001867906623647751, "loss": 7.835, "step": 6960 }, { "epoch": 0.66, "learning_rate": 0.00018677168343139117, "loss": 7.8262, "step": 6970 }, { "epoch": 0.66, "learning_rate": 0.0001867527044980072, "loss": 7.9301, "step": 6980 }, { "epoch": 0.66, "learning_rate": 0.00018673372556462326, "loss": 7.9125, "step": 6990 }, { "epoch": 0.66, "learning_rate": 0.00018671474663123933, "loss": 7.7855, "step": 7000 }, { "epoch": 0.67, "learning_rate": 0.0001866957676978554, "loss": 7.9173, "step": 7010 }, { "epoch": 0.67, "learning_rate": 0.00018667678876447145, "loss": 7.8071, "step": 7020 }, { "epoch": 0.67, "learning_rate": 0.00018665780983108748, "loss": 7.8352, "step": 7030 }, { "epoch": 0.67, "learning_rate": 0.00018663883089770354, "loss": 8.0009, "step": 7040 }, { "epoch": 0.67, "learning_rate": 0.0001866198519643196, "loss": 7.8807, "step": 7050 }, { "epoch": 0.67, "learning_rate": 0.00018660087303093567, "loss": 7.9295, "step": 7060 }, { "epoch": 0.67, "learning_rate": 0.00018658189409755173, "loss": 7.9473, "step": 7070 }, { "epoch": 0.67, "learning_rate": 0.00018656291516416776, "loss": 7.7944, "step": 7080 }, { "epoch": 0.67, "learning_rate": 0.00018654393623078382, "loss": 7.929, "step": 7090 }, { "epoch": 0.67, "learning_rate": 0.00018652495729739988, "loss": 7.9429, "step": 7100 }, { "epoch": 0.67, "learning_rate": 0.00018650597836401595, "loss": 7.8215, "step": 7110 }, { "epoch": 0.68, "learning_rate": 0.000186486999430632, "loss": 7.9725, "step": 7120 }, { "epoch": 0.68, "learning_rate": 0.00018646802049724804, "loss": 7.8404, "step": 7130 }, { "epoch": 0.68, "learning_rate": 0.0001864490415638641, "loss": 7.8327, "step": 7140 }, { "epoch": 0.68, "learning_rate": 0.00018643006263048016, "loss": 7.9532, "step": 7150 }, { "epoch": 0.68, "learning_rate": 0.00018641108369709622, "loss": 7.8497, "step": 7160 }, { "epoch": 0.68, "learning_rate": 0.00018639210476371229, "loss": 7.9127, "step": 7170 }, { "epoch": 0.68, "learning_rate": 0.00018637312583032835, "loss": 7.8786, "step": 7180 }, { "epoch": 0.68, "learning_rate": 0.00018635414689694438, "loss": 7.9397, "step": 7190 }, { "epoch": 0.68, "learning_rate": 0.00018633516796356044, "loss": 7.9493, "step": 7200 }, { "epoch": 0.68, "learning_rate": 0.0001863161890301765, "loss": 7.9002, "step": 7210 }, { "epoch": 0.69, "learning_rate": 0.00018629721009679257, "loss": 7.8782, "step": 7220 }, { "epoch": 0.69, "learning_rate": 0.00018627823116340863, "loss": 7.8344, "step": 7230 }, { "epoch": 0.69, "learning_rate": 0.00018625925223002466, "loss": 7.8414, "step": 7240 }, { "epoch": 0.69, "learning_rate": 0.00018624027329664072, "loss": 8.0188, "step": 7250 }, { "epoch": 0.69, "learning_rate": 0.00018622129436325678, "loss": 8.009, "step": 7260 }, { "epoch": 0.69, "learning_rate": 0.00018620231542987284, "loss": 7.8582, "step": 7270 }, { "epoch": 0.69, "learning_rate": 0.0001861833364964889, "loss": 7.9226, "step": 7280 }, { "epoch": 0.69, "learning_rate": 0.00018616435756310494, "loss": 7.7506, "step": 7290 }, { "epoch": 0.69, "learning_rate": 0.000186145378629721, "loss": 7.8297, "step": 7300 }, { "epoch": 0.69, "learning_rate": 0.00018612639969633706, "loss": 7.8306, "step": 7310 }, { "epoch": 0.69, "learning_rate": 0.00018610742076295312, "loss": 7.9155, "step": 7320 }, { "epoch": 0.7, "learning_rate": 0.00018608844182956919, "loss": 7.906, "step": 7330 }, { "epoch": 0.7, "learning_rate": 0.00018606946289618525, "loss": 7.8682, "step": 7340 }, { "epoch": 0.7, "learning_rate": 0.00018605048396280128, "loss": 7.8501, "step": 7350 }, { "epoch": 0.7, "learning_rate": 0.00018603150502941734, "loss": 7.8741, "step": 7360 }, { "epoch": 0.7, "learning_rate": 0.0001860125260960334, "loss": 7.8392, "step": 7370 }, { "epoch": 0.7, "learning_rate": 0.00018599354716264946, "loss": 7.9128, "step": 7380 }, { "epoch": 0.7, "learning_rate": 0.00018597456822926553, "loss": 7.9862, "step": 7390 }, { "epoch": 0.7, "learning_rate": 0.00018595558929588156, "loss": 7.8779, "step": 7400 }, { "epoch": 0.7, "learning_rate": 0.00018593661036249762, "loss": 7.8301, "step": 7410 }, { "epoch": 0.7, "learning_rate": 0.00018591763142911368, "loss": 7.8788, "step": 7420 }, { "epoch": 0.71, "learning_rate": 0.00018589865249572974, "loss": 7.9279, "step": 7430 }, { "epoch": 0.71, "learning_rate": 0.0001858796735623458, "loss": 7.8394, "step": 7440 }, { "epoch": 0.71, "learning_rate": 0.00018586069462896184, "loss": 7.8696, "step": 7450 }, { "epoch": 0.71, "learning_rate": 0.0001858417156955779, "loss": 7.9632, "step": 7460 }, { "epoch": 0.71, "learning_rate": 0.00018582273676219396, "loss": 7.8598, "step": 7470 }, { "epoch": 0.71, "learning_rate": 0.00018580375782881002, "loss": 7.9166, "step": 7480 }, { "epoch": 0.71, "learning_rate": 0.00018578477889542608, "loss": 7.9847, "step": 7490 }, { "epoch": 0.71, "learning_rate": 0.00018576579996204215, "loss": 7.9102, "step": 7500 }, { "epoch": 0.71, "learning_rate": 0.00018574682102865818, "loss": 7.8375, "step": 7510 }, { "epoch": 0.71, "learning_rate": 0.00018572784209527424, "loss": 7.8349, "step": 7520 }, { "epoch": 0.71, "learning_rate": 0.0001857088631618903, "loss": 7.8491, "step": 7530 }, { "epoch": 0.72, "learning_rate": 0.00018568988422850636, "loss": 7.9786, "step": 7540 }, { "epoch": 0.72, "learning_rate": 0.00018567090529512242, "loss": 7.9327, "step": 7550 }, { "epoch": 0.72, "learning_rate": 0.00018565192636173846, "loss": 7.77, "step": 7560 }, { "epoch": 0.72, "learning_rate": 0.00018563294742835452, "loss": 7.9486, "step": 7570 }, { "epoch": 0.72, "learning_rate": 0.00018561396849497058, "loss": 7.8833, "step": 7580 }, { "epoch": 0.72, "learning_rate": 0.00018559498956158664, "loss": 7.9048, "step": 7590 }, { "epoch": 0.72, "learning_rate": 0.0001855760106282027, "loss": 7.874, "step": 7600 }, { "epoch": 0.72, "learning_rate": 0.00018555703169481874, "loss": 7.8719, "step": 7610 }, { "epoch": 0.72, "learning_rate": 0.0001855380527614348, "loss": 7.851, "step": 7620 }, { "epoch": 0.72, "learning_rate": 0.00018551907382805086, "loss": 7.9377, "step": 7630 }, { "epoch": 0.72, "learning_rate": 0.00018550009489466692, "loss": 7.9001, "step": 7640 }, { "epoch": 0.73, "learning_rate": 0.00018548111596128298, "loss": 7.8736, "step": 7650 }, { "epoch": 0.73, "learning_rate": 0.00018546213702789902, "loss": 7.8838, "step": 7660 }, { "epoch": 0.73, "learning_rate": 0.00018544315809451508, "loss": 7.8924, "step": 7670 }, { "epoch": 0.73, "learning_rate": 0.00018542417916113114, "loss": 7.8862, "step": 7680 }, { "epoch": 0.73, "learning_rate": 0.0001854052002277472, "loss": 7.8334, "step": 7690 }, { "epoch": 0.73, "learning_rate": 0.00018538622129436326, "loss": 7.8363, "step": 7700 }, { "epoch": 0.73, "learning_rate": 0.00018536724236097932, "loss": 7.905, "step": 7710 }, { "epoch": 0.73, "learning_rate": 0.00018534826342759536, "loss": 7.9271, "step": 7720 }, { "epoch": 0.73, "learning_rate": 0.00018532928449421142, "loss": 7.8476, "step": 7730 }, { "epoch": 0.73, "learning_rate": 0.00018531030556082748, "loss": 7.8614, "step": 7740 }, { "epoch": 0.74, "learning_rate": 0.00018529132662744354, "loss": 7.8769, "step": 7750 }, { "epoch": 0.74, "learning_rate": 0.0001852723476940596, "loss": 7.9633, "step": 7760 }, { "epoch": 0.74, "learning_rate": 0.00018525336876067564, "loss": 7.9126, "step": 7770 }, { "epoch": 0.74, "learning_rate": 0.0001852343898272917, "loss": 7.8547, "step": 7780 }, { "epoch": 0.74, "learning_rate": 0.00018521541089390776, "loss": 7.9311, "step": 7790 }, { "epoch": 0.74, "learning_rate": 0.00018519643196052382, "loss": 7.9018, "step": 7800 }, { "epoch": 0.74, "learning_rate": 0.00018517745302713988, "loss": 7.9094, "step": 7810 }, { "epoch": 0.74, "learning_rate": 0.00018515847409375592, "loss": 7.8283, "step": 7820 }, { "epoch": 0.74, "learning_rate": 0.00018513949516037198, "loss": 7.8136, "step": 7830 }, { "epoch": 0.74, "learning_rate": 0.00018512051622698804, "loss": 7.7893, "step": 7840 }, { "epoch": 0.74, "learning_rate": 0.0001851015372936041, "loss": 7.835, "step": 7850 }, { "epoch": 0.75, "learning_rate": 0.00018508255836022016, "loss": 7.9367, "step": 7860 }, { "epoch": 0.75, "learning_rate": 0.00018506357942683622, "loss": 7.8171, "step": 7870 }, { "epoch": 0.75, "learning_rate": 0.00018504460049345226, "loss": 7.7815, "step": 7880 }, { "epoch": 0.75, "learning_rate": 0.00018502562156006832, "loss": 7.8454, "step": 7890 }, { "epoch": 0.75, "learning_rate": 0.00018500664262668438, "loss": 7.9403, "step": 7900 }, { "epoch": 0.75, "learning_rate": 0.00018498766369330044, "loss": 7.8852, "step": 7910 }, { "epoch": 0.75, "learning_rate": 0.0001849686847599165, "loss": 7.8311, "step": 7920 }, { "epoch": 0.75, "learning_rate": 0.00018494970582653254, "loss": 7.8699, "step": 7930 }, { "epoch": 0.75, "learning_rate": 0.0001849307268931486, "loss": 7.8523, "step": 7940 }, { "epoch": 0.75, "learning_rate": 0.00018491174795976466, "loss": 7.7491, "step": 7950 }, { "epoch": 0.76, "learning_rate": 0.00018489276902638072, "loss": 7.9315, "step": 7960 }, { "epoch": 0.76, "learning_rate": 0.00018487379009299678, "loss": 7.9393, "step": 7970 }, { "epoch": 0.76, "learning_rate": 0.00018485481115961282, "loss": 7.8317, "step": 7980 }, { "epoch": 0.76, "learning_rate": 0.00018483583222622888, "loss": 7.9529, "step": 7990 }, { "epoch": 0.76, "learning_rate": 0.00018481685329284494, "loss": 7.8806, "step": 8000 }, { "epoch": 0.76, "learning_rate": 0.000184797874359461, "loss": 7.9167, "step": 8010 }, { "epoch": 0.76, "learning_rate": 0.00018477889542607706, "loss": 7.8738, "step": 8020 }, { "epoch": 0.76, "learning_rate": 0.00018475991649269312, "loss": 8.0231, "step": 8030 }, { "epoch": 0.76, "learning_rate": 0.00018474093755930916, "loss": 7.9135, "step": 8040 }, { "epoch": 0.76, "learning_rate": 0.00018472195862592522, "loss": 7.8255, "step": 8050 }, { "epoch": 0.76, "learning_rate": 0.00018470297969254128, "loss": 7.8249, "step": 8060 }, { "epoch": 0.77, "learning_rate": 0.00018468400075915734, "loss": 7.8805, "step": 8070 }, { "epoch": 0.77, "learning_rate": 0.0001846650218257734, "loss": 7.8936, "step": 8080 }, { "epoch": 0.77, "learning_rate": 0.00018464604289238944, "loss": 7.8101, "step": 8090 }, { "epoch": 0.77, "learning_rate": 0.0001846270639590055, "loss": 7.9686, "step": 8100 }, { "epoch": 0.77, "learning_rate": 0.00018460808502562156, "loss": 7.8303, "step": 8110 }, { "epoch": 0.77, "learning_rate": 0.00018458910609223762, "loss": 7.8838, "step": 8120 }, { "epoch": 0.77, "learning_rate": 0.00018457012715885368, "loss": 7.8232, "step": 8130 }, { "epoch": 0.77, "learning_rate": 0.00018455114822546972, "loss": 7.8239, "step": 8140 }, { "epoch": 0.77, "learning_rate": 0.00018453216929208578, "loss": 7.8823, "step": 8150 }, { "epoch": 0.77, "learning_rate": 0.00018451319035870184, "loss": 7.9962, "step": 8160 }, { "epoch": 0.78, "learning_rate": 0.0001844942114253179, "loss": 7.9401, "step": 8170 }, { "epoch": 0.78, "learning_rate": 0.00018447523249193396, "loss": 7.9221, "step": 8180 }, { "epoch": 0.78, "learning_rate": 0.00018445625355855, "loss": 7.8689, "step": 8190 }, { "epoch": 0.78, "learning_rate": 0.00018443727462516606, "loss": 7.9139, "step": 8200 }, { "epoch": 0.78, "learning_rate": 0.00018441829569178212, "loss": 8.0207, "step": 8210 }, { "epoch": 0.78, "learning_rate": 0.00018439931675839818, "loss": 7.8523, "step": 8220 }, { "epoch": 0.78, "learning_rate": 0.00018438033782501424, "loss": 7.8425, "step": 8230 }, { "epoch": 0.78, "learning_rate": 0.0001843613588916303, "loss": 7.8204, "step": 8240 }, { "epoch": 0.78, "learning_rate": 0.00018434237995824633, "loss": 7.8536, "step": 8250 }, { "epoch": 0.78, "learning_rate": 0.0001843234010248624, "loss": 7.8367, "step": 8260 }, { "epoch": 0.78, "learning_rate": 0.00018430442209147846, "loss": 7.7924, "step": 8270 }, { "epoch": 0.79, "learning_rate": 0.00018428544315809452, "loss": 7.8558, "step": 8280 }, { "epoch": 0.79, "learning_rate": 0.00018426646422471058, "loss": 7.9187, "step": 8290 }, { "epoch": 0.79, "learning_rate": 0.00018424748529132661, "loss": 7.7554, "step": 8300 }, { "epoch": 0.79, "learning_rate": 0.00018422850635794268, "loss": 7.8642, "step": 8310 }, { "epoch": 0.79, "learning_rate": 0.00018420952742455874, "loss": 7.8859, "step": 8320 }, { "epoch": 0.79, "learning_rate": 0.0001841905484911748, "loss": 7.8936, "step": 8330 }, { "epoch": 0.79, "learning_rate": 0.00018417156955779086, "loss": 7.9325, "step": 8340 }, { "epoch": 0.79, "learning_rate": 0.0001841525906244069, "loss": 7.9461, "step": 8350 }, { "epoch": 0.79, "learning_rate": 0.00018413361169102295, "loss": 7.8525, "step": 8360 }, { "epoch": 0.79, "learning_rate": 0.00018411463275763902, "loss": 7.9894, "step": 8370 }, { "epoch": 0.8, "learning_rate": 0.00018409565382425508, "loss": 7.7846, "step": 8380 }, { "epoch": 0.8, "learning_rate": 0.00018407667489087114, "loss": 7.8432, "step": 8390 }, { "epoch": 0.8, "learning_rate": 0.0001840576959574872, "loss": 7.9854, "step": 8400 }, { "epoch": 0.8, "learning_rate": 0.00018403871702410323, "loss": 7.7578, "step": 8410 }, { "epoch": 0.8, "learning_rate": 0.0001840197380907193, "loss": 7.9038, "step": 8420 }, { "epoch": 0.8, "learning_rate": 0.00018400075915733536, "loss": 7.9633, "step": 8430 }, { "epoch": 0.8, "learning_rate": 0.00018398178022395142, "loss": 7.8895, "step": 8440 }, { "epoch": 0.8, "learning_rate": 0.00018396280129056748, "loss": 7.9363, "step": 8450 }, { "epoch": 0.8, "learning_rate": 0.0001839438223571835, "loss": 7.8365, "step": 8460 }, { "epoch": 0.8, "learning_rate": 0.00018392484342379957, "loss": 7.8543, "step": 8470 }, { "epoch": 0.8, "learning_rate": 0.00018390586449041564, "loss": 7.8508, "step": 8480 }, { "epoch": 0.81, "learning_rate": 0.0001838868855570317, "loss": 7.7221, "step": 8490 }, { "epoch": 0.81, "learning_rate": 0.00018386790662364776, "loss": 7.8354, "step": 8500 }, { "epoch": 0.81, "learning_rate": 0.0001838489276902638, "loss": 7.8542, "step": 8510 }, { "epoch": 0.81, "learning_rate": 0.00018382994875687985, "loss": 8.0115, "step": 8520 }, { "epoch": 0.81, "learning_rate": 0.00018381096982349592, "loss": 7.9047, "step": 8530 }, { "epoch": 0.81, "learning_rate": 0.00018379199089011198, "loss": 7.9698, "step": 8540 }, { "epoch": 0.81, "learning_rate": 0.00018377301195672804, "loss": 7.8554, "step": 8550 }, { "epoch": 0.81, "learning_rate": 0.0001837540330233441, "loss": 8.0107, "step": 8560 }, { "epoch": 0.81, "learning_rate": 0.00018373505408996013, "loss": 7.9775, "step": 8570 }, { "epoch": 0.81, "learning_rate": 0.0001837160751565762, "loss": 7.8525, "step": 8580 }, { "epoch": 0.82, "learning_rate": 0.00018369709622319226, "loss": 8.0003, "step": 8590 }, { "epoch": 0.82, "learning_rate": 0.00018367811728980832, "loss": 7.9092, "step": 8600 }, { "epoch": 0.82, "learning_rate": 0.00018365913835642438, "loss": 7.8022, "step": 8610 }, { "epoch": 0.82, "learning_rate": 0.0001836401594230404, "loss": 7.9229, "step": 8620 }, { "epoch": 0.82, "learning_rate": 0.00018362118048965647, "loss": 7.9276, "step": 8630 }, { "epoch": 0.82, "learning_rate": 0.00018360220155627254, "loss": 7.8804, "step": 8640 }, { "epoch": 0.82, "learning_rate": 0.0001835832226228886, "loss": 7.917, "step": 8650 }, { "epoch": 0.82, "learning_rate": 0.00018356424368950466, "loss": 7.8381, "step": 8660 }, { "epoch": 0.82, "learning_rate": 0.0001835452647561207, "loss": 7.8699, "step": 8670 }, { "epoch": 0.82, "learning_rate": 0.00018352628582273675, "loss": 7.8897, "step": 8680 }, { "epoch": 0.82, "learning_rate": 0.00018350730688935281, "loss": 7.88, "step": 8690 }, { "epoch": 0.83, "learning_rate": 0.00018348832795596888, "loss": 7.8877, "step": 8700 }, { "epoch": 0.83, "learning_rate": 0.00018346934902258494, "loss": 7.959, "step": 8710 }, { "epoch": 0.83, "learning_rate": 0.000183450370089201, "loss": 7.8917, "step": 8720 }, { "epoch": 0.83, "learning_rate": 0.00018343139115581703, "loss": 7.819, "step": 8730 }, { "epoch": 0.83, "learning_rate": 0.0001834124122224331, "loss": 7.8435, "step": 8740 }, { "epoch": 0.83, "learning_rate": 0.00018339343328904915, "loss": 7.8497, "step": 8750 }, { "epoch": 0.83, "learning_rate": 0.00018337445435566522, "loss": 7.9061, "step": 8760 }, { "epoch": 0.83, "learning_rate": 0.00018335547542228128, "loss": 7.9324, "step": 8770 }, { "epoch": 0.83, "learning_rate": 0.0001833364964888973, "loss": 7.9109, "step": 8780 }, { "epoch": 0.83, "learning_rate": 0.00018331751755551337, "loss": 7.9571, "step": 8790 }, { "epoch": 0.84, "learning_rate": 0.00018329853862212943, "loss": 7.8536, "step": 8800 }, { "epoch": 0.84, "learning_rate": 0.0001832795596887455, "loss": 7.8108, "step": 8810 }, { "epoch": 0.84, "learning_rate": 0.00018326058075536156, "loss": 7.9172, "step": 8820 }, { "epoch": 0.84, "learning_rate": 0.0001832416018219776, "loss": 7.8601, "step": 8830 }, { "epoch": 0.84, "learning_rate": 0.00018322262288859365, "loss": 7.8404, "step": 8840 }, { "epoch": 0.84, "learning_rate": 0.0001832036439552097, "loss": 7.9074, "step": 8850 }, { "epoch": 0.84, "learning_rate": 0.00018318466502182577, "loss": 7.9285, "step": 8860 }, { "epoch": 0.84, "learning_rate": 0.00018316568608844184, "loss": 7.8839, "step": 8870 }, { "epoch": 0.84, "learning_rate": 0.0001831467071550579, "loss": 7.8742, "step": 8880 }, { "epoch": 0.84, "learning_rate": 0.00018312772822167393, "loss": 7.8799, "step": 8890 }, { "epoch": 0.84, "learning_rate": 0.00018310874928829, "loss": 7.9588, "step": 8900 }, { "epoch": 0.85, "learning_rate": 0.00018308977035490605, "loss": 7.9399, "step": 8910 }, { "epoch": 0.85, "learning_rate": 0.00018307079142152212, "loss": 7.879, "step": 8920 }, { "epoch": 0.85, "learning_rate": 0.00018305181248813818, "loss": 7.9172, "step": 8930 }, { "epoch": 0.85, "learning_rate": 0.0001830328335547542, "loss": 7.9306, "step": 8940 }, { "epoch": 0.85, "learning_rate": 0.00018301385462137027, "loss": 7.9562, "step": 8950 }, { "epoch": 0.85, "learning_rate": 0.00018299487568798633, "loss": 7.7876, "step": 8960 }, { "epoch": 0.85, "learning_rate": 0.0001829758967546024, "loss": 7.9089, "step": 8970 }, { "epoch": 0.85, "learning_rate": 0.00018295691782121846, "loss": 7.9165, "step": 8980 }, { "epoch": 0.85, "learning_rate": 0.0001829379388878345, "loss": 7.9097, "step": 8990 }, { "epoch": 0.85, "learning_rate": 0.00018291895995445055, "loss": 7.9582, "step": 9000 }, { "epoch": 0.86, "learning_rate": 0.0001828999810210666, "loss": 7.9603, "step": 9010 }, { "epoch": 0.86, "learning_rate": 0.00018288100208768267, "loss": 7.8963, "step": 9020 }, { "epoch": 0.86, "learning_rate": 0.00018286202315429874, "loss": 7.9486, "step": 9030 }, { "epoch": 0.86, "learning_rate": 0.0001828430442209148, "loss": 7.9271, "step": 9040 }, { "epoch": 0.86, "learning_rate": 0.00018282406528753083, "loss": 7.8293, "step": 9050 }, { "epoch": 0.86, "learning_rate": 0.0001828050863541469, "loss": 7.9049, "step": 9060 }, { "epoch": 0.86, "learning_rate": 0.00018278610742076295, "loss": 7.8041, "step": 9070 }, { "epoch": 0.86, "learning_rate": 0.00018276712848737901, "loss": 7.8856, "step": 9080 }, { "epoch": 0.86, "learning_rate": 0.00018274814955399508, "loss": 7.9318, "step": 9090 }, { "epoch": 0.86, "learning_rate": 0.0001827291706206111, "loss": 7.8903, "step": 9100 }, { "epoch": 0.86, "learning_rate": 0.00018271019168722717, "loss": 7.86, "step": 9110 }, { "epoch": 0.87, "learning_rate": 0.00018269121275384323, "loss": 7.9715, "step": 9120 }, { "epoch": 0.87, "learning_rate": 0.0001826722338204593, "loss": 7.8544, "step": 9130 }, { "epoch": 0.87, "learning_rate": 0.00018265325488707535, "loss": 7.9638, "step": 9140 }, { "epoch": 0.87, "learning_rate": 0.00018263427595369142, "loss": 7.8734, "step": 9150 }, { "epoch": 0.87, "learning_rate": 0.00018261529702030745, "loss": 7.901, "step": 9160 }, { "epoch": 0.87, "learning_rate": 0.0001825963180869235, "loss": 7.8865, "step": 9170 }, { "epoch": 0.87, "learning_rate": 0.00018257733915353957, "loss": 7.8662, "step": 9180 }, { "epoch": 0.87, "learning_rate": 0.00018255836022015563, "loss": 7.8327, "step": 9190 }, { "epoch": 0.87, "learning_rate": 0.0001825393812867717, "loss": 7.9404, "step": 9200 }, { "epoch": 0.87, "learning_rate": 0.00018252040235338773, "loss": 7.8854, "step": 9210 }, { "epoch": 0.87, "learning_rate": 0.0001825014234200038, "loss": 7.8985, "step": 9220 }, { "epoch": 0.88, "learning_rate": 0.00018248244448661985, "loss": 7.9045, "step": 9230 }, { "epoch": 0.88, "learning_rate": 0.0001824634655532359, "loss": 7.9535, "step": 9240 }, { "epoch": 0.88, "learning_rate": 0.00018244448661985197, "loss": 7.8169, "step": 9250 }, { "epoch": 0.88, "learning_rate": 0.000182425507686468, "loss": 7.8822, "step": 9260 }, { "epoch": 0.88, "learning_rate": 0.00018240652875308407, "loss": 7.9416, "step": 9270 }, { "epoch": 0.88, "learning_rate": 0.00018238754981970013, "loss": 7.871, "step": 9280 }, { "epoch": 0.88, "learning_rate": 0.0001823685708863162, "loss": 7.9359, "step": 9290 }, { "epoch": 0.88, "learning_rate": 0.00018234959195293225, "loss": 7.865, "step": 9300 }, { "epoch": 0.88, "learning_rate": 0.00018233061301954832, "loss": 7.823, "step": 9310 }, { "epoch": 0.88, "learning_rate": 0.00018231163408616435, "loss": 7.8769, "step": 9320 }, { "epoch": 0.89, "learning_rate": 0.0001822926551527804, "loss": 7.8701, "step": 9330 }, { "epoch": 0.89, "learning_rate": 0.00018227367621939647, "loss": 7.8604, "step": 9340 }, { "epoch": 0.89, "learning_rate": 0.00018225469728601253, "loss": 7.8605, "step": 9350 }, { "epoch": 0.89, "learning_rate": 0.0001822357183526286, "loss": 7.989, "step": 9360 }, { "epoch": 0.89, "learning_rate": 0.00018221673941924463, "loss": 7.9755, "step": 9370 }, { "epoch": 0.89, "learning_rate": 0.0001821977604858607, "loss": 7.9113, "step": 9380 }, { "epoch": 0.89, "learning_rate": 0.00018217878155247675, "loss": 7.9352, "step": 9390 }, { "epoch": 0.89, "learning_rate": 0.0001821598026190928, "loss": 7.8909, "step": 9400 }, { "epoch": 0.89, "learning_rate": 0.00018214082368570887, "loss": 7.9461, "step": 9410 }, { "epoch": 0.89, "learning_rate": 0.0001821218447523249, "loss": 7.9076, "step": 9420 }, { "epoch": 0.89, "learning_rate": 0.00018210286581894097, "loss": 7.8942, "step": 9430 }, { "epoch": 0.9, "learning_rate": 0.00018208388688555703, "loss": 7.9481, "step": 9440 }, { "epoch": 0.9, "learning_rate": 0.0001820649079521731, "loss": 7.9706, "step": 9450 }, { "epoch": 0.9, "learning_rate": 0.00018204592901878915, "loss": 7.862, "step": 9460 }, { "epoch": 0.9, "learning_rate": 0.00018202695008540521, "loss": 7.8994, "step": 9470 }, { "epoch": 0.9, "learning_rate": 0.00018200797115202125, "loss": 7.9575, "step": 9480 }, { "epoch": 0.9, "learning_rate": 0.0001819889922186373, "loss": 7.8784, "step": 9490 }, { "epoch": 0.9, "learning_rate": 0.00018197001328525337, "loss": 7.7745, "step": 9500 }, { "epoch": 0.9, "learning_rate": 0.00018195103435186943, "loss": 7.8305, "step": 9510 }, { "epoch": 0.9, "learning_rate": 0.0001819320554184855, "loss": 7.9674, "step": 9520 }, { "epoch": 0.9, "learning_rate": 0.00018191307648510153, "loss": 7.8213, "step": 9530 }, { "epoch": 0.91, "learning_rate": 0.0001818940975517176, "loss": 7.9048, "step": 9540 }, { "epoch": 0.91, "learning_rate": 0.00018187511861833365, "loss": 7.9176, "step": 9550 }, { "epoch": 0.91, "learning_rate": 0.0001818561396849497, "loss": 7.8689, "step": 9560 }, { "epoch": 0.91, "learning_rate": 0.00018183716075156577, "loss": 7.7798, "step": 9570 }, { "epoch": 0.91, "learning_rate": 0.00018181818181818183, "loss": 7.9013, "step": 9580 }, { "epoch": 0.91, "learning_rate": 0.00018179920288479787, "loss": 7.8507, "step": 9590 }, { "epoch": 0.91, "learning_rate": 0.00018178022395141393, "loss": 7.8261, "step": 9600 }, { "epoch": 0.91, "learning_rate": 0.00018176124501803, "loss": 7.9989, "step": 9610 }, { "epoch": 0.91, "learning_rate": 0.00018174226608464605, "loss": 7.8337, "step": 9620 }, { "epoch": 0.91, "learning_rate": 0.00018172328715126211, "loss": 7.8822, "step": 9630 }, { "epoch": 0.91, "learning_rate": 0.00018170430821787815, "loss": 7.8731, "step": 9640 }, { "epoch": 0.92, "learning_rate": 0.0001816853292844942, "loss": 7.8363, "step": 9650 }, { "epoch": 0.92, "learning_rate": 0.00018166635035111027, "loss": 7.8563, "step": 9660 }, { "epoch": 0.92, "learning_rate": 0.00018164737141772633, "loss": 7.869, "step": 9670 }, { "epoch": 0.92, "learning_rate": 0.0001816283924843424, "loss": 7.8723, "step": 9680 }, { "epoch": 0.92, "learning_rate": 0.00018160941355095843, "loss": 7.8806, "step": 9690 }, { "epoch": 0.92, "learning_rate": 0.0001815904346175745, "loss": 7.9023, "step": 9700 }, { "epoch": 0.92, "learning_rate": 0.00018157145568419055, "loss": 7.8605, "step": 9710 }, { "epoch": 0.92, "learning_rate": 0.0001815524767508066, "loss": 7.9172, "step": 9720 }, { "epoch": 0.92, "learning_rate": 0.00018153349781742267, "loss": 7.8754, "step": 9730 }, { "epoch": 0.92, "learning_rate": 0.00018151451888403873, "loss": 7.8568, "step": 9740 }, { "epoch": 0.93, "learning_rate": 0.00018149553995065477, "loss": 7.8626, "step": 9750 }, { "epoch": 0.93, "learning_rate": 0.00018147656101727083, "loss": 7.9, "step": 9760 }, { "epoch": 0.93, "learning_rate": 0.0001814575820838869, "loss": 7.8833, "step": 9770 }, { "epoch": 0.93, "learning_rate": 0.00018143860315050295, "loss": 7.928, "step": 9780 }, { "epoch": 0.93, "learning_rate": 0.000181419624217119, "loss": 7.9062, "step": 9790 }, { "epoch": 0.93, "learning_rate": 0.00018140064528373505, "loss": 7.9355, "step": 9800 }, { "epoch": 0.93, "learning_rate": 0.0001813816663503511, "loss": 7.8508, "step": 9810 }, { "epoch": 0.93, "learning_rate": 0.00018136268741696717, "loss": 7.974, "step": 9820 }, { "epoch": 0.93, "learning_rate": 0.00018134370848358323, "loss": 7.8582, "step": 9830 }, { "epoch": 0.93, "learning_rate": 0.0001813247295501993, "loss": 7.9101, "step": 9840 }, { "epoch": 0.93, "learning_rate": 0.00018130575061681533, "loss": 7.8904, "step": 9850 }, { "epoch": 0.94, "learning_rate": 0.0001812867716834314, "loss": 7.8692, "step": 9860 }, { "epoch": 0.94, "learning_rate": 0.00018126779275004745, "loss": 7.85, "step": 9870 }, { "epoch": 0.94, "learning_rate": 0.0001812488138166635, "loss": 7.8922, "step": 9880 }, { "epoch": 0.94, "learning_rate": 0.00018122983488327957, "loss": 7.8379, "step": 9890 }, { "epoch": 0.94, "learning_rate": 0.00018121085594989563, "loss": 7.9406, "step": 9900 }, { "epoch": 0.94, "learning_rate": 0.00018119187701651167, "loss": 7.8289, "step": 9910 }, { "epoch": 0.94, "learning_rate": 0.00018117289808312773, "loss": 7.8563, "step": 9920 }, { "epoch": 0.94, "learning_rate": 0.0001811539191497438, "loss": 7.834, "step": 9930 }, { "epoch": 0.94, "learning_rate": 0.00018113494021635985, "loss": 8.0085, "step": 9940 }, { "epoch": 0.94, "learning_rate": 0.0001811159612829759, "loss": 7.9363, "step": 9950 }, { "epoch": 0.95, "learning_rate": 0.00018109698234959195, "loss": 7.9089, "step": 9960 }, { "epoch": 0.95, "learning_rate": 0.000181078003416208, "loss": 7.8335, "step": 9970 }, { "epoch": 0.95, "learning_rate": 0.00018105902448282407, "loss": 7.8081, "step": 9980 }, { "epoch": 0.95, "learning_rate": 0.00018104004554944013, "loss": 7.873, "step": 9990 }, { "epoch": 0.95, "learning_rate": 0.0001810210666160562, "loss": 7.8915, "step": 10000 }, { "epoch": 0.95, "learning_rate": 0.00018100208768267225, "loss": 7.9186, "step": 10010 }, { "epoch": 0.95, "learning_rate": 0.0001809831087492883, "loss": 7.8812, "step": 10020 }, { "epoch": 0.95, "learning_rate": 0.00018096412981590435, "loss": 7.9097, "step": 10030 }, { "epoch": 0.95, "learning_rate": 0.0001809451508825204, "loss": 7.9482, "step": 10040 }, { "epoch": 0.95, "learning_rate": 0.00018092617194913647, "loss": 7.8154, "step": 10050 }, { "epoch": 0.95, "learning_rate": 0.00018090719301575253, "loss": 7.8891, "step": 10060 }, { "epoch": 0.96, "learning_rate": 0.00018088821408236857, "loss": 7.9105, "step": 10070 }, { "epoch": 0.96, "learning_rate": 0.00018086923514898463, "loss": 7.7747, "step": 10080 }, { "epoch": 0.96, "learning_rate": 0.0001808502562156007, "loss": 7.8896, "step": 10090 }, { "epoch": 0.96, "learning_rate": 0.00018083127728221675, "loss": 7.8707, "step": 10100 }, { "epoch": 0.96, "learning_rate": 0.0001808122983488328, "loss": 7.8592, "step": 10110 }, { "epoch": 0.96, "learning_rate": 0.00018079331941544885, "loss": 7.898, "step": 10120 }, { "epoch": 0.96, "learning_rate": 0.0001807743404820649, "loss": 7.9184, "step": 10130 }, { "epoch": 0.96, "learning_rate": 0.00018075536154868097, "loss": 7.8481, "step": 10140 }, { "epoch": 0.96, "learning_rate": 0.00018073638261529703, "loss": 7.8695, "step": 10150 }, { "epoch": 0.96, "learning_rate": 0.0001807174036819131, "loss": 7.8226, "step": 10160 }, { "epoch": 0.97, "learning_rate": 0.00018069842474852915, "loss": 7.8663, "step": 10170 }, { "epoch": 0.97, "learning_rate": 0.00018067944581514519, "loss": 7.9406, "step": 10180 }, { "epoch": 0.97, "learning_rate": 0.00018066046688176125, "loss": 7.9082, "step": 10190 }, { "epoch": 0.97, "learning_rate": 0.0001806414879483773, "loss": 7.9394, "step": 10200 }, { "epoch": 0.97, "learning_rate": 0.00018062250901499337, "loss": 7.9245, "step": 10210 }, { "epoch": 0.97, "learning_rate": 0.00018060353008160943, "loss": 7.8805, "step": 10220 }, { "epoch": 0.97, "learning_rate": 0.00018058455114822546, "loss": 7.9478, "step": 10230 }, { "epoch": 0.97, "learning_rate": 0.00018056557221484153, "loss": 7.9049, "step": 10240 }, { "epoch": 0.97, "learning_rate": 0.0001805465932814576, "loss": 7.9602, "step": 10250 }, { "epoch": 0.97, "learning_rate": 0.00018052761434807365, "loss": 7.901, "step": 10260 }, { "epoch": 0.97, "learning_rate": 0.0001805086354146897, "loss": 7.9045, "step": 10270 }, { "epoch": 0.98, "learning_rate": 0.00018048965648130574, "loss": 7.901, "step": 10280 }, { "epoch": 0.98, "learning_rate": 0.0001804706775479218, "loss": 7.8334, "step": 10290 }, { "epoch": 0.98, "learning_rate": 0.00018045169861453787, "loss": 7.9435, "step": 10300 }, { "epoch": 0.98, "learning_rate": 0.00018043271968115393, "loss": 7.8548, "step": 10310 }, { "epoch": 0.98, "learning_rate": 0.00018041374074777, "loss": 7.7928, "step": 10320 }, { "epoch": 0.98, "learning_rate": 0.00018039476181438605, "loss": 7.8831, "step": 10330 }, { "epoch": 0.98, "learning_rate": 0.00018037578288100208, "loss": 7.8562, "step": 10340 }, { "epoch": 0.98, "learning_rate": 0.00018035680394761815, "loss": 7.836, "step": 10350 }, { "epoch": 0.98, "learning_rate": 0.0001803378250142342, "loss": 7.8384, "step": 10360 }, { "epoch": 0.98, "learning_rate": 0.00018031884608085027, "loss": 7.9001, "step": 10370 }, { "epoch": 0.99, "learning_rate": 0.00018029986714746633, "loss": 7.8501, "step": 10380 }, { "epoch": 0.99, "learning_rate": 0.00018028088821408236, "loss": 7.8157, "step": 10390 }, { "epoch": 0.99, "learning_rate": 0.00018026190928069843, "loss": 7.885, "step": 10400 }, { "epoch": 0.99, "learning_rate": 0.0001802429303473145, "loss": 7.9899, "step": 10410 }, { "epoch": 0.99, "learning_rate": 0.00018022395141393055, "loss": 7.9357, "step": 10420 }, { "epoch": 0.99, "learning_rate": 0.0001802049724805466, "loss": 7.9355, "step": 10430 }, { "epoch": 0.99, "learning_rate": 0.00018018599354716267, "loss": 7.9414, "step": 10440 }, { "epoch": 0.99, "learning_rate": 0.0001801670146137787, "loss": 7.9003, "step": 10450 }, { "epoch": 0.99, "learning_rate": 0.00018014803568039477, "loss": 7.8847, "step": 10460 }, { "epoch": 0.99, "learning_rate": 0.00018012905674701083, "loss": 7.8374, "step": 10470 }, { "epoch": 0.99, "learning_rate": 0.0001801100778136269, "loss": 7.8259, "step": 10480 }, { "epoch": 1.0, "learning_rate": 0.00018009109888024295, "loss": 7.8256, "step": 10490 }, { "epoch": 1.0, "learning_rate": 0.00018007211994685898, "loss": 7.8138, "step": 10500 }, { "epoch": 1.0, "learning_rate": 0.00018005314101347505, "loss": 7.7975, "step": 10510 }, { "epoch": 1.0, "learning_rate": 0.0001800341620800911, "loss": 7.7755, "step": 10520 }, { "epoch": 1.0, "learning_rate": 0.00018001518314670717, "loss": 7.9293, "step": 10530 }, { "epoch": 1.0, "learning_rate": 0.00017999620421332323, "loss": 7.8856, "step": 10540 }, { "epoch": 1.0, "learning_rate": 0.00017997722527993926, "loss": 7.8915, "step": 10550 }, { "epoch": 1.0, "learning_rate": 0.00017995824634655532, "loss": 7.891, "step": 10560 }, { "epoch": 1.0, "learning_rate": 0.00017993926741317139, "loss": 7.8232, "step": 10570 }, { "epoch": 1.0, "learning_rate": 0.00017992028847978745, "loss": 7.9083, "step": 10580 }, { "epoch": 1.0, "learning_rate": 0.0001799013095464035, "loss": 7.9282, "step": 10590 }, { "epoch": 1.01, "learning_rate": 0.00017988233061301957, "loss": 7.8374, "step": 10600 }, { "epoch": 1.01, "learning_rate": 0.0001798633516796356, "loss": 7.9513, "step": 10610 }, { "epoch": 1.01, "learning_rate": 0.00017984437274625167, "loss": 7.8343, "step": 10620 }, { "epoch": 1.01, "learning_rate": 0.00017982539381286773, "loss": 7.7961, "step": 10630 }, { "epoch": 1.01, "learning_rate": 0.0001798064148794838, "loss": 7.9153, "step": 10640 }, { "epoch": 1.01, "learning_rate": 0.00017978743594609985, "loss": 7.8247, "step": 10650 }, { "epoch": 1.01, "learning_rate": 0.00017976845701271588, "loss": 7.8539, "step": 10660 }, { "epoch": 1.01, "learning_rate": 0.00017974947807933194, "loss": 7.9135, "step": 10670 }, { "epoch": 1.01, "learning_rate": 0.000179730499145948, "loss": 7.8577, "step": 10680 }, { "epoch": 1.01, "learning_rate": 0.00017971152021256407, "loss": 7.8242, "step": 10690 }, { "epoch": 1.02, "learning_rate": 0.00017969254127918013, "loss": 7.8424, "step": 10700 }, { "epoch": 1.02, "learning_rate": 0.0001796735623457962, "loss": 7.8466, "step": 10710 }, { "epoch": 1.02, "learning_rate": 0.00017965458341241222, "loss": 7.8525, "step": 10720 }, { "epoch": 1.02, "learning_rate": 0.00017963560447902828, "loss": 7.9175, "step": 10730 }, { "epoch": 1.02, "learning_rate": 0.00017961662554564435, "loss": 7.8664, "step": 10740 }, { "epoch": 1.02, "learning_rate": 0.0001795976466122604, "loss": 7.9202, "step": 10750 }, { "epoch": 1.02, "learning_rate": 0.00017957866767887647, "loss": 7.8973, "step": 10760 }, { "epoch": 1.02, "learning_rate": 0.0001795596887454925, "loss": 7.8988, "step": 10770 }, { "epoch": 1.02, "learning_rate": 0.00017954070981210856, "loss": 7.8721, "step": 10780 }, { "epoch": 1.02, "learning_rate": 0.00017952173087872463, "loss": 7.8283, "step": 10790 }, { "epoch": 1.02, "learning_rate": 0.0001795027519453407, "loss": 7.8389, "step": 10800 }, { "epoch": 1.03, "learning_rate": 0.00017948377301195675, "loss": 7.8965, "step": 10810 }, { "epoch": 1.03, "learning_rate": 0.00017946479407857278, "loss": 7.7578, "step": 10820 }, { "epoch": 1.03, "learning_rate": 0.00017944581514518884, "loss": 7.8069, "step": 10830 }, { "epoch": 1.03, "learning_rate": 0.0001794268362118049, "loss": 7.8509, "step": 10840 }, { "epoch": 1.03, "learning_rate": 0.00017940785727842097, "loss": 8.0003, "step": 10850 }, { "epoch": 1.03, "learning_rate": 0.00017938887834503703, "loss": 7.8455, "step": 10860 }, { "epoch": 1.03, "learning_rate": 0.0001793698994116531, "loss": 7.8861, "step": 10870 }, { "epoch": 1.03, "learning_rate": 0.00017935092047826912, "loss": 7.996, "step": 10880 }, { "epoch": 1.03, "learning_rate": 0.00017933194154488518, "loss": 7.8577, "step": 10890 }, { "epoch": 1.03, "learning_rate": 0.00017931296261150125, "loss": 7.8246, "step": 10900 }, { "epoch": 1.04, "learning_rate": 0.0001792939836781173, "loss": 7.7759, "step": 10910 }, { "epoch": 1.04, "learning_rate": 0.00017927500474473337, "loss": 7.817, "step": 10920 }, { "epoch": 1.04, "learning_rate": 0.0001792560258113494, "loss": 7.8713, "step": 10930 }, { "epoch": 1.04, "learning_rate": 0.00017923704687796546, "loss": 7.8883, "step": 10940 }, { "epoch": 1.04, "learning_rate": 0.00017921806794458152, "loss": 7.8941, "step": 10950 }, { "epoch": 1.04, "learning_rate": 0.00017919908901119759, "loss": 7.991, "step": 10960 }, { "epoch": 1.04, "learning_rate": 0.00017918011007781365, "loss": 7.9228, "step": 10970 }, { "epoch": 1.04, "learning_rate": 0.00017916113114442968, "loss": 7.8441, "step": 10980 }, { "epoch": 1.04, "learning_rate": 0.00017914215221104574, "loss": 7.9454, "step": 10990 }, { "epoch": 1.04, "learning_rate": 0.0001791231732776618, "loss": 7.9598, "step": 11000 }, { "epoch": 1.04, "learning_rate": 0.00017910419434427787, "loss": 7.9049, "step": 11010 }, { "epoch": 1.05, "learning_rate": 0.00017908521541089393, "loss": 7.8028, "step": 11020 }, { "epoch": 1.05, "learning_rate": 0.00017906623647751, "loss": 7.8796, "step": 11030 }, { "epoch": 1.05, "learning_rate": 0.00017904725754412602, "loss": 7.8912, "step": 11040 }, { "epoch": 1.05, "learning_rate": 0.00017902827861074208, "loss": 7.9389, "step": 11050 }, { "epoch": 1.05, "learning_rate": 0.00017900929967735814, "loss": 7.8783, "step": 11060 }, { "epoch": 1.05, "learning_rate": 0.0001789903207439742, "loss": 7.7984, "step": 11070 }, { "epoch": 1.05, "learning_rate": 0.00017897134181059027, "loss": 7.8839, "step": 11080 }, { "epoch": 1.05, "learning_rate": 0.0001789523628772063, "loss": 7.9485, "step": 11090 }, { "epoch": 1.05, "learning_rate": 0.00017893338394382236, "loss": 7.8604, "step": 11100 }, { "epoch": 1.05, "learning_rate": 0.00017891440501043842, "loss": 7.8475, "step": 11110 }, { "epoch": 1.06, "learning_rate": 0.00017889542607705448, "loss": 7.8361, "step": 11120 }, { "epoch": 1.06, "learning_rate": 0.00017887644714367055, "loss": 7.9018, "step": 11130 }, { "epoch": 1.06, "learning_rate": 0.0001788574682102866, "loss": 7.8377, "step": 11140 }, { "epoch": 1.06, "learning_rate": 0.00017883848927690264, "loss": 7.9123, "step": 11150 }, { "epoch": 1.06, "learning_rate": 0.0001788195103435187, "loss": 7.8456, "step": 11160 }, { "epoch": 1.06, "learning_rate": 0.00017880053141013476, "loss": 8.0092, "step": 11170 }, { "epoch": 1.06, "learning_rate": 0.00017878155247675083, "loss": 7.9217, "step": 11180 }, { "epoch": 1.06, "learning_rate": 0.0001787625735433669, "loss": 7.8548, "step": 11190 }, { "epoch": 1.06, "learning_rate": 0.00017874359460998292, "loss": 7.8086, "step": 11200 }, { "epoch": 1.06, "learning_rate": 0.00017872461567659898, "loss": 7.9422, "step": 11210 }, { "epoch": 1.06, "learning_rate": 0.00017870563674321504, "loss": 7.8733, "step": 11220 }, { "epoch": 1.07, "learning_rate": 0.0001786866578098311, "loss": 7.9344, "step": 11230 }, { "epoch": 1.07, "learning_rate": 0.00017866767887644717, "loss": 7.8834, "step": 11240 }, { "epoch": 1.07, "learning_rate": 0.0001786486999430632, "loss": 7.9546, "step": 11250 }, { "epoch": 1.07, "learning_rate": 0.00017862972100967926, "loss": 7.8716, "step": 11260 }, { "epoch": 1.07, "learning_rate": 0.00017861074207629532, "loss": 7.8437, "step": 11270 }, { "epoch": 1.07, "learning_rate": 0.00017859176314291138, "loss": 7.9324, "step": 11280 }, { "epoch": 1.07, "learning_rate": 0.00017857278420952745, "loss": 7.9234, "step": 11290 }, { "epoch": 1.07, "learning_rate": 0.0001785538052761435, "loss": 7.9025, "step": 11300 }, { "epoch": 1.07, "learning_rate": 0.00017853482634275954, "loss": 7.9273, "step": 11310 }, { "epoch": 1.07, "learning_rate": 0.0001785158474093756, "loss": 7.921, "step": 11320 }, { "epoch": 1.08, "learning_rate": 0.00017849686847599166, "loss": 7.8048, "step": 11330 }, { "epoch": 1.08, "learning_rate": 0.00017847788954260772, "loss": 7.9139, "step": 11340 }, { "epoch": 1.08, "learning_rate": 0.00017845891060922379, "loss": 7.8946, "step": 11350 }, { "epoch": 1.08, "learning_rate": 0.00017843993167583982, "loss": 7.8894, "step": 11360 }, { "epoch": 1.08, "learning_rate": 0.00017842095274245588, "loss": 7.929, "step": 11370 }, { "epoch": 1.08, "learning_rate": 0.00017840197380907194, "loss": 7.8031, "step": 11380 }, { "epoch": 1.08, "learning_rate": 0.000178382994875688, "loss": 7.8458, "step": 11390 }, { "epoch": 1.08, "learning_rate": 0.00017836401594230407, "loss": 7.7878, "step": 11400 }, { "epoch": 1.08, "learning_rate": 0.0001783450370089201, "loss": 7.8181, "step": 11410 }, { "epoch": 1.08, "learning_rate": 0.00017832605807553616, "loss": 7.9258, "step": 11420 }, { "epoch": 1.08, "learning_rate": 0.00017830707914215222, "loss": 7.8423, "step": 11430 }, { "epoch": 1.09, "learning_rate": 0.00017828810020876828, "loss": 7.8946, "step": 11440 }, { "epoch": 1.09, "learning_rate": 0.00017826912127538434, "loss": 7.8909, "step": 11450 }, { "epoch": 1.09, "learning_rate": 0.0001782501423420004, "loss": 7.9299, "step": 11460 }, { "epoch": 1.09, "learning_rate": 0.00017823116340861644, "loss": 7.8795, "step": 11470 }, { "epoch": 1.09, "learning_rate": 0.0001782121844752325, "loss": 7.9029, "step": 11480 }, { "epoch": 1.09, "learning_rate": 0.00017819320554184856, "loss": 7.9417, "step": 11490 }, { "epoch": 1.09, "learning_rate": 0.00017817422660846462, "loss": 7.8398, "step": 11500 }, { "epoch": 1.09, "learning_rate": 0.00017815524767508069, "loss": 7.8438, "step": 11510 }, { "epoch": 1.09, "learning_rate": 0.00017813626874169672, "loss": 7.8947, "step": 11520 }, { "epoch": 1.09, "learning_rate": 0.00017811728980831278, "loss": 7.8327, "step": 11530 }, { "epoch": 1.1, "learning_rate": 0.00017809831087492884, "loss": 7.8483, "step": 11540 }, { "epoch": 1.1, "learning_rate": 0.0001780793319415449, "loss": 7.8825, "step": 11550 }, { "epoch": 1.1, "learning_rate": 0.00017806035300816096, "loss": 7.9321, "step": 11560 }, { "epoch": 1.1, "learning_rate": 0.00017804137407477703, "loss": 7.8518, "step": 11570 }, { "epoch": 1.1, "learning_rate": 0.00017802239514139306, "loss": 7.8816, "step": 11580 }, { "epoch": 1.1, "learning_rate": 0.00017800341620800912, "loss": 7.8593, "step": 11590 }, { "epoch": 1.1, "learning_rate": 0.00017798443727462518, "loss": 8.0316, "step": 11600 }, { "epoch": 1.1, "learning_rate": 0.00017796545834124124, "loss": 7.8125, "step": 11610 }, { "epoch": 1.1, "learning_rate": 0.0001779464794078573, "loss": 7.8441, "step": 11620 }, { "epoch": 1.1, "learning_rate": 0.00017792750047447334, "loss": 7.8297, "step": 11630 }, { "epoch": 1.1, "learning_rate": 0.0001779085215410894, "loss": 7.8087, "step": 11640 }, { "epoch": 1.11, "learning_rate": 0.00017788954260770546, "loss": 7.8674, "step": 11650 }, { "epoch": 1.11, "learning_rate": 0.00017787056367432152, "loss": 7.8526, "step": 11660 }, { "epoch": 1.11, "learning_rate": 0.00017785158474093758, "loss": 7.8793, "step": 11670 }, { "epoch": 1.11, "learning_rate": 0.00017783260580755362, "loss": 7.895, "step": 11680 }, { "epoch": 1.11, "learning_rate": 0.00017781362687416968, "loss": 7.7946, "step": 11690 }, { "epoch": 1.11, "learning_rate": 0.00017779464794078574, "loss": 7.8457, "step": 11700 }, { "epoch": 1.11, "learning_rate": 0.0001777756690074018, "loss": 7.8713, "step": 11710 }, { "epoch": 1.11, "learning_rate": 0.00017775669007401786, "loss": 7.8753, "step": 11720 }, { "epoch": 1.11, "learning_rate": 0.00017773771114063392, "loss": 7.8126, "step": 11730 }, { "epoch": 1.11, "learning_rate": 0.00017771873220724996, "loss": 7.7843, "step": 11740 }, { "epoch": 1.12, "learning_rate": 0.00017769975327386602, "loss": 7.9056, "step": 11750 }, { "epoch": 1.12, "learning_rate": 0.00017768077434048208, "loss": 7.7945, "step": 11760 }, { "epoch": 1.12, "learning_rate": 0.00017766179540709814, "loss": 7.9491, "step": 11770 }, { "epoch": 1.12, "learning_rate": 0.0001776428164737142, "loss": 7.8972, "step": 11780 }, { "epoch": 1.12, "learning_rate": 0.00017762383754033024, "loss": 7.889, "step": 11790 }, { "epoch": 1.12, "learning_rate": 0.0001776048586069463, "loss": 7.7927, "step": 11800 }, { "epoch": 1.12, "learning_rate": 0.00017758587967356236, "loss": 7.8745, "step": 11810 }, { "epoch": 1.12, "learning_rate": 0.00017756690074017842, "loss": 8.038, "step": 11820 }, { "epoch": 1.12, "learning_rate": 0.00017754792180679448, "loss": 7.8502, "step": 11830 }, { "epoch": 1.12, "learning_rate": 0.00017752894287341052, "loss": 7.8414, "step": 11840 }, { "epoch": 1.12, "learning_rate": 0.00017750996394002658, "loss": 7.8466, "step": 11850 }, { "epoch": 1.13, "learning_rate": 0.00017749098500664264, "loss": 7.8095, "step": 11860 }, { "epoch": 1.13, "learning_rate": 0.0001774720060732587, "loss": 7.9301, "step": 11870 }, { "epoch": 1.13, "learning_rate": 0.00017745302713987476, "loss": 7.8592, "step": 11880 }, { "epoch": 1.13, "learning_rate": 0.00017743404820649082, "loss": 7.9105, "step": 11890 }, { "epoch": 1.13, "learning_rate": 0.00017741506927310686, "loss": 7.8377, "step": 11900 }, { "epoch": 1.13, "learning_rate": 0.00017739609033972292, "loss": 7.8799, "step": 11910 }, { "epoch": 1.13, "learning_rate": 0.00017737711140633898, "loss": 7.8186, "step": 11920 }, { "epoch": 1.13, "learning_rate": 0.00017735813247295504, "loss": 7.8921, "step": 11930 }, { "epoch": 1.13, "learning_rate": 0.0001773391535395711, "loss": 7.8325, "step": 11940 }, { "epoch": 1.13, "learning_rate": 0.00017732017460618714, "loss": 7.8533, "step": 11950 }, { "epoch": 1.13, "learning_rate": 0.0001773011956728032, "loss": 7.7791, "step": 11960 }, { "epoch": 1.14, "learning_rate": 0.00017728221673941926, "loss": 7.841, "step": 11970 }, { "epoch": 1.14, "learning_rate": 0.00017726323780603532, "loss": 7.8712, "step": 11980 }, { "epoch": 1.14, "learning_rate": 0.00017724425887265138, "loss": 7.8939, "step": 11990 }, { "epoch": 1.14, "learning_rate": 0.00017722527993926744, "loss": 7.8825, "step": 12000 }, { "epoch": 1.14, "learning_rate": 0.00017720630100588348, "loss": 7.9506, "step": 12010 }, { "epoch": 1.14, "learning_rate": 0.00017718732207249954, "loss": 7.855, "step": 12020 }, { "epoch": 1.14, "learning_rate": 0.0001771683431391156, "loss": 7.7315, "step": 12030 }, { "epoch": 1.14, "learning_rate": 0.00017714936420573166, "loss": 7.791, "step": 12040 }, { "epoch": 1.14, "learning_rate": 0.00017713038527234772, "loss": 7.8175, "step": 12050 }, { "epoch": 1.14, "learning_rate": 0.00017711140633896376, "loss": 7.9103, "step": 12060 }, { "epoch": 1.15, "learning_rate": 0.00017709242740557982, "loss": 7.7869, "step": 12070 }, { "epoch": 1.15, "learning_rate": 0.00017707344847219588, "loss": 7.952, "step": 12080 }, { "epoch": 1.15, "learning_rate": 0.00017705446953881194, "loss": 7.8767, "step": 12090 }, { "epoch": 1.15, "learning_rate": 0.000177035490605428, "loss": 7.7993, "step": 12100 }, { "epoch": 1.15, "learning_rate": 0.00017701651167204404, "loss": 7.7995, "step": 12110 }, { "epoch": 1.15, "learning_rate": 0.0001769975327386601, "loss": 7.869, "step": 12120 }, { "epoch": 1.15, "learning_rate": 0.00017697855380527616, "loss": 7.8906, "step": 12130 }, { "epoch": 1.15, "learning_rate": 0.00017695957487189222, "loss": 7.9035, "step": 12140 }, { "epoch": 1.15, "learning_rate": 0.00017694059593850828, "loss": 7.9041, "step": 12150 }, { "epoch": 1.15, "learning_rate": 0.00017692161700512434, "loss": 7.8036, "step": 12160 }, { "epoch": 1.15, "learning_rate": 0.00017690263807174038, "loss": 7.9532, "step": 12170 }, { "epoch": 1.16, "learning_rate": 0.00017688365913835644, "loss": 7.8963, "step": 12180 }, { "epoch": 1.16, "learning_rate": 0.0001768646802049725, "loss": 7.876, "step": 12190 }, { "epoch": 1.16, "learning_rate": 0.00017684570127158856, "loss": 7.8861, "step": 12200 }, { "epoch": 1.16, "learning_rate": 0.00017682672233820462, "loss": 7.9612, "step": 12210 }, { "epoch": 1.16, "learning_rate": 0.00017680774340482066, "loss": 7.8485, "step": 12220 }, { "epoch": 1.16, "learning_rate": 0.00017678876447143672, "loss": 7.8881, "step": 12230 }, { "epoch": 1.16, "learning_rate": 0.00017676978553805278, "loss": 7.8058, "step": 12240 }, { "epoch": 1.16, "learning_rate": 0.00017675080660466884, "loss": 7.8773, "step": 12250 }, { "epoch": 1.16, "learning_rate": 0.0001767318276712849, "loss": 7.8872, "step": 12260 }, { "epoch": 1.16, "learning_rate": 0.00017671284873790094, "loss": 7.8364, "step": 12270 }, { "epoch": 1.17, "learning_rate": 0.000176693869804517, "loss": 7.8533, "step": 12280 }, { "epoch": 1.17, "learning_rate": 0.00017667489087113306, "loss": 7.9749, "step": 12290 }, { "epoch": 1.17, "learning_rate": 0.00017665591193774912, "loss": 7.9904, "step": 12300 }, { "epoch": 1.17, "learning_rate": 0.00017663693300436518, "loss": 7.8272, "step": 12310 }, { "epoch": 1.17, "learning_rate": 0.00017661795407098124, "loss": 7.8786, "step": 12320 }, { "epoch": 1.17, "learning_rate": 0.00017659897513759728, "loss": 7.8607, "step": 12330 }, { "epoch": 1.17, "learning_rate": 0.00017657999620421334, "loss": 7.9109, "step": 12340 }, { "epoch": 1.17, "learning_rate": 0.0001765610172708294, "loss": 7.8558, "step": 12350 }, { "epoch": 1.17, "learning_rate": 0.00017654203833744546, "loss": 8.0308, "step": 12360 }, { "epoch": 1.17, "learning_rate": 0.00017652305940406152, "loss": 7.8292, "step": 12370 }, { "epoch": 1.17, "learning_rate": 0.00017650408047067756, "loss": 7.927, "step": 12380 }, { "epoch": 1.18, "learning_rate": 0.00017648510153729362, "loss": 8.009, "step": 12390 }, { "epoch": 1.18, "learning_rate": 0.00017646612260390968, "loss": 7.8794, "step": 12400 }, { "epoch": 1.18, "learning_rate": 0.00017644714367052574, "loss": 7.9297, "step": 12410 }, { "epoch": 1.18, "learning_rate": 0.0001764281647371418, "loss": 7.9262, "step": 12420 }, { "epoch": 1.18, "learning_rate": 0.00017640918580375786, "loss": 7.89, "step": 12430 }, { "epoch": 1.18, "learning_rate": 0.0001763902068703739, "loss": 7.8293, "step": 12440 }, { "epoch": 1.18, "learning_rate": 0.00017637122793698996, "loss": 8.0054, "step": 12450 }, { "epoch": 1.18, "learning_rate": 0.00017635224900360602, "loss": 7.9354, "step": 12460 }, { "epoch": 1.18, "learning_rate": 0.00017633327007022208, "loss": 7.8386, "step": 12470 }, { "epoch": 1.18, "learning_rate": 0.00017631429113683814, "loss": 7.9373, "step": 12480 }, { "epoch": 1.19, "learning_rate": 0.00017629531220345418, "loss": 7.9175, "step": 12490 }, { "epoch": 1.19, "learning_rate": 0.00017627633327007024, "loss": 7.8685, "step": 12500 }, { "epoch": 1.19, "learning_rate": 0.0001762573543366863, "loss": 7.8426, "step": 12510 }, { "epoch": 1.19, "learning_rate": 0.00017623837540330236, "loss": 7.9382, "step": 12520 }, { "epoch": 1.19, "learning_rate": 0.00017621939646991842, "loss": 7.8224, "step": 12530 }, { "epoch": 1.19, "learning_rate": 0.00017620041753653445, "loss": 7.864, "step": 12540 }, { "epoch": 1.19, "learning_rate": 0.00017618143860315052, "loss": 7.9494, "step": 12550 }, { "epoch": 1.19, "learning_rate": 0.00017616245966976658, "loss": 7.8304, "step": 12560 }, { "epoch": 1.19, "learning_rate": 0.00017614348073638264, "loss": 8.0052, "step": 12570 }, { "epoch": 1.19, "learning_rate": 0.0001761245018029987, "loss": 7.942, "step": 12580 }, { "epoch": 1.19, "learning_rate": 0.00017610552286961473, "loss": 7.8737, "step": 12590 }, { "epoch": 1.2, "learning_rate": 0.0001760865439362308, "loss": 7.8656, "step": 12600 }, { "epoch": 1.2, "learning_rate": 0.00017606756500284686, "loss": 7.9479, "step": 12610 }, { "epoch": 1.2, "learning_rate": 0.00017604858606946292, "loss": 7.8352, "step": 12620 }, { "epoch": 1.2, "learning_rate": 0.00017602960713607898, "loss": 7.9308, "step": 12630 }, { "epoch": 1.2, "learning_rate": 0.00017601062820269504, "loss": 7.8263, "step": 12640 }, { "epoch": 1.2, "learning_rate": 0.00017599164926931107, "loss": 7.8724, "step": 12650 }, { "epoch": 1.2, "learning_rate": 0.00017597267033592714, "loss": 7.8569, "step": 12660 }, { "epoch": 1.2, "learning_rate": 0.0001759536914025432, "loss": 7.8869, "step": 12670 }, { "epoch": 1.2, "learning_rate": 0.00017593471246915926, "loss": 7.9312, "step": 12680 }, { "epoch": 1.2, "learning_rate": 0.00017591573353577532, "loss": 7.8857, "step": 12690 }, { "epoch": 1.21, "learning_rate": 0.00017589675460239135, "loss": 7.8912, "step": 12700 }, { "epoch": 1.21, "learning_rate": 0.00017587777566900741, "loss": 7.8062, "step": 12710 }, { "epoch": 1.21, "learning_rate": 0.00017585879673562348, "loss": 7.8309, "step": 12720 }, { "epoch": 1.21, "learning_rate": 0.00017583981780223954, "loss": 7.8373, "step": 12730 }, { "epoch": 1.21, "learning_rate": 0.0001758208388688556, "loss": 7.9202, "step": 12740 }, { "epoch": 1.21, "learning_rate": 0.00017580185993547163, "loss": 7.8688, "step": 12750 }, { "epoch": 1.21, "learning_rate": 0.0001757828810020877, "loss": 7.8819, "step": 12760 }, { "epoch": 1.21, "learning_rate": 0.00017576390206870376, "loss": 7.9604, "step": 12770 }, { "epoch": 1.21, "learning_rate": 0.00017574492313531982, "loss": 7.8108, "step": 12780 }, { "epoch": 1.21, "learning_rate": 0.00017572594420193588, "loss": 7.8294, "step": 12790 }, { "epoch": 1.21, "learning_rate": 0.00017570696526855194, "loss": 7.8233, "step": 12800 }, { "epoch": 1.22, "learning_rate": 0.00017568798633516797, "loss": 7.8958, "step": 12810 }, { "epoch": 1.22, "learning_rate": 0.00017566900740178403, "loss": 7.9881, "step": 12820 }, { "epoch": 1.22, "learning_rate": 0.0001756500284684001, "loss": 7.884, "step": 12830 }, { "epoch": 1.22, "learning_rate": 0.00017563104953501616, "loss": 7.8286, "step": 12840 }, { "epoch": 1.22, "learning_rate": 0.00017561207060163222, "loss": 7.8803, "step": 12850 }, { "epoch": 1.22, "learning_rate": 0.00017559309166824825, "loss": 7.8043, "step": 12860 }, { "epoch": 1.22, "learning_rate": 0.00017557411273486431, "loss": 7.8647, "step": 12870 }, { "epoch": 1.22, "learning_rate": 0.00017555513380148038, "loss": 7.918, "step": 12880 }, { "epoch": 1.22, "learning_rate": 0.00017553615486809644, "loss": 7.8076, "step": 12890 }, { "epoch": 1.22, "learning_rate": 0.0001755171759347125, "loss": 7.9134, "step": 12900 }, { "epoch": 1.23, "learning_rate": 0.00017549819700132853, "loss": 7.9278, "step": 12910 }, { "epoch": 1.23, "learning_rate": 0.0001754792180679446, "loss": 7.8538, "step": 12920 }, { "epoch": 1.23, "learning_rate": 0.00017546023913456065, "loss": 7.8656, "step": 12930 }, { "epoch": 1.23, "learning_rate": 0.00017544126020117672, "loss": 7.9321, "step": 12940 }, { "epoch": 1.23, "learning_rate": 0.00017542228126779278, "loss": 7.9646, "step": 12950 }, { "epoch": 1.23, "learning_rate": 0.00017540330233440884, "loss": 7.9302, "step": 12960 }, { "epoch": 1.23, "learning_rate": 0.00017538432340102487, "loss": 7.8743, "step": 12970 }, { "epoch": 1.23, "learning_rate": 0.00017536534446764093, "loss": 7.8306, "step": 12980 }, { "epoch": 1.23, "learning_rate": 0.000175346365534257, "loss": 7.8704, "step": 12990 }, { "epoch": 1.23, "learning_rate": 0.00017532738660087306, "loss": 7.8513, "step": 13000 }, { "epoch": 1.23, "learning_rate": 0.00017530840766748912, "loss": 7.8965, "step": 13010 }, { "epoch": 1.24, "learning_rate": 0.00017528942873410515, "loss": 7.9604, "step": 13020 }, { "epoch": 1.24, "learning_rate": 0.0001752704498007212, "loss": 7.9551, "step": 13030 }, { "epoch": 1.24, "learning_rate": 0.00017525147086733727, "loss": 7.915, "step": 13040 }, { "epoch": 1.24, "learning_rate": 0.00017523249193395334, "loss": 7.856, "step": 13050 }, { "epoch": 1.24, "learning_rate": 0.0001752135130005694, "loss": 7.8559, "step": 13060 }, { "epoch": 1.24, "learning_rate": 0.00017519453406718543, "loss": 7.8043, "step": 13070 }, { "epoch": 1.24, "learning_rate": 0.0001751755551338015, "loss": 7.9493, "step": 13080 }, { "epoch": 1.24, "learning_rate": 0.00017515657620041755, "loss": 7.7208, "step": 13090 }, { "epoch": 1.24, "learning_rate": 0.00017513759726703361, "loss": 7.8285, "step": 13100 }, { "epoch": 1.24, "learning_rate": 0.00017511861833364968, "loss": 7.8769, "step": 13110 }, { "epoch": 1.25, "learning_rate": 0.0001750996394002657, "loss": 7.9413, "step": 13120 }, { "epoch": 1.25, "learning_rate": 0.00017508066046688177, "loss": 7.9337, "step": 13130 }, { "epoch": 1.25, "learning_rate": 0.00017506168153349783, "loss": 7.8663, "step": 13140 }, { "epoch": 1.25, "learning_rate": 0.0001750427026001139, "loss": 7.8629, "step": 13150 }, { "epoch": 1.25, "learning_rate": 0.00017502372366672996, "loss": 7.9704, "step": 13160 }, { "epoch": 1.25, "learning_rate": 0.00017500474473334602, "loss": 7.9988, "step": 13170 }, { "epoch": 1.25, "learning_rate": 0.00017498576579996205, "loss": 7.9194, "step": 13180 }, { "epoch": 1.25, "learning_rate": 0.0001749667868665781, "loss": 7.9008, "step": 13190 }, { "epoch": 1.25, "learning_rate": 0.00017494780793319417, "loss": 7.9587, "step": 13200 }, { "epoch": 1.25, "learning_rate": 0.00017492882899981023, "loss": 7.8987, "step": 13210 }, { "epoch": 1.25, "learning_rate": 0.0001749098500664263, "loss": 7.9129, "step": 13220 }, { "epoch": 1.26, "learning_rate": 0.00017489087113304233, "loss": 7.8203, "step": 13230 }, { "epoch": 1.26, "learning_rate": 0.0001748718921996584, "loss": 7.7843, "step": 13240 }, { "epoch": 1.26, "learning_rate": 0.00017485291326627445, "loss": 7.8528, "step": 13250 }, { "epoch": 1.26, "learning_rate": 0.00017483393433289051, "loss": 7.9028, "step": 13260 }, { "epoch": 1.26, "learning_rate": 0.00017481495539950658, "loss": 7.9222, "step": 13270 }, { "epoch": 1.26, "learning_rate": 0.0001747959764661226, "loss": 7.9008, "step": 13280 }, { "epoch": 1.26, "learning_rate": 0.00017477699753273867, "loss": 7.7545, "step": 13290 }, { "epoch": 1.26, "learning_rate": 0.00017475801859935473, "loss": 7.8473, "step": 13300 }, { "epoch": 1.26, "learning_rate": 0.0001747390396659708, "loss": 7.8514, "step": 13310 }, { "epoch": 1.26, "learning_rate": 0.00017472006073258685, "loss": 7.8281, "step": 13320 }, { "epoch": 1.26, "learning_rate": 0.00017470108179920292, "loss": 7.9274, "step": 13330 }, { "epoch": 1.27, "learning_rate": 0.00017468210286581895, "loss": 7.8855, "step": 13340 }, { "epoch": 1.27, "learning_rate": 0.000174663123932435, "loss": 7.9234, "step": 13350 }, { "epoch": 1.27, "learning_rate": 0.00017464414499905107, "loss": 7.9236, "step": 13360 }, { "epoch": 1.27, "learning_rate": 0.00017462516606566713, "loss": 7.8509, "step": 13370 }, { "epoch": 1.27, "learning_rate": 0.0001746061871322832, "loss": 7.9477, "step": 13380 }, { "epoch": 1.27, "learning_rate": 0.00017458720819889923, "loss": 7.8218, "step": 13390 }, { "epoch": 1.27, "learning_rate": 0.0001745682292655153, "loss": 7.9648, "step": 13400 }, { "epoch": 1.27, "learning_rate": 0.00017454925033213135, "loss": 7.9526, "step": 13410 }, { "epoch": 1.27, "learning_rate": 0.0001745302713987474, "loss": 7.8842, "step": 13420 }, { "epoch": 1.27, "learning_rate": 0.00017451129246536347, "loss": 7.9777, "step": 13430 }, { "epoch": 1.28, "learning_rate": 0.0001744923135319795, "loss": 7.9093, "step": 13440 }, { "epoch": 1.28, "learning_rate": 0.00017447333459859557, "loss": 7.8409, "step": 13450 }, { "epoch": 1.28, "learning_rate": 0.00017445435566521163, "loss": 7.8896, "step": 13460 }, { "epoch": 1.28, "learning_rate": 0.0001744353767318277, "loss": 7.8449, "step": 13470 }, { "epoch": 1.28, "learning_rate": 0.00017441639779844375, "loss": 7.8894, "step": 13480 }, { "epoch": 1.28, "learning_rate": 0.00017439741886505982, "loss": 7.9111, "step": 13490 }, { "epoch": 1.28, "learning_rate": 0.00017437843993167585, "loss": 7.8638, "step": 13500 }, { "epoch": 1.28, "learning_rate": 0.0001743594609982919, "loss": 7.8821, "step": 13510 }, { "epoch": 1.28, "learning_rate": 0.00017434048206490797, "loss": 7.8377, "step": 13520 }, { "epoch": 1.28, "learning_rate": 0.00017432150313152403, "loss": 7.9781, "step": 13530 }, { "epoch": 1.28, "learning_rate": 0.0001743025241981401, "loss": 7.8674, "step": 13540 }, { "epoch": 1.29, "learning_rate": 0.00017428354526475613, "loss": 7.8371, "step": 13550 }, { "epoch": 1.29, "learning_rate": 0.0001742645663313722, "loss": 7.8285, "step": 13560 }, { "epoch": 1.29, "learning_rate": 0.00017424558739798825, "loss": 7.8862, "step": 13570 }, { "epoch": 1.29, "learning_rate": 0.0001742266084646043, "loss": 7.883, "step": 13580 }, { "epoch": 1.29, "learning_rate": 0.00017420762953122037, "loss": 7.9047, "step": 13590 }, { "epoch": 1.29, "learning_rate": 0.0001741886505978364, "loss": 7.9901, "step": 13600 }, { "epoch": 1.29, "learning_rate": 0.00017416967166445247, "loss": 7.9442, "step": 13610 }, { "epoch": 1.29, "learning_rate": 0.00017415069273106853, "loss": 7.8501, "step": 13620 }, { "epoch": 1.29, "learning_rate": 0.0001741317137976846, "loss": 7.7991, "step": 13630 }, { "epoch": 1.29, "learning_rate": 0.00017411273486430065, "loss": 7.7366, "step": 13640 }, { "epoch": 1.3, "learning_rate": 0.0001740937559309167, "loss": 7.9723, "step": 13650 }, { "epoch": 1.3, "learning_rate": 0.00017407477699753275, "loss": 7.9549, "step": 13660 }, { "epoch": 1.3, "learning_rate": 0.0001740557980641488, "loss": 7.8386, "step": 13670 }, { "epoch": 1.3, "learning_rate": 0.00017403681913076487, "loss": 7.816, "step": 13680 }, { "epoch": 1.3, "learning_rate": 0.00017401784019738093, "loss": 7.7909, "step": 13690 }, { "epoch": 1.3, "learning_rate": 0.000173998861263997, "loss": 7.859, "step": 13700 }, { "epoch": 1.3, "learning_rate": 0.00017397988233061303, "loss": 7.9202, "step": 13710 }, { "epoch": 1.3, "learning_rate": 0.0001739609033972291, "loss": 7.8609, "step": 13720 }, { "epoch": 1.3, "learning_rate": 0.00017394192446384515, "loss": 8.0131, "step": 13730 }, { "epoch": 1.3, "learning_rate": 0.0001739229455304612, "loss": 7.9343, "step": 13740 }, { "epoch": 1.3, "learning_rate": 0.00017390396659707727, "loss": 7.8946, "step": 13750 }, { "epoch": 1.31, "learning_rate": 0.0001738849876636933, "loss": 7.8813, "step": 13760 }, { "epoch": 1.31, "learning_rate": 0.00017386600873030937, "loss": 7.9063, "step": 13770 }, { "epoch": 1.31, "learning_rate": 0.00017384702979692543, "loss": 7.806, "step": 13780 }, { "epoch": 1.31, "learning_rate": 0.0001738280508635415, "loss": 7.8988, "step": 13790 }, { "epoch": 1.31, "learning_rate": 0.00017380907193015755, "loss": 7.9128, "step": 13800 }, { "epoch": 1.31, "learning_rate": 0.00017379009299677359, "loss": 7.9373, "step": 13810 }, { "epoch": 1.31, "learning_rate": 0.00017377111406338965, "loss": 7.8923, "step": 13820 }, { "epoch": 1.31, "learning_rate": 0.0001737521351300057, "loss": 7.91, "step": 13830 }, { "epoch": 1.31, "learning_rate": 0.00017373315619662177, "loss": 7.8137, "step": 13840 }, { "epoch": 1.31, "learning_rate": 0.00017371417726323783, "loss": 7.8817, "step": 13850 }, { "epoch": 1.32, "learning_rate": 0.0001736951983298539, "loss": 7.874, "step": 13860 }, { "epoch": 1.32, "learning_rate": 0.00017367621939646993, "loss": 7.8659, "step": 13870 }, { "epoch": 1.32, "learning_rate": 0.000173657240463086, "loss": 7.9372, "step": 13880 }, { "epoch": 1.32, "learning_rate": 0.00017363826152970205, "loss": 7.7769, "step": 13890 }, { "epoch": 1.32, "learning_rate": 0.0001736192825963181, "loss": 7.9974, "step": 13900 }, { "epoch": 1.32, "learning_rate": 0.00017360030366293417, "loss": 7.7119, "step": 13910 }, { "epoch": 1.32, "learning_rate": 0.0001735813247295502, "loss": 7.9405, "step": 13920 }, { "epoch": 1.32, "learning_rate": 0.00017356234579616627, "loss": 7.8877, "step": 13930 }, { "epoch": 1.32, "learning_rate": 0.00017354336686278233, "loss": 7.8819, "step": 13940 }, { "epoch": 1.32, "learning_rate": 0.0001735243879293984, "loss": 7.9628, "step": 13950 }, { "epoch": 1.32, "learning_rate": 0.00017350540899601445, "loss": 7.9056, "step": 13960 }, { "epoch": 1.33, "learning_rate": 0.00017348643006263049, "loss": 7.7774, "step": 13970 }, { "epoch": 1.33, "learning_rate": 0.00017346745112924655, "loss": 7.9571, "step": 13980 }, { "epoch": 1.33, "learning_rate": 0.0001734484721958626, "loss": 7.8876, "step": 13990 }, { "epoch": 1.33, "learning_rate": 0.00017342949326247867, "loss": 7.8631, "step": 14000 }, { "epoch": 1.33, "learning_rate": 0.00017341051432909473, "loss": 7.8511, "step": 14010 }, { "epoch": 1.33, "learning_rate": 0.0001733915353957108, "loss": 7.8158, "step": 14020 }, { "epoch": 1.33, "learning_rate": 0.00017337255646232683, "loss": 7.8307, "step": 14030 }, { "epoch": 1.33, "learning_rate": 0.0001733535775289429, "loss": 7.8747, "step": 14040 }, { "epoch": 1.33, "learning_rate": 0.00017333459859555895, "loss": 7.8938, "step": 14050 }, { "epoch": 1.33, "learning_rate": 0.000173315619662175, "loss": 7.9864, "step": 14060 }, { "epoch": 1.34, "learning_rate": 0.00017329664072879107, "loss": 7.8846, "step": 14070 }, { "epoch": 1.34, "learning_rate": 0.0001732776617954071, "loss": 7.7599, "step": 14080 }, { "epoch": 1.34, "learning_rate": 0.00017325868286202317, "loss": 7.8753, "step": 14090 }, { "epoch": 1.34, "learning_rate": 0.00017323970392863923, "loss": 7.8896, "step": 14100 }, { "epoch": 1.34, "learning_rate": 0.0001732207249952553, "loss": 7.9112, "step": 14110 }, { "epoch": 1.34, "learning_rate": 0.00017320174606187135, "loss": 7.8636, "step": 14120 }, { "epoch": 1.34, "learning_rate": 0.00017318276712848738, "loss": 7.8808, "step": 14130 }, { "epoch": 1.34, "learning_rate": 0.00017316378819510345, "loss": 7.9008, "step": 14140 }, { "epoch": 1.34, "learning_rate": 0.0001731448092617195, "loss": 7.828, "step": 14150 }, { "epoch": 1.34, "learning_rate": 0.00017312583032833557, "loss": 7.9159, "step": 14160 }, { "epoch": 1.34, "learning_rate": 0.00017310685139495163, "loss": 8.0473, "step": 14170 }, { "epoch": 1.35, "learning_rate": 0.00017308787246156766, "loss": 7.9296, "step": 14180 }, { "epoch": 1.35, "learning_rate": 0.00017306889352818372, "loss": 7.905, "step": 14190 }, { "epoch": 1.35, "learning_rate": 0.00017304991459479979, "loss": 7.8447, "step": 14200 }, { "epoch": 1.35, "learning_rate": 0.00017303093566141585, "loss": 7.8995, "step": 14210 }, { "epoch": 1.35, "learning_rate": 0.0001730119567280319, "loss": 7.9104, "step": 14220 }, { "epoch": 1.35, "learning_rate": 0.00017299297779464797, "loss": 7.8408, "step": 14230 }, { "epoch": 1.35, "learning_rate": 0.000172973998861264, "loss": 7.8335, "step": 14240 }, { "epoch": 1.35, "learning_rate": 0.00017295501992788007, "loss": 7.7993, "step": 14250 }, { "epoch": 1.35, "learning_rate": 0.00017293604099449613, "loss": 7.8379, "step": 14260 }, { "epoch": 1.35, "learning_rate": 0.0001729170620611122, "loss": 7.8835, "step": 14270 }, { "epoch": 1.36, "learning_rate": 0.00017289808312772825, "loss": 7.7236, "step": 14280 }, { "epoch": 1.36, "learning_rate": 0.00017287910419434428, "loss": 7.9565, "step": 14290 }, { "epoch": 1.36, "learning_rate": 0.00017286012526096034, "loss": 7.9173, "step": 14300 }, { "epoch": 1.36, "learning_rate": 0.0001728411463275764, "loss": 7.8003, "step": 14310 }, { "epoch": 1.36, "learning_rate": 0.00017282216739419247, "loss": 7.9398, "step": 14320 }, { "epoch": 1.36, "learning_rate": 0.00017280318846080853, "loss": 7.8766, "step": 14330 }, { "epoch": 1.36, "learning_rate": 0.00017278420952742456, "loss": 7.8843, "step": 14340 }, { "epoch": 1.36, "learning_rate": 0.00017276523059404062, "loss": 7.7653, "step": 14350 }, { "epoch": 1.36, "learning_rate": 0.00017274625166065669, "loss": 7.8396, "step": 14360 }, { "epoch": 1.36, "learning_rate": 0.00017272727272727275, "loss": 7.9473, "step": 14370 }, { "epoch": 1.36, "learning_rate": 0.0001727082937938888, "loss": 7.8679, "step": 14380 }, { "epoch": 1.37, "learning_rate": 0.00017268931486050487, "loss": 7.9527, "step": 14390 }, { "epoch": 1.37, "learning_rate": 0.0001726703359271209, "loss": 7.9523, "step": 14400 }, { "epoch": 1.37, "learning_rate": 0.00017265135699373696, "loss": 7.8998, "step": 14410 }, { "epoch": 1.37, "learning_rate": 0.00017263237806035303, "loss": 7.9366, "step": 14420 }, { "epoch": 1.37, "learning_rate": 0.0001726133991269691, "loss": 7.8584, "step": 14430 }, { "epoch": 1.37, "learning_rate": 0.00017259442019358515, "loss": 7.9295, "step": 14440 }, { "epoch": 1.37, "learning_rate": 0.00017257544126020118, "loss": 7.8002, "step": 14450 }, { "epoch": 1.37, "learning_rate": 0.00017255646232681724, "loss": 7.9771, "step": 14460 }, { "epoch": 1.37, "learning_rate": 0.0001725374833934333, "loss": 7.9719, "step": 14470 }, { "epoch": 1.37, "learning_rate": 0.00017251850446004937, "loss": 7.8804, "step": 14480 }, { "epoch": 1.38, "learning_rate": 0.00017249952552666543, "loss": 7.8649, "step": 14490 }, { "epoch": 1.38, "learning_rate": 0.00017248054659328146, "loss": 7.7958, "step": 14500 }, { "epoch": 1.38, "learning_rate": 0.00017246156765989752, "loss": 7.8773, "step": 14510 }, { "epoch": 1.38, "learning_rate": 0.00017244258872651358, "loss": 7.8707, "step": 14520 }, { "epoch": 1.38, "learning_rate": 0.00017242360979312965, "loss": 7.8253, "step": 14530 }, { "epoch": 1.38, "learning_rate": 0.0001724046308597457, "loss": 7.9562, "step": 14540 }, { "epoch": 1.38, "learning_rate": 0.00017238565192636177, "loss": 7.8765, "step": 14550 }, { "epoch": 1.38, "learning_rate": 0.0001723666729929778, "loss": 7.8363, "step": 14560 }, { "epoch": 1.38, "learning_rate": 0.00017234769405959386, "loss": 7.9363, "step": 14570 }, { "epoch": 1.38, "learning_rate": 0.00017232871512620993, "loss": 7.9193, "step": 14580 }, { "epoch": 1.38, "learning_rate": 0.00017230973619282599, "loss": 7.8003, "step": 14590 }, { "epoch": 1.39, "learning_rate": 0.00017229075725944205, "loss": 7.8655, "step": 14600 }, { "epoch": 1.39, "learning_rate": 0.00017227177832605808, "loss": 7.8038, "step": 14610 }, { "epoch": 1.39, "learning_rate": 0.00017225279939267414, "loss": 7.8799, "step": 14620 }, { "epoch": 1.39, "learning_rate": 0.0001722338204592902, "loss": 7.81, "step": 14630 }, { "epoch": 1.39, "learning_rate": 0.00017221484152590627, "loss": 7.9701, "step": 14640 }, { "epoch": 1.39, "learning_rate": 0.00017219586259252233, "loss": 7.9683, "step": 14650 }, { "epoch": 1.39, "learning_rate": 0.00017217688365913836, "loss": 7.9245, "step": 14660 }, { "epoch": 1.39, "learning_rate": 0.00017215790472575442, "loss": 7.9538, "step": 14670 }, { "epoch": 1.39, "learning_rate": 0.00017213892579237048, "loss": 7.8722, "step": 14680 }, { "epoch": 1.39, "learning_rate": 0.00017211994685898654, "loss": 7.8426, "step": 14690 }, { "epoch": 1.39, "learning_rate": 0.0001721009679256026, "loss": 7.8924, "step": 14700 }, { "epoch": 1.4, "learning_rate": 0.00017208198899221864, "loss": 7.9378, "step": 14710 }, { "epoch": 1.4, "learning_rate": 0.0001720630100588347, "loss": 7.8708, "step": 14720 }, { "epoch": 1.4, "learning_rate": 0.00017204403112545076, "loss": 7.8503, "step": 14730 }, { "epoch": 1.4, "learning_rate": 0.00017202505219206682, "loss": 7.8053, "step": 14740 }, { "epoch": 1.4, "learning_rate": 0.00017200607325868289, "loss": 7.8506, "step": 14750 }, { "epoch": 1.4, "learning_rate": 0.00017198709432529895, "loss": 7.8753, "step": 14760 }, { "epoch": 1.4, "learning_rate": 0.00017196811539191498, "loss": 7.8996, "step": 14770 }, { "epoch": 1.4, "learning_rate": 0.00017194913645853104, "loss": 7.8542, "step": 14780 }, { "epoch": 1.4, "learning_rate": 0.0001719301575251471, "loss": 7.8244, "step": 14790 }, { "epoch": 1.4, "learning_rate": 0.00017191117859176316, "loss": 7.8535, "step": 14800 }, { "epoch": 1.41, "learning_rate": 0.00017189219965837923, "loss": 7.9369, "step": 14810 }, { "epoch": 1.41, "learning_rate": 0.00017187322072499526, "loss": 7.9422, "step": 14820 }, { "epoch": 1.41, "learning_rate": 0.00017185424179161132, "loss": 7.9927, "step": 14830 }, { "epoch": 1.41, "learning_rate": 0.00017183526285822738, "loss": 7.852, "step": 14840 }, { "epoch": 1.41, "learning_rate": 0.00017181628392484344, "loss": 7.947, "step": 14850 }, { "epoch": 1.41, "learning_rate": 0.0001717973049914595, "loss": 7.8916, "step": 14860 }, { "epoch": 1.41, "learning_rate": 0.00017177832605807554, "loss": 7.9578, "step": 14870 }, { "epoch": 1.41, "learning_rate": 0.0001717593471246916, "loss": 7.975, "step": 14880 }, { "epoch": 1.41, "learning_rate": 0.00017174036819130766, "loss": 7.9863, "step": 14890 }, { "epoch": 1.41, "learning_rate": 0.00017172138925792372, "loss": 7.8942, "step": 14900 }, { "epoch": 1.41, "learning_rate": 0.00017170241032453978, "loss": 7.8584, "step": 14910 }, { "epoch": 1.42, "learning_rate": 0.00017168343139115585, "loss": 7.9821, "step": 14920 }, { "epoch": 1.42, "learning_rate": 0.00017166445245777188, "loss": 7.8786, "step": 14930 }, { "epoch": 1.42, "learning_rate": 0.00017164547352438794, "loss": 7.8327, "step": 14940 }, { "epoch": 1.42, "learning_rate": 0.000171626494591004, "loss": 7.8841, "step": 14950 }, { "epoch": 1.42, "learning_rate": 0.00017160751565762006, "loss": 7.8089, "step": 14960 }, { "epoch": 1.42, "learning_rate": 0.00017158853672423613, "loss": 7.8177, "step": 14970 }, { "epoch": 1.42, "learning_rate": 0.00017156955779085216, "loss": 7.9116, "step": 14980 }, { "epoch": 1.42, "learning_rate": 0.00017155057885746822, "loss": 7.898, "step": 14990 }, { "epoch": 1.42, "learning_rate": 0.00017153159992408428, "loss": 7.8931, "step": 15000 }, { "epoch": 1.42, "learning_rate": 0.00017151262099070034, "loss": 7.8535, "step": 15010 }, { "epoch": 1.43, "learning_rate": 0.0001714936420573164, "loss": 7.9483, "step": 15020 }, { "epoch": 1.43, "learning_rate": 0.00017147466312393244, "loss": 7.8901, "step": 15030 }, { "epoch": 1.43, "learning_rate": 0.0001714556841905485, "loss": 7.8264, "step": 15040 }, { "epoch": 1.43, "learning_rate": 0.00017143670525716456, "loss": 7.8682, "step": 15050 }, { "epoch": 1.43, "learning_rate": 0.00017141772632378062, "loss": 7.9068, "step": 15060 }, { "epoch": 1.43, "learning_rate": 0.00017139874739039668, "loss": 7.8185, "step": 15070 }, { "epoch": 1.43, "learning_rate": 0.00017137976845701274, "loss": 7.8866, "step": 15080 }, { "epoch": 1.43, "learning_rate": 0.00017136078952362878, "loss": 7.8157, "step": 15090 }, { "epoch": 1.43, "learning_rate": 0.00017134181059024484, "loss": 7.9163, "step": 15100 }, { "epoch": 1.43, "learning_rate": 0.0001713228316568609, "loss": 7.9395, "step": 15110 }, { "epoch": 1.43, "learning_rate": 0.00017130385272347696, "loss": 7.7885, "step": 15120 }, { "epoch": 1.44, "learning_rate": 0.00017128487379009302, "loss": 7.9234, "step": 15130 }, { "epoch": 1.44, "learning_rate": 0.00017126589485670906, "loss": 7.7686, "step": 15140 }, { "epoch": 1.44, "learning_rate": 0.00017124691592332512, "loss": 7.9083, "step": 15150 }, { "epoch": 1.44, "learning_rate": 0.00017122793698994118, "loss": 7.9122, "step": 15160 }, { "epoch": 1.44, "learning_rate": 0.00017120895805655724, "loss": 7.8276, "step": 15170 }, { "epoch": 1.44, "learning_rate": 0.0001711899791231733, "loss": 7.8299, "step": 15180 }, { "epoch": 1.44, "learning_rate": 0.00017117100018978934, "loss": 7.8205, "step": 15190 }, { "epoch": 1.44, "learning_rate": 0.0001711520212564054, "loss": 7.9315, "step": 15200 }, { "epoch": 1.44, "learning_rate": 0.00017113304232302146, "loss": 7.931, "step": 15210 }, { "epoch": 1.44, "learning_rate": 0.00017111406338963752, "loss": 7.7818, "step": 15220 }, { "epoch": 1.45, "learning_rate": 0.00017109508445625358, "loss": 7.8072, "step": 15230 }, { "epoch": 1.45, "learning_rate": 0.00017107610552286962, "loss": 7.8526, "step": 15240 }, { "epoch": 1.45, "learning_rate": 0.00017105712658948568, "loss": 7.8238, "step": 15250 }, { "epoch": 1.45, "learning_rate": 0.00017103814765610174, "loss": 7.8086, "step": 15260 }, { "epoch": 1.45, "learning_rate": 0.0001710191687227178, "loss": 7.9769, "step": 15270 }, { "epoch": 1.45, "learning_rate": 0.00017100018978933386, "loss": 7.9049, "step": 15280 }, { "epoch": 1.45, "learning_rate": 0.00017098121085594992, "loss": 7.9835, "step": 15290 }, { "epoch": 1.45, "learning_rate": 0.00017096223192256596, "loss": 7.9445, "step": 15300 }, { "epoch": 1.45, "learning_rate": 0.00017094325298918202, "loss": 7.9323, "step": 15310 }, { "epoch": 1.45, "learning_rate": 0.00017092427405579808, "loss": 7.8599, "step": 15320 }, { "epoch": 1.45, "learning_rate": 0.00017090529512241414, "loss": 7.8268, "step": 15330 }, { "epoch": 1.46, "learning_rate": 0.0001708863161890302, "loss": 7.9166, "step": 15340 }, { "epoch": 1.46, "learning_rate": 0.00017086733725564624, "loss": 7.8486, "step": 15350 }, { "epoch": 1.46, "learning_rate": 0.0001708483583222623, "loss": 7.9515, "step": 15360 }, { "epoch": 1.46, "learning_rate": 0.00017082937938887836, "loss": 8.0511, "step": 15370 }, { "epoch": 1.46, "learning_rate": 0.00017081040045549442, "loss": 7.8987, "step": 15380 }, { "epoch": 1.46, "learning_rate": 0.00017079142152211048, "loss": 7.8301, "step": 15390 }, { "epoch": 1.46, "learning_rate": 0.00017077244258872652, "loss": 7.8589, "step": 15400 }, { "epoch": 1.46, "learning_rate": 0.00017075346365534258, "loss": 7.8948, "step": 15410 }, { "epoch": 1.46, "learning_rate": 0.00017073448472195864, "loss": 7.9108, "step": 15420 }, { "epoch": 1.46, "learning_rate": 0.0001707155057885747, "loss": 7.8949, "step": 15430 }, { "epoch": 1.47, "learning_rate": 0.00017069652685519076, "loss": 7.8468, "step": 15440 }, { "epoch": 1.47, "learning_rate": 0.00017067754792180682, "loss": 7.8275, "step": 15450 }, { "epoch": 1.47, "learning_rate": 0.00017065856898842286, "loss": 7.8753, "step": 15460 }, { "epoch": 1.47, "learning_rate": 0.00017063959005503892, "loss": 7.9076, "step": 15470 }, { "epoch": 1.47, "learning_rate": 0.00017062061112165498, "loss": 7.9085, "step": 15480 }, { "epoch": 1.47, "learning_rate": 0.00017060163218827104, "loss": 7.8878, "step": 15490 }, { "epoch": 1.47, "learning_rate": 0.0001705826532548871, "loss": 7.8957, "step": 15500 }, { "epoch": 1.47, "learning_rate": 0.00017056367432150314, "loss": 7.9515, "step": 15510 }, { "epoch": 1.47, "learning_rate": 0.0001705446953881192, "loss": 7.8527, "step": 15520 }, { "epoch": 1.47, "learning_rate": 0.00017052571645473526, "loss": 7.8314, "step": 15530 }, { "epoch": 1.47, "learning_rate": 0.00017050673752135132, "loss": 7.8202, "step": 15540 }, { "epoch": 1.48, "learning_rate": 0.00017048775858796738, "loss": 7.8515, "step": 15550 }, { "epoch": 1.48, "learning_rate": 0.00017046877965458342, "loss": 7.9015, "step": 15560 }, { "epoch": 1.48, "learning_rate": 0.00017044980072119948, "loss": 7.9215, "step": 15570 }, { "epoch": 1.48, "learning_rate": 0.00017043082178781554, "loss": 7.9696, "step": 15580 }, { "epoch": 1.48, "learning_rate": 0.0001704118428544316, "loss": 7.8362, "step": 15590 }, { "epoch": 1.48, "learning_rate": 0.00017039286392104766, "loss": 7.9248, "step": 15600 }, { "epoch": 1.48, "learning_rate": 0.00017037388498766372, "loss": 7.8658, "step": 15610 }, { "epoch": 1.48, "learning_rate": 0.00017035490605427976, "loss": 7.9837, "step": 15620 }, { "epoch": 1.48, "learning_rate": 0.00017033592712089582, "loss": 7.8591, "step": 15630 }, { "epoch": 1.48, "learning_rate": 0.00017031694818751188, "loss": 7.9034, "step": 15640 }, { "epoch": 1.49, "learning_rate": 0.00017029796925412794, "loss": 7.876, "step": 15650 }, { "epoch": 1.49, "learning_rate": 0.000170278990320744, "loss": 7.8455, "step": 15660 }, { "epoch": 1.49, "learning_rate": 0.00017026001138736004, "loss": 7.8404, "step": 15670 }, { "epoch": 1.49, "learning_rate": 0.0001702410324539761, "loss": 7.7896, "step": 15680 }, { "epoch": 1.49, "learning_rate": 0.00017022205352059216, "loss": 7.93, "step": 15690 }, { "epoch": 1.49, "learning_rate": 0.00017020307458720822, "loss": 7.7629, "step": 15700 }, { "epoch": 1.49, "learning_rate": 0.00017018409565382428, "loss": 7.8888, "step": 15710 }, { "epoch": 1.49, "learning_rate": 0.00017016511672044031, "loss": 7.8987, "step": 15720 }, { "epoch": 1.49, "learning_rate": 0.00017014613778705638, "loss": 7.9326, "step": 15730 }, { "epoch": 1.49, "learning_rate": 0.00017012715885367244, "loss": 7.8659, "step": 15740 }, { "epoch": 1.49, "learning_rate": 0.0001701081799202885, "loss": 7.8767, "step": 15750 }, { "epoch": 1.5, "learning_rate": 0.00017008920098690456, "loss": 7.8689, "step": 15760 }, { "epoch": 1.5, "learning_rate": 0.0001700702220535206, "loss": 7.8666, "step": 15770 }, { "epoch": 1.5, "learning_rate": 0.00017005124312013665, "loss": 7.9272, "step": 15780 }, { "epoch": 1.5, "learning_rate": 0.00017003226418675272, "loss": 7.8931, "step": 15790 }, { "epoch": 1.5, "learning_rate": 0.00017001328525336878, "loss": 7.9144, "step": 15800 }, { "epoch": 1.5, "learning_rate": 0.00016999430631998484, "loss": 7.9967, "step": 15810 }, { "epoch": 1.5, "learning_rate": 0.0001699753273866009, "loss": 7.876, "step": 15820 }, { "epoch": 1.5, "learning_rate": 0.00016995634845321693, "loss": 7.8345, "step": 15830 }, { "epoch": 1.5, "learning_rate": 0.000169937369519833, "loss": 7.9589, "step": 15840 }, { "epoch": 1.5, "learning_rate": 0.00016991839058644906, "loss": 7.7895, "step": 15850 }, { "epoch": 1.51, "learning_rate": 0.00016989941165306512, "loss": 7.8575, "step": 15860 }, { "epoch": 1.51, "learning_rate": 0.00016988043271968118, "loss": 7.882, "step": 15870 }, { "epoch": 1.51, "learning_rate": 0.0001698614537862972, "loss": 7.7647, "step": 15880 }, { "epoch": 1.51, "learning_rate": 0.00016984247485291327, "loss": 7.8884, "step": 15890 }, { "epoch": 1.51, "learning_rate": 0.00016982349591952934, "loss": 7.8281, "step": 15900 }, { "epoch": 1.51, "learning_rate": 0.0001698045169861454, "loss": 7.7815, "step": 15910 }, { "epoch": 1.51, "learning_rate": 0.00016978553805276146, "loss": 7.844, "step": 15920 }, { "epoch": 1.51, "learning_rate": 0.0001697665591193775, "loss": 7.8023, "step": 15930 }, { "epoch": 1.51, "learning_rate": 0.00016974758018599355, "loss": 7.935, "step": 15940 }, { "epoch": 1.51, "learning_rate": 0.00016972860125260962, "loss": 7.8531, "step": 15950 }, { "epoch": 1.51, "learning_rate": 0.00016970962231922568, "loss": 7.8349, "step": 15960 }, { "epoch": 1.52, "learning_rate": 0.00016969064338584174, "loss": 7.8348, "step": 15970 }, { "epoch": 1.52, "learning_rate": 0.0001696716644524578, "loss": 7.8758, "step": 15980 }, { "epoch": 1.52, "learning_rate": 0.00016965268551907383, "loss": 7.851, "step": 15990 }, { "epoch": 1.52, "learning_rate": 0.0001696337065856899, "loss": 7.7998, "step": 16000 }, { "epoch": 1.52, "learning_rate": 0.00016961472765230596, "loss": 7.8882, "step": 16010 }, { "epoch": 1.52, "learning_rate": 0.00016959574871892202, "loss": 7.8111, "step": 16020 }, { "epoch": 1.52, "learning_rate": 0.00016957676978553808, "loss": 7.8344, "step": 16030 }, { "epoch": 1.52, "learning_rate": 0.0001695577908521541, "loss": 7.9109, "step": 16040 }, { "epoch": 1.52, "learning_rate": 0.00016953881191877017, "loss": 7.9387, "step": 16050 }, { "epoch": 1.52, "learning_rate": 0.00016951983298538624, "loss": 7.9571, "step": 16060 }, { "epoch": 1.52, "learning_rate": 0.0001695008540520023, "loss": 7.878, "step": 16070 }, { "epoch": 1.53, "learning_rate": 0.00016948187511861836, "loss": 7.9668, "step": 16080 }, { "epoch": 1.53, "learning_rate": 0.0001694628961852344, "loss": 7.891, "step": 16090 }, { "epoch": 1.53, "learning_rate": 0.00016944391725185045, "loss": 7.9827, "step": 16100 }, { "epoch": 1.53, "learning_rate": 0.00016942493831846651, "loss": 7.873, "step": 16110 }, { "epoch": 1.53, "learning_rate": 0.00016940595938508258, "loss": 7.8736, "step": 16120 }, { "epoch": 1.53, "learning_rate": 0.00016938698045169864, "loss": 7.9248, "step": 16130 }, { "epoch": 1.53, "learning_rate": 0.0001693680015183147, "loss": 7.8698, "step": 16140 }, { "epoch": 1.53, "learning_rate": 0.00016934902258493073, "loss": 7.9242, "step": 16150 }, { "epoch": 1.53, "learning_rate": 0.0001693300436515468, "loss": 7.9224, "step": 16160 }, { "epoch": 1.53, "learning_rate": 0.00016931106471816285, "loss": 7.9268, "step": 16170 }, { "epoch": 1.54, "learning_rate": 0.00016929208578477892, "loss": 7.776, "step": 16180 }, { "epoch": 1.54, "learning_rate": 0.00016927310685139498, "loss": 7.8963, "step": 16190 }, { "epoch": 1.54, "learning_rate": 0.000169254127918011, "loss": 7.8501, "step": 16200 }, { "epoch": 1.54, "learning_rate": 0.00016923514898462707, "loss": 7.9608, "step": 16210 }, { "epoch": 1.54, "learning_rate": 0.00016921617005124313, "loss": 7.8569, "step": 16220 }, { "epoch": 1.54, "learning_rate": 0.0001691971911178592, "loss": 7.879, "step": 16230 }, { "epoch": 1.54, "learning_rate": 0.00016917821218447526, "loss": 7.9396, "step": 16240 }, { "epoch": 1.54, "learning_rate": 0.0001691592332510913, "loss": 7.7836, "step": 16250 }, { "epoch": 1.54, "learning_rate": 0.00016914025431770735, "loss": 7.8752, "step": 16260 }, { "epoch": 1.54, "learning_rate": 0.0001691212753843234, "loss": 7.9026, "step": 16270 }, { "epoch": 1.54, "learning_rate": 0.00016910229645093947, "loss": 7.9376, "step": 16280 }, { "epoch": 1.55, "learning_rate": 0.00016908331751755554, "loss": 7.853, "step": 16290 }, { "epoch": 1.55, "learning_rate": 0.00016906433858417157, "loss": 7.8648, "step": 16300 }, { "epoch": 1.55, "learning_rate": 0.00016904535965078763, "loss": 7.8895, "step": 16310 }, { "epoch": 1.55, "learning_rate": 0.0001690263807174037, "loss": 7.8541, "step": 16320 }, { "epoch": 1.55, "learning_rate": 0.00016900740178401975, "loss": 7.8666, "step": 16330 }, { "epoch": 1.55, "learning_rate": 0.00016898842285063582, "loss": 7.9544, "step": 16340 }, { "epoch": 1.55, "learning_rate": 0.00016896944391725188, "loss": 7.9778, "step": 16350 }, { "epoch": 1.55, "learning_rate": 0.0001689504649838679, "loss": 7.8113, "step": 16360 }, { "epoch": 1.55, "learning_rate": 0.00016893148605048397, "loss": 7.905, "step": 16370 }, { "epoch": 1.55, "learning_rate": 0.00016891250711710003, "loss": 7.8119, "step": 16380 }, { "epoch": 1.56, "learning_rate": 0.0001688935281837161, "loss": 8.0333, "step": 16390 }, { "epoch": 1.56, "learning_rate": 0.00016887454925033216, "loss": 7.9359, "step": 16400 }, { "epoch": 1.56, "learning_rate": 0.0001688555703169482, "loss": 7.8126, "step": 16410 }, { "epoch": 1.56, "learning_rate": 0.00016883659138356425, "loss": 7.8503, "step": 16420 }, { "epoch": 1.56, "learning_rate": 0.0001688176124501803, "loss": 7.741, "step": 16430 }, { "epoch": 1.56, "learning_rate": 0.00016879863351679637, "loss": 7.9028, "step": 16440 }, { "epoch": 1.56, "learning_rate": 0.00016877965458341244, "loss": 7.7372, "step": 16450 }, { "epoch": 1.56, "learning_rate": 0.00016876067565002847, "loss": 7.815, "step": 16460 }, { "epoch": 1.56, "learning_rate": 0.00016874169671664453, "loss": 7.8055, "step": 16470 }, { "epoch": 1.56, "learning_rate": 0.0001687227177832606, "loss": 7.8315, "step": 16480 }, { "epoch": 1.56, "learning_rate": 0.00016870373884987665, "loss": 7.8523, "step": 16490 }, { "epoch": 1.57, "learning_rate": 0.00016868475991649271, "loss": 7.9014, "step": 16500 }, { "epoch": 1.57, "learning_rate": 0.00016866578098310878, "loss": 7.7971, "step": 16510 }, { "epoch": 1.57, "learning_rate": 0.0001686468020497248, "loss": 7.8215, "step": 16520 }, { "epoch": 1.57, "learning_rate": 0.00016862782311634087, "loss": 7.8164, "step": 16530 }, { "epoch": 1.57, "learning_rate": 0.00016860884418295693, "loss": 7.8579, "step": 16540 }, { "epoch": 1.57, "learning_rate": 0.000168589865249573, "loss": 7.9486, "step": 16550 }, { "epoch": 1.57, "learning_rate": 0.00016857088631618906, "loss": 7.9259, "step": 16560 }, { "epoch": 1.57, "learning_rate": 0.0001685519073828051, "loss": 7.8656, "step": 16570 }, { "epoch": 1.57, "learning_rate": 0.00016853292844942115, "loss": 7.729, "step": 16580 }, { "epoch": 1.57, "learning_rate": 0.0001685139495160372, "loss": 7.969, "step": 16590 }, { "epoch": 1.58, "learning_rate": 0.00016849497058265327, "loss": 7.8591, "step": 16600 }, { "epoch": 1.58, "learning_rate": 0.00016847599164926933, "loss": 7.781, "step": 16610 }, { "epoch": 1.58, "learning_rate": 0.00016845701271588537, "loss": 7.8389, "step": 16620 }, { "epoch": 1.58, "learning_rate": 0.00016843803378250143, "loss": 7.8652, "step": 16630 }, { "epoch": 1.58, "learning_rate": 0.0001684190548491175, "loss": 7.9893, "step": 16640 }, { "epoch": 1.58, "learning_rate": 0.00016840007591573355, "loss": 7.8324, "step": 16650 }, { "epoch": 1.58, "learning_rate": 0.00016838109698234961, "loss": 7.8324, "step": 16660 }, { "epoch": 1.58, "learning_rate": 0.00016836211804896567, "loss": 8.0189, "step": 16670 }, { "epoch": 1.58, "learning_rate": 0.0001683431391155817, "loss": 7.825, "step": 16680 }, { "epoch": 1.58, "learning_rate": 0.00016832416018219777, "loss": 7.8511, "step": 16690 }, { "epoch": 1.58, "learning_rate": 0.00016830518124881383, "loss": 7.873, "step": 16700 }, { "epoch": 1.59, "learning_rate": 0.0001682862023154299, "loss": 7.8481, "step": 16710 }, { "epoch": 1.59, "learning_rate": 0.00016826722338204595, "loss": 7.8828, "step": 16720 }, { "epoch": 1.59, "learning_rate": 0.000168248244448662, "loss": 7.8733, "step": 16730 }, { "epoch": 1.59, "learning_rate": 0.00016822926551527805, "loss": 7.968, "step": 16740 }, { "epoch": 1.59, "learning_rate": 0.0001682102865818941, "loss": 7.806, "step": 16750 }, { "epoch": 1.59, "learning_rate": 0.00016819130764851017, "loss": 7.8548, "step": 16760 }, { "epoch": 1.59, "learning_rate": 0.00016817232871512623, "loss": 7.8729, "step": 16770 }, { "epoch": 1.59, "learning_rate": 0.00016815334978174227, "loss": 7.842, "step": 16780 }, { "epoch": 1.59, "learning_rate": 0.00016813437084835833, "loss": 7.8119, "step": 16790 }, { "epoch": 1.59, "learning_rate": 0.0001681153919149744, "loss": 7.8486, "step": 16800 }, { "epoch": 1.6, "learning_rate": 0.00016809641298159045, "loss": 7.7795, "step": 16810 }, { "epoch": 1.6, "learning_rate": 0.0001680774340482065, "loss": 7.9168, "step": 16820 }, { "epoch": 1.6, "learning_rate": 0.00016805845511482257, "loss": 7.8893, "step": 16830 }, { "epoch": 1.6, "learning_rate": 0.0001680394761814386, "loss": 7.8338, "step": 16840 }, { "epoch": 1.6, "learning_rate": 0.00016802049724805467, "loss": 7.8607, "step": 16850 }, { "epoch": 1.6, "learning_rate": 0.00016800151831467073, "loss": 7.7818, "step": 16860 }, { "epoch": 1.6, "learning_rate": 0.0001679825393812868, "loss": 7.8901, "step": 16870 }, { "epoch": 1.6, "learning_rate": 0.00016796356044790285, "loss": 7.9417, "step": 16880 }, { "epoch": 1.6, "learning_rate": 0.0001679445815145189, "loss": 7.8965, "step": 16890 }, { "epoch": 1.6, "learning_rate": 0.00016792560258113495, "loss": 7.891, "step": 16900 }, { "epoch": 1.6, "learning_rate": 0.000167906623647751, "loss": 7.8494, "step": 16910 }, { "epoch": 1.61, "learning_rate": 0.00016788764471436707, "loss": 7.8651, "step": 16920 }, { "epoch": 1.61, "learning_rate": 0.00016786866578098313, "loss": 7.8477, "step": 16930 }, { "epoch": 1.61, "learning_rate": 0.00016784968684759917, "loss": 7.7701, "step": 16940 }, { "epoch": 1.61, "learning_rate": 0.00016783070791421523, "loss": 7.8612, "step": 16950 }, { "epoch": 1.61, "learning_rate": 0.0001678117289808313, "loss": 7.8107, "step": 16960 }, { "epoch": 1.61, "learning_rate": 0.00016779275004744735, "loss": 7.83, "step": 16970 }, { "epoch": 1.61, "learning_rate": 0.0001677737711140634, "loss": 7.9374, "step": 16980 }, { "epoch": 1.61, "learning_rate": 0.00016775479218067945, "loss": 7.885, "step": 16990 }, { "epoch": 1.61, "learning_rate": 0.0001677358132472955, "loss": 7.8927, "step": 17000 }, { "epoch": 1.61, "learning_rate": 0.00016771683431391157, "loss": 7.8929, "step": 17010 }, { "epoch": 1.62, "learning_rate": 0.00016769785538052763, "loss": 7.7847, "step": 17020 }, { "epoch": 1.62, "learning_rate": 0.0001676788764471437, "loss": 7.9631, "step": 17030 }, { "epoch": 1.62, "learning_rate": 0.00016765989751375975, "loss": 7.9438, "step": 17040 }, { "epoch": 1.62, "learning_rate": 0.0001676409185803758, "loss": 7.7907, "step": 17050 }, { "epoch": 1.62, "learning_rate": 0.00016762193964699185, "loss": 7.9279, "step": 17060 }, { "epoch": 1.62, "learning_rate": 0.0001676029607136079, "loss": 7.8815, "step": 17070 }, { "epoch": 1.62, "learning_rate": 0.00016758398178022397, "loss": 8.0089, "step": 17080 }, { "epoch": 1.62, "learning_rate": 0.00016756500284684003, "loss": 7.9016, "step": 17090 }, { "epoch": 1.62, "learning_rate": 0.00016754602391345607, "loss": 7.7751, "step": 17100 }, { "epoch": 1.62, "learning_rate": 0.00016752704498007213, "loss": 7.8964, "step": 17110 }, { "epoch": 1.62, "learning_rate": 0.0001675080660466882, "loss": 7.7608, "step": 17120 }, { "epoch": 1.63, "learning_rate": 0.00016748908711330425, "loss": 7.9548, "step": 17130 }, { "epoch": 1.63, "learning_rate": 0.0001674701081799203, "loss": 7.9519, "step": 17140 }, { "epoch": 1.63, "learning_rate": 0.00016745112924653635, "loss": 7.9832, "step": 17150 }, { "epoch": 1.63, "learning_rate": 0.0001674321503131524, "loss": 7.7887, "step": 17160 }, { "epoch": 1.63, "learning_rate": 0.00016741317137976847, "loss": 7.972, "step": 17170 }, { "epoch": 1.63, "learning_rate": 0.00016739419244638453, "loss": 7.8508, "step": 17180 }, { "epoch": 1.63, "learning_rate": 0.0001673752135130006, "loss": 7.8145, "step": 17190 }, { "epoch": 1.63, "learning_rate": 0.00016735623457961665, "loss": 7.7618, "step": 17200 }, { "epoch": 1.63, "learning_rate": 0.00016733725564623269, "loss": 7.8352, "step": 17210 }, { "epoch": 1.63, "learning_rate": 0.00016731827671284875, "loss": 7.8661, "step": 17220 }, { "epoch": 1.64, "learning_rate": 0.0001672992977794648, "loss": 7.9131, "step": 17230 }, { "epoch": 1.64, "learning_rate": 0.00016728031884608087, "loss": 7.7831, "step": 17240 }, { "epoch": 1.64, "learning_rate": 0.00016726133991269693, "loss": 7.8058, "step": 17250 }, { "epoch": 1.64, "learning_rate": 0.00016724236097931296, "loss": 7.8926, "step": 17260 }, { "epoch": 1.64, "learning_rate": 0.00016722338204592903, "loss": 7.8892, "step": 17270 }, { "epoch": 1.64, "learning_rate": 0.0001672044031125451, "loss": 7.9619, "step": 17280 }, { "epoch": 1.64, "learning_rate": 0.00016718542417916115, "loss": 7.8365, "step": 17290 }, { "epoch": 1.64, "learning_rate": 0.0001671664452457772, "loss": 7.8317, "step": 17300 }, { "epoch": 1.64, "learning_rate": 0.00016714746631239324, "loss": 7.8683, "step": 17310 }, { "epoch": 1.64, "learning_rate": 0.0001671284873790093, "loss": 7.9458, "step": 17320 }, { "epoch": 1.64, "learning_rate": 0.00016710950844562537, "loss": 7.9498, "step": 17330 }, { "epoch": 1.65, "learning_rate": 0.00016709052951224143, "loss": 7.9496, "step": 17340 }, { "epoch": 1.65, "learning_rate": 0.0001670715505788575, "loss": 7.9351, "step": 17350 }, { "epoch": 1.65, "learning_rate": 0.00016705257164547355, "loss": 7.8879, "step": 17360 }, { "epoch": 1.65, "learning_rate": 0.00016703359271208958, "loss": 7.8785, "step": 17370 }, { "epoch": 1.65, "learning_rate": 0.00016701461377870565, "loss": 7.8406, "step": 17380 }, { "epoch": 1.65, "learning_rate": 0.0001669956348453217, "loss": 7.8577, "step": 17390 }, { "epoch": 1.65, "learning_rate": 0.00016697665591193777, "loss": 7.8559, "step": 17400 }, { "epoch": 1.65, "learning_rate": 0.00016695767697855383, "loss": 7.8396, "step": 17410 }, { "epoch": 1.65, "learning_rate": 0.00016693869804516986, "loss": 7.9009, "step": 17420 }, { "epoch": 1.65, "learning_rate": 0.00016691971911178593, "loss": 7.8063, "step": 17430 }, { "epoch": 1.65, "learning_rate": 0.000166900740178402, "loss": 7.8424, "step": 17440 }, { "epoch": 1.66, "learning_rate": 0.00016688176124501805, "loss": 7.9207, "step": 17450 }, { "epoch": 1.66, "learning_rate": 0.0001668627823116341, "loss": 7.9105, "step": 17460 }, { "epoch": 1.66, "learning_rate": 0.00016684380337825014, "loss": 7.8379, "step": 17470 }, { "epoch": 1.66, "learning_rate": 0.0001668248244448662, "loss": 7.7831, "step": 17480 }, { "epoch": 1.66, "learning_rate": 0.00016680584551148227, "loss": 7.9231, "step": 17490 }, { "epoch": 1.66, "learning_rate": 0.00016678686657809833, "loss": 7.86, "step": 17500 }, { "epoch": 1.66, "learning_rate": 0.0001667678876447144, "loss": 7.8946, "step": 17510 }, { "epoch": 1.66, "learning_rate": 0.00016674890871133042, "loss": 7.9196, "step": 17520 }, { "epoch": 1.66, "learning_rate": 0.00016672992977794648, "loss": 7.9259, "step": 17530 }, { "epoch": 1.66, "learning_rate": 0.00016671095084456255, "loss": 7.9177, "step": 17540 }, { "epoch": 1.67, "learning_rate": 0.0001666919719111786, "loss": 7.8724, "step": 17550 }, { "epoch": 1.67, "learning_rate": 0.00016667299297779467, "loss": 7.8552, "step": 17560 }, { "epoch": 1.67, "learning_rate": 0.00016665401404441073, "loss": 7.8017, "step": 17570 }, { "epoch": 1.67, "learning_rate": 0.00016663503511102676, "loss": 7.843, "step": 17580 }, { "epoch": 1.67, "learning_rate": 0.00016661605617764282, "loss": 7.8963, "step": 17590 }, { "epoch": 1.67, "learning_rate": 0.00016659707724425889, "loss": 7.822, "step": 17600 }, { "epoch": 1.67, "learning_rate": 0.00016657809831087495, "loss": 7.844, "step": 17610 }, { "epoch": 1.67, "learning_rate": 0.000166559119377491, "loss": 7.8534, "step": 17620 }, { "epoch": 1.67, "learning_rate": 0.00016654014044410704, "loss": 7.9459, "step": 17630 }, { "epoch": 1.67, "learning_rate": 0.0001665211615107231, "loss": 7.8753, "step": 17640 }, { "epoch": 1.67, "learning_rate": 0.00016650218257733917, "loss": 7.8419, "step": 17650 }, { "epoch": 1.68, "learning_rate": 0.00016648320364395523, "loss": 7.9352, "step": 17660 }, { "epoch": 1.68, "learning_rate": 0.0001664642247105713, "loss": 7.8022, "step": 17670 }, { "epoch": 1.68, "learning_rate": 0.00016644524577718732, "loss": 7.9272, "step": 17680 }, { "epoch": 1.68, "learning_rate": 0.00016642626684380338, "loss": 7.8676, "step": 17690 }, { "epoch": 1.68, "learning_rate": 0.00016640728791041944, "loss": 7.9678, "step": 17700 }, { "epoch": 1.68, "learning_rate": 0.0001663883089770355, "loss": 7.919, "step": 17710 }, { "epoch": 1.68, "learning_rate": 0.00016636933004365157, "loss": 7.8938, "step": 17720 }, { "epoch": 1.68, "learning_rate": 0.00016635035111026763, "loss": 7.9555, "step": 17730 }, { "epoch": 1.68, "learning_rate": 0.00016633137217688366, "loss": 7.8598, "step": 17740 }, { "epoch": 1.68, "learning_rate": 0.00016631239324349972, "loss": 7.9456, "step": 17750 }, { "epoch": 1.69, "learning_rate": 0.00016629341431011578, "loss": 7.9102, "step": 17760 }, { "epoch": 1.69, "learning_rate": 0.00016627443537673185, "loss": 7.8114, "step": 17770 }, { "epoch": 1.69, "learning_rate": 0.0001662554564433479, "loss": 7.8783, "step": 17780 }, { "epoch": 1.69, "learning_rate": 0.00016623647750996394, "loss": 7.902, "step": 17790 }, { "epoch": 1.69, "learning_rate": 0.00016621749857658, "loss": 7.8586, "step": 17800 }, { "epoch": 1.69, "learning_rate": 0.00016619851964319606, "loss": 7.9233, "step": 17810 }, { "epoch": 1.69, "learning_rate": 0.00016617954070981213, "loss": 7.8281, "step": 17820 }, { "epoch": 1.69, "learning_rate": 0.0001661605617764282, "loss": 7.7916, "step": 17830 }, { "epoch": 1.69, "learning_rate": 0.00016614158284304422, "loss": 7.9073, "step": 17840 }, { "epoch": 1.69, "learning_rate": 0.00016612260390966028, "loss": 7.8684, "step": 17850 }, { "epoch": 1.69, "learning_rate": 0.00016610362497627634, "loss": 7.9522, "step": 17860 }, { "epoch": 1.7, "learning_rate": 0.0001660846460428924, "loss": 7.7994, "step": 17870 }, { "epoch": 1.7, "learning_rate": 0.00016606566710950847, "loss": 7.9712, "step": 17880 }, { "epoch": 1.7, "learning_rate": 0.00016604668817612453, "loss": 7.9103, "step": 17890 }, { "epoch": 1.7, "learning_rate": 0.00016602770924274056, "loss": 7.8875, "step": 17900 }, { "epoch": 1.7, "learning_rate": 0.00016600873030935662, "loss": 7.9074, "step": 17910 }, { "epoch": 1.7, "learning_rate": 0.00016598975137597268, "loss": 7.8733, "step": 17920 }, { "epoch": 1.7, "learning_rate": 0.00016597077244258875, "loss": 7.8312, "step": 17930 }, { "epoch": 1.7, "learning_rate": 0.0001659517935092048, "loss": 7.869, "step": 17940 }, { "epoch": 1.7, "learning_rate": 0.00016593281457582084, "loss": 7.8955, "step": 17950 }, { "epoch": 1.7, "learning_rate": 0.0001659138356424369, "loss": 7.8527, "step": 17960 }, { "epoch": 1.71, "learning_rate": 0.00016589485670905296, "loss": 7.8674, "step": 17970 }, { "epoch": 1.71, "learning_rate": 0.00016587587777566902, "loss": 7.8768, "step": 17980 }, { "epoch": 1.71, "learning_rate": 0.00016585689884228509, "loss": 7.9197, "step": 17990 }, { "epoch": 1.71, "learning_rate": 0.00016583791990890112, "loss": 7.8606, "step": 18000 }, { "epoch": 1.71, "learning_rate": 0.00016581894097551718, "loss": 7.9081, "step": 18010 }, { "epoch": 1.71, "learning_rate": 0.00016579996204213324, "loss": 7.8936, "step": 18020 }, { "epoch": 1.71, "learning_rate": 0.0001657809831087493, "loss": 7.9135, "step": 18030 }, { "epoch": 1.71, "learning_rate": 0.00016576200417536537, "loss": 7.824, "step": 18040 }, { "epoch": 1.71, "learning_rate": 0.0001657430252419814, "loss": 7.918, "step": 18050 }, { "epoch": 1.71, "learning_rate": 0.00016572404630859746, "loss": 7.9342, "step": 18060 }, { "epoch": 1.71, "learning_rate": 0.00016570506737521352, "loss": 7.8834, "step": 18070 }, { "epoch": 1.72, "learning_rate": 0.00016568608844182958, "loss": 7.7842, "step": 18080 }, { "epoch": 1.72, "learning_rate": 0.00016566710950844564, "loss": 7.8498, "step": 18090 }, { "epoch": 1.72, "learning_rate": 0.0001656481305750617, "loss": 7.8644, "step": 18100 }, { "epoch": 1.72, "learning_rate": 0.00016562915164167774, "loss": 7.8654, "step": 18110 }, { "epoch": 1.72, "learning_rate": 0.0001656101727082938, "loss": 7.8258, "step": 18120 }, { "epoch": 1.72, "learning_rate": 0.00016559119377490986, "loss": 7.8322, "step": 18130 }, { "epoch": 1.72, "learning_rate": 0.00016557221484152592, "loss": 7.9357, "step": 18140 }, { "epoch": 1.72, "learning_rate": 0.00016555323590814198, "loss": 7.8355, "step": 18150 }, { "epoch": 1.72, "learning_rate": 0.00016553425697475802, "loss": 7.8667, "step": 18160 }, { "epoch": 1.72, "learning_rate": 0.00016551527804137408, "loss": 7.8392, "step": 18170 }, { "epoch": 1.73, "learning_rate": 0.00016549629910799014, "loss": 7.8973, "step": 18180 }, { "epoch": 1.73, "learning_rate": 0.0001654773201746062, "loss": 7.9184, "step": 18190 }, { "epoch": 1.73, "learning_rate": 0.00016545834124122226, "loss": 7.8068, "step": 18200 }, { "epoch": 1.73, "learning_rate": 0.0001654393623078383, "loss": 7.9054, "step": 18210 }, { "epoch": 1.73, "learning_rate": 0.00016542038337445436, "loss": 7.8072, "step": 18220 }, { "epoch": 1.73, "learning_rate": 0.00016540140444107042, "loss": 7.8678, "step": 18230 }, { "epoch": 1.73, "learning_rate": 0.00016538242550768648, "loss": 7.9126, "step": 18240 }, { "epoch": 1.73, "learning_rate": 0.00016536344657430254, "loss": 7.9272, "step": 18250 }, { "epoch": 1.73, "learning_rate": 0.0001653444676409186, "loss": 7.9315, "step": 18260 }, { "epoch": 1.73, "learning_rate": 0.00016532548870753464, "loss": 7.8238, "step": 18270 }, { "epoch": 1.73, "learning_rate": 0.0001653065097741507, "loss": 7.8731, "step": 18280 }, { "epoch": 1.74, "learning_rate": 0.00016528753084076676, "loss": 7.8416, "step": 18290 }, { "epoch": 1.74, "learning_rate": 0.00016526855190738282, "loss": 7.9475, "step": 18300 }, { "epoch": 1.74, "learning_rate": 0.00016524957297399888, "loss": 7.9196, "step": 18310 }, { "epoch": 1.74, "learning_rate": 0.00016523059404061492, "loss": 7.8805, "step": 18320 }, { "epoch": 1.74, "learning_rate": 0.00016521161510723098, "loss": 7.8936, "step": 18330 }, { "epoch": 1.74, "learning_rate": 0.00016519263617384704, "loss": 7.8933, "step": 18340 }, { "epoch": 1.74, "learning_rate": 0.0001651736572404631, "loss": 7.8424, "step": 18350 }, { "epoch": 1.74, "learning_rate": 0.00016515467830707916, "loss": 7.9242, "step": 18360 }, { "epoch": 1.74, "learning_rate": 0.0001651356993736952, "loss": 7.8339, "step": 18370 }, { "epoch": 1.74, "learning_rate": 0.00016511672044031126, "loss": 8.0267, "step": 18380 }, { "epoch": 1.75, "learning_rate": 0.00016509774150692732, "loss": 7.9168, "step": 18390 }, { "epoch": 1.75, "learning_rate": 0.00016507876257354338, "loss": 7.8347, "step": 18400 }, { "epoch": 1.75, "learning_rate": 0.00016505978364015944, "loss": 7.8527, "step": 18410 }, { "epoch": 1.75, "learning_rate": 0.0001650408047067755, "loss": 7.8267, "step": 18420 }, { "epoch": 1.75, "learning_rate": 0.00016502182577339154, "loss": 7.813, "step": 18430 }, { "epoch": 1.75, "learning_rate": 0.0001650028468400076, "loss": 7.8132, "step": 18440 }, { "epoch": 1.75, "learning_rate": 0.00016498386790662366, "loss": 7.9144, "step": 18450 }, { "epoch": 1.75, "learning_rate": 0.00016496488897323972, "loss": 7.8411, "step": 18460 }, { "epoch": 1.75, "learning_rate": 0.00016494591003985578, "loss": 7.8303, "step": 18470 }, { "epoch": 1.75, "learning_rate": 0.00016492693110647182, "loss": 7.8463, "step": 18480 }, { "epoch": 1.75, "learning_rate": 0.00016490795217308788, "loss": 7.8983, "step": 18490 }, { "epoch": 1.76, "learning_rate": 0.00016488897323970394, "loss": 7.8702, "step": 18500 }, { "epoch": 1.76, "learning_rate": 0.00016486999430632, "loss": 7.7592, "step": 18510 }, { "epoch": 1.76, "learning_rate": 0.00016485101537293606, "loss": 7.7817, "step": 18520 }, { "epoch": 1.76, "learning_rate": 0.0001648320364395521, "loss": 7.845, "step": 18530 }, { "epoch": 1.76, "learning_rate": 0.00016481305750616816, "loss": 7.8269, "step": 18540 }, { "epoch": 1.76, "learning_rate": 0.00016479407857278422, "loss": 7.8083, "step": 18550 }, { "epoch": 1.76, "learning_rate": 0.00016477509963940028, "loss": 7.8609, "step": 18560 }, { "epoch": 1.76, "learning_rate": 0.00016475612070601634, "loss": 7.7417, "step": 18570 }, { "epoch": 1.76, "learning_rate": 0.00016473714177263238, "loss": 7.8818, "step": 18580 }, { "epoch": 1.76, "learning_rate": 0.00016471816283924844, "loss": 7.8636, "step": 18590 }, { "epoch": 1.77, "learning_rate": 0.0001646991839058645, "loss": 7.8993, "step": 18600 }, { "epoch": 1.77, "learning_rate": 0.00016468020497248056, "loss": 7.9056, "step": 18610 }, { "epoch": 1.77, "learning_rate": 0.00016466122603909662, "loss": 7.852, "step": 18620 }, { "epoch": 1.77, "learning_rate": 0.00016464224710571268, "loss": 7.8672, "step": 18630 }, { "epoch": 1.77, "learning_rate": 0.00016462326817232872, "loss": 7.8413, "step": 18640 }, { "epoch": 1.77, "learning_rate": 0.00016460428923894478, "loss": 7.8847, "step": 18650 }, { "epoch": 1.77, "learning_rate": 0.00016458531030556084, "loss": 7.8263, "step": 18660 }, { "epoch": 1.77, "learning_rate": 0.0001645663313721769, "loss": 7.8653, "step": 18670 }, { "epoch": 1.77, "learning_rate": 0.00016454735243879296, "loss": 7.857, "step": 18680 }, { "epoch": 1.77, "learning_rate": 0.000164528373505409, "loss": 8.0016, "step": 18690 }, { "epoch": 1.77, "learning_rate": 0.00016450939457202506, "loss": 7.8949, "step": 18700 }, { "epoch": 1.78, "learning_rate": 0.00016449041563864112, "loss": 7.8634, "step": 18710 }, { "epoch": 1.78, "learning_rate": 0.00016447143670525718, "loss": 7.8651, "step": 18720 }, { "epoch": 1.78, "learning_rate": 0.00016445245777187324, "loss": 7.8809, "step": 18730 }, { "epoch": 1.78, "learning_rate": 0.00016443347883848928, "loss": 7.8964, "step": 18740 }, { "epoch": 1.78, "learning_rate": 0.00016441449990510534, "loss": 7.8104, "step": 18750 }, { "epoch": 1.78, "learning_rate": 0.0001643955209717214, "loss": 7.911, "step": 18760 }, { "epoch": 1.78, "learning_rate": 0.00016437654203833746, "loss": 7.8607, "step": 18770 }, { "epoch": 1.78, "learning_rate": 0.00016435756310495352, "loss": 7.8043, "step": 18780 }, { "epoch": 1.78, "learning_rate": 0.00016433858417156958, "loss": 7.9543, "step": 18790 }, { "epoch": 1.78, "learning_rate": 0.00016431960523818562, "loss": 7.884, "step": 18800 }, { "epoch": 1.78, "learning_rate": 0.00016430062630480168, "loss": 7.9173, "step": 18810 }, { "epoch": 1.79, "learning_rate": 0.00016428164737141774, "loss": 7.7703, "step": 18820 }, { "epoch": 1.79, "learning_rate": 0.0001642626684380338, "loss": 7.8585, "step": 18830 }, { "epoch": 1.79, "learning_rate": 0.00016424368950464986, "loss": 7.9445, "step": 18840 }, { "epoch": 1.79, "learning_rate": 0.0001642247105712659, "loss": 7.9027, "step": 18850 }, { "epoch": 1.79, "learning_rate": 0.00016420573163788196, "loss": 7.8269, "step": 18860 }, { "epoch": 1.79, "learning_rate": 0.00016418675270449802, "loss": 7.8792, "step": 18870 }, { "epoch": 1.79, "learning_rate": 0.00016416777377111408, "loss": 7.9192, "step": 18880 }, { "epoch": 1.79, "learning_rate": 0.00016414879483773014, "loss": 7.9763, "step": 18890 }, { "epoch": 1.79, "learning_rate": 0.00016412981590434617, "loss": 7.8849, "step": 18900 }, { "epoch": 1.79, "learning_rate": 0.00016411083697096224, "loss": 7.8498, "step": 18910 }, { "epoch": 1.8, "learning_rate": 0.0001640918580375783, "loss": 7.807, "step": 18920 }, { "epoch": 1.8, "learning_rate": 0.00016407287910419436, "loss": 7.8959, "step": 18930 }, { "epoch": 1.8, "learning_rate": 0.00016405390017081042, "loss": 7.8704, "step": 18940 }, { "epoch": 1.8, "learning_rate": 0.00016403492123742648, "loss": 7.842, "step": 18950 }, { "epoch": 1.8, "learning_rate": 0.00016401594230404251, "loss": 7.8434, "step": 18960 }, { "epoch": 1.8, "learning_rate": 0.00016399696337065858, "loss": 7.8772, "step": 18970 }, { "epoch": 1.8, "learning_rate": 0.00016397798443727464, "loss": 7.9123, "step": 18980 }, { "epoch": 1.8, "learning_rate": 0.0001639590055038907, "loss": 7.8603, "step": 18990 }, { "epoch": 1.8, "learning_rate": 0.00016394002657050676, "loss": 7.871, "step": 19000 }, { "epoch": 1.8, "learning_rate": 0.0001639210476371228, "loss": 7.7392, "step": 19010 }, { "epoch": 1.8, "learning_rate": 0.00016390206870373886, "loss": 7.8537, "step": 19020 }, { "epoch": 1.81, "learning_rate": 0.00016388308977035492, "loss": 7.9253, "step": 19030 }, { "epoch": 1.81, "learning_rate": 0.00016386411083697098, "loss": 7.8189, "step": 19040 }, { "epoch": 1.81, "learning_rate": 0.00016384513190358704, "loss": 7.8834, "step": 19050 }, { "epoch": 1.81, "learning_rate": 0.00016382615297020307, "loss": 7.8735, "step": 19060 }, { "epoch": 1.81, "learning_rate": 0.00016380717403681913, "loss": 7.952, "step": 19070 }, { "epoch": 1.81, "learning_rate": 0.0001637881951034352, "loss": 7.7672, "step": 19080 }, { "epoch": 1.81, "learning_rate": 0.00016376921617005126, "loss": 7.88, "step": 19090 }, { "epoch": 1.81, "learning_rate": 0.00016375023723666732, "loss": 7.8417, "step": 19100 }, { "epoch": 1.81, "learning_rate": 0.00016373125830328335, "loss": 7.8759, "step": 19110 }, { "epoch": 1.81, "learning_rate": 0.00016371227936989941, "loss": 7.9401, "step": 19120 }, { "epoch": 1.82, "learning_rate": 0.00016369330043651548, "loss": 7.7792, "step": 19130 }, { "epoch": 1.82, "learning_rate": 0.00016367432150313154, "loss": 7.904, "step": 19140 }, { "epoch": 1.82, "learning_rate": 0.0001636553425697476, "loss": 7.8967, "step": 19150 }, { "epoch": 1.82, "learning_rate": 0.00016363636363636366, "loss": 7.9611, "step": 19160 }, { "epoch": 1.82, "learning_rate": 0.0001636173847029797, "loss": 7.8921, "step": 19170 }, { "epoch": 1.82, "learning_rate": 0.00016359840576959575, "loss": 7.9201, "step": 19180 }, { "epoch": 1.82, "learning_rate": 0.00016357942683621182, "loss": 7.9224, "step": 19190 }, { "epoch": 1.82, "learning_rate": 0.00016356044790282788, "loss": 7.8683, "step": 19200 }, { "epoch": 1.82, "learning_rate": 0.00016354146896944394, "loss": 7.9341, "step": 19210 }, { "epoch": 1.82, "learning_rate": 0.00016352249003605997, "loss": 7.9042, "step": 19220 }, { "epoch": 1.82, "learning_rate": 0.00016350351110267603, "loss": 7.8717, "step": 19230 }, { "epoch": 1.83, "learning_rate": 0.0001634845321692921, "loss": 7.9177, "step": 19240 }, { "epoch": 1.83, "learning_rate": 0.00016346555323590816, "loss": 7.9534, "step": 19250 }, { "epoch": 1.83, "learning_rate": 0.00016344657430252422, "loss": 7.8696, "step": 19260 }, { "epoch": 1.83, "learning_rate": 0.00016342759536914025, "loss": 7.857, "step": 19270 }, { "epoch": 1.83, "learning_rate": 0.0001634086164357563, "loss": 7.8828, "step": 19280 }, { "epoch": 1.83, "learning_rate": 0.00016338963750237237, "loss": 7.828, "step": 19290 }, { "epoch": 1.83, "learning_rate": 0.00016337065856898844, "loss": 7.9118, "step": 19300 }, { "epoch": 1.83, "learning_rate": 0.0001633516796356045, "loss": 7.8622, "step": 19310 }, { "epoch": 1.83, "learning_rate": 0.00016333270070222056, "loss": 7.8215, "step": 19320 }, { "epoch": 1.83, "learning_rate": 0.0001633137217688366, "loss": 7.9519, "step": 19330 }, { "epoch": 1.84, "learning_rate": 0.00016329474283545265, "loss": 7.8305, "step": 19340 }, { "epoch": 1.84, "learning_rate": 0.00016327576390206871, "loss": 7.9268, "step": 19350 }, { "epoch": 1.84, "learning_rate": 0.00016325678496868478, "loss": 7.8265, "step": 19360 }, { "epoch": 1.84, "learning_rate": 0.00016323780603530084, "loss": 7.8933, "step": 19370 }, { "epoch": 1.84, "learning_rate": 0.00016321882710191687, "loss": 7.8313, "step": 19380 }, { "epoch": 1.84, "learning_rate": 0.00016319984816853293, "loss": 7.8801, "step": 19390 }, { "epoch": 1.84, "learning_rate": 0.000163180869235149, "loss": 7.9039, "step": 19400 }, { "epoch": 1.84, "learning_rate": 0.00016316189030176506, "loss": 7.8568, "step": 19410 }, { "epoch": 1.84, "learning_rate": 0.00016314291136838112, "loss": 7.8692, "step": 19420 }, { "epoch": 1.84, "learning_rate": 0.00016312393243499715, "loss": 7.9184, "step": 19430 }, { "epoch": 1.84, "learning_rate": 0.0001631049535016132, "loss": 7.8313, "step": 19440 }, { "epoch": 1.85, "learning_rate": 0.00016308597456822927, "loss": 7.8239, "step": 19450 }, { "epoch": 1.85, "learning_rate": 0.00016306699563484533, "loss": 7.9093, "step": 19460 }, { "epoch": 1.85, "learning_rate": 0.0001630480167014614, "loss": 7.7581, "step": 19470 }, { "epoch": 1.85, "learning_rate": 0.00016302903776807746, "loss": 8.0268, "step": 19480 }, { "epoch": 1.85, "learning_rate": 0.0001630100588346935, "loss": 7.8394, "step": 19490 }, { "epoch": 1.85, "learning_rate": 0.00016299107990130955, "loss": 7.8193, "step": 19500 }, { "epoch": 1.85, "learning_rate": 0.00016297210096792561, "loss": 7.9236, "step": 19510 }, { "epoch": 1.85, "learning_rate": 0.00016295312203454168, "loss": 7.963, "step": 19520 }, { "epoch": 1.85, "learning_rate": 0.00016293414310115774, "loss": 7.9338, "step": 19530 }, { "epoch": 1.85, "learning_rate": 0.00016291516416777377, "loss": 7.8894, "step": 19540 }, { "epoch": 1.86, "learning_rate": 0.00016289618523438983, "loss": 7.9377, "step": 19550 }, { "epoch": 1.86, "learning_rate": 0.0001628772063010059, "loss": 7.8134, "step": 19560 }, { "epoch": 1.86, "learning_rate": 0.00016285822736762195, "loss": 7.9315, "step": 19570 }, { "epoch": 1.86, "learning_rate": 0.00016283924843423802, "loss": 7.8767, "step": 19580 }, { "epoch": 1.86, "learning_rate": 0.00016282026950085405, "loss": 7.7828, "step": 19590 }, { "epoch": 1.86, "learning_rate": 0.0001628012905674701, "loss": 7.8605, "step": 19600 }, { "epoch": 1.86, "learning_rate": 0.00016278231163408617, "loss": 7.7621, "step": 19610 }, { "epoch": 1.86, "learning_rate": 0.00016276333270070223, "loss": 7.8432, "step": 19620 }, { "epoch": 1.86, "learning_rate": 0.0001627443537673183, "loss": 7.898, "step": 19630 }, { "epoch": 1.86, "learning_rate": 0.00016272537483393433, "loss": 7.9226, "step": 19640 }, { "epoch": 1.86, "learning_rate": 0.0001627063959005504, "loss": 7.8093, "step": 19650 }, { "epoch": 1.87, "learning_rate": 0.00016268741696716645, "loss": 7.856, "step": 19660 }, { "epoch": 1.87, "learning_rate": 0.0001626684380337825, "loss": 7.9024, "step": 19670 }, { "epoch": 1.87, "learning_rate": 0.00016264945910039857, "loss": 7.9161, "step": 19680 }, { "epoch": 1.87, "learning_rate": 0.00016263048016701464, "loss": 7.893, "step": 19690 }, { "epoch": 1.87, "learning_rate": 0.00016261150123363067, "loss": 7.8729, "step": 19700 }, { "epoch": 1.87, "learning_rate": 0.00016259252230024673, "loss": 7.8657, "step": 19710 }, { "epoch": 1.87, "learning_rate": 0.0001625735433668628, "loss": 7.9963, "step": 19720 }, { "epoch": 1.87, "learning_rate": 0.00016255456443347885, "loss": 7.877, "step": 19730 }, { "epoch": 1.87, "learning_rate": 0.00016253558550009491, "loss": 7.868, "step": 19740 }, { "epoch": 1.87, "learning_rate": 0.00016251660656671095, "loss": 7.8692, "step": 19750 }, { "epoch": 1.88, "learning_rate": 0.000162497627633327, "loss": 7.9183, "step": 19760 }, { "epoch": 1.88, "learning_rate": 0.00016247864869994307, "loss": 7.8108, "step": 19770 }, { "epoch": 1.88, "learning_rate": 0.00016245966976655913, "loss": 7.7614, "step": 19780 }, { "epoch": 1.88, "learning_rate": 0.0001624406908331752, "loss": 7.9221, "step": 19790 }, { "epoch": 1.88, "learning_rate": 0.00016242171189979123, "loss": 7.959, "step": 19800 }, { "epoch": 1.88, "learning_rate": 0.0001624027329664073, "loss": 7.8201, "step": 19810 }, { "epoch": 1.88, "learning_rate": 0.00016238375403302335, "loss": 7.8931, "step": 19820 }, { "epoch": 1.88, "learning_rate": 0.0001623647750996394, "loss": 7.7885, "step": 19830 }, { "epoch": 1.88, "learning_rate": 0.00016234579616625547, "loss": 7.937, "step": 19840 }, { "epoch": 1.88, "learning_rate": 0.00016232681723287153, "loss": 7.8538, "step": 19850 }, { "epoch": 1.88, "learning_rate": 0.00016230783829948757, "loss": 7.9476, "step": 19860 }, { "epoch": 1.89, "learning_rate": 0.00016228885936610363, "loss": 7.8243, "step": 19870 }, { "epoch": 1.89, "learning_rate": 0.0001622698804327197, "loss": 7.9034, "step": 19880 }, { "epoch": 1.89, "learning_rate": 0.00016225090149933575, "loss": 7.8047, "step": 19890 }, { "epoch": 1.89, "learning_rate": 0.00016223192256595181, "loss": 7.9023, "step": 19900 }, { "epoch": 1.89, "learning_rate": 0.00016221294363256785, "loss": 7.7868, "step": 19910 }, { "epoch": 1.89, "learning_rate": 0.0001621939646991839, "loss": 7.8242, "step": 19920 }, { "epoch": 1.89, "learning_rate": 0.00016217498576579997, "loss": 7.8165, "step": 19930 }, { "epoch": 1.89, "learning_rate": 0.00016215600683241603, "loss": 7.9508, "step": 19940 }, { "epoch": 1.89, "learning_rate": 0.0001621370278990321, "loss": 7.8908, "step": 19950 }, { "epoch": 1.89, "learning_rate": 0.00016211804896564813, "loss": 7.892, "step": 19960 }, { "epoch": 1.9, "learning_rate": 0.0001620990700322642, "loss": 7.8592, "step": 19970 }, { "epoch": 1.9, "learning_rate": 0.00016208009109888025, "loss": 7.9194, "step": 19980 }, { "epoch": 1.9, "learning_rate": 0.0001620611121654963, "loss": 7.8351, "step": 19990 }, { "epoch": 1.9, "learning_rate": 0.00016204213323211237, "loss": 7.9319, "step": 20000 }, { "epoch": 1.9, "learning_rate": 0.00016202315429872843, "loss": 7.8847, "step": 20010 }, { "epoch": 1.9, "learning_rate": 0.00016200417536534447, "loss": 7.8301, "step": 20020 }, { "epoch": 1.9, "learning_rate": 0.00016198519643196053, "loss": 7.8568, "step": 20030 }, { "epoch": 1.9, "learning_rate": 0.0001619662174985766, "loss": 7.8261, "step": 20040 }, { "epoch": 1.9, "learning_rate": 0.00016194723856519265, "loss": 8.0282, "step": 20050 }, { "epoch": 1.9, "learning_rate": 0.0001619282596318087, "loss": 7.9086, "step": 20060 }, { "epoch": 1.9, "learning_rate": 0.00016190928069842475, "loss": 7.9526, "step": 20070 }, { "epoch": 1.91, "learning_rate": 0.0001618903017650408, "loss": 7.7902, "step": 20080 }, { "epoch": 1.91, "learning_rate": 0.00016187132283165687, "loss": 7.8771, "step": 20090 }, { "epoch": 1.91, "learning_rate": 0.00016185234389827293, "loss": 7.8308, "step": 20100 }, { "epoch": 1.91, "learning_rate": 0.000161833364964889, "loss": 7.7032, "step": 20110 }, { "epoch": 1.91, "learning_rate": 0.00016181438603150503, "loss": 7.8337, "step": 20120 }, { "epoch": 1.91, "learning_rate": 0.0001617954070981211, "loss": 7.8936, "step": 20130 }, { "epoch": 1.91, "learning_rate": 0.00016177642816473715, "loss": 7.9296, "step": 20140 }, { "epoch": 1.91, "learning_rate": 0.0001617574492313532, "loss": 7.9812, "step": 20150 }, { "epoch": 1.91, "learning_rate": 0.00016173847029796927, "loss": 7.9834, "step": 20160 }, { "epoch": 1.91, "learning_rate": 0.0001617194913645853, "loss": 7.8294, "step": 20170 }, { "epoch": 1.91, "learning_rate": 0.00016170051243120137, "loss": 7.8923, "step": 20180 }, { "epoch": 1.92, "learning_rate": 0.00016168153349781743, "loss": 7.8267, "step": 20190 }, { "epoch": 1.92, "learning_rate": 0.0001616625545644335, "loss": 7.9175, "step": 20200 }, { "epoch": 1.92, "learning_rate": 0.00016164357563104955, "loss": 7.7191, "step": 20210 }, { "epoch": 1.92, "learning_rate": 0.0001616245966976656, "loss": 7.8315, "step": 20220 }, { "epoch": 1.92, "learning_rate": 0.00016160561776428165, "loss": 7.8252, "step": 20230 }, { "epoch": 1.92, "learning_rate": 0.0001615866388308977, "loss": 7.872, "step": 20240 }, { "epoch": 1.92, "learning_rate": 0.00016156765989751377, "loss": 7.8921, "step": 20250 }, { "epoch": 1.92, "learning_rate": 0.00016154868096412983, "loss": 7.8788, "step": 20260 }, { "epoch": 1.92, "learning_rate": 0.0001615297020307459, "loss": 7.8861, "step": 20270 }, { "epoch": 1.92, "learning_rate": 0.00016151072309736193, "loss": 7.8892, "step": 20280 }, { "epoch": 1.93, "learning_rate": 0.000161491744163978, "loss": 7.8554, "step": 20290 }, { "epoch": 1.93, "learning_rate": 0.00016147276523059405, "loss": 7.9014, "step": 20300 }, { "epoch": 1.93, "learning_rate": 0.0001614537862972101, "loss": 7.8427, "step": 20310 }, { "epoch": 1.93, "learning_rate": 0.00016143480736382617, "loss": 7.8983, "step": 20320 }, { "epoch": 1.93, "learning_rate": 0.0001614158284304422, "loss": 7.8983, "step": 20330 }, { "epoch": 1.93, "learning_rate": 0.00016139684949705827, "loss": 7.7865, "step": 20340 }, { "epoch": 1.93, "learning_rate": 0.00016137787056367433, "loss": 7.8339, "step": 20350 }, { "epoch": 1.93, "learning_rate": 0.0001613588916302904, "loss": 7.8371, "step": 20360 }, { "epoch": 1.93, "learning_rate": 0.00016133991269690645, "loss": 7.7893, "step": 20370 }, { "epoch": 1.93, "learning_rate": 0.0001613209337635225, "loss": 7.9403, "step": 20380 }, { "epoch": 1.93, "learning_rate": 0.00016130195483013855, "loss": 7.886, "step": 20390 }, { "epoch": 1.94, "learning_rate": 0.0001612829758967546, "loss": 7.8127, "step": 20400 }, { "epoch": 1.94, "learning_rate": 0.00016126399696337067, "loss": 7.8164, "step": 20410 }, { "epoch": 1.94, "learning_rate": 0.00016124501802998673, "loss": 7.8491, "step": 20420 }, { "epoch": 1.94, "learning_rate": 0.0001612260390966028, "loss": 7.7832, "step": 20430 }, { "epoch": 1.94, "learning_rate": 0.00016120706016321882, "loss": 7.8549, "step": 20440 }, { "epoch": 1.94, "learning_rate": 0.00016118808122983489, "loss": 8.0095, "step": 20450 }, { "epoch": 1.94, "learning_rate": 0.00016116910229645095, "loss": 7.7896, "step": 20460 }, { "epoch": 1.94, "learning_rate": 0.000161150123363067, "loss": 7.8677, "step": 20470 }, { "epoch": 1.94, "learning_rate": 0.00016113114442968307, "loss": 7.8216, "step": 20480 }, { "epoch": 1.94, "learning_rate": 0.0001611121654962991, "loss": 7.7524, "step": 20490 }, { "epoch": 1.95, "learning_rate": 0.00016109318656291517, "loss": 7.78, "step": 20500 }, { "epoch": 1.95, "learning_rate": 0.00016107420762953123, "loss": 7.9191, "step": 20510 }, { "epoch": 1.95, "learning_rate": 0.0001610552286961473, "loss": 7.8323, "step": 20520 }, { "epoch": 1.95, "learning_rate": 0.00016103624976276335, "loss": 7.7972, "step": 20530 }, { "epoch": 1.95, "learning_rate": 0.0001610172708293794, "loss": 7.8524, "step": 20540 }, { "epoch": 1.95, "learning_rate": 0.00016099829189599544, "loss": 7.8708, "step": 20550 }, { "epoch": 1.95, "learning_rate": 0.0001609793129626115, "loss": 7.8355, "step": 20560 }, { "epoch": 1.95, "learning_rate": 0.00016096033402922757, "loss": 7.8517, "step": 20570 }, { "epoch": 1.95, "learning_rate": 0.00016094135509584363, "loss": 7.874, "step": 20580 }, { "epoch": 1.95, "learning_rate": 0.0001609223761624597, "loss": 7.7804, "step": 20590 }, { "epoch": 1.95, "learning_rate": 0.00016090339722907572, "loss": 7.8556, "step": 20600 }, { "epoch": 1.96, "learning_rate": 0.00016088441829569179, "loss": 7.9106, "step": 20610 }, { "epoch": 1.96, "learning_rate": 0.00016086543936230785, "loss": 7.9515, "step": 20620 }, { "epoch": 1.96, "learning_rate": 0.0001608464604289239, "loss": 7.8692, "step": 20630 }, { "epoch": 1.96, "learning_rate": 0.00016082748149553997, "loss": 7.8373, "step": 20640 }, { "epoch": 1.96, "learning_rate": 0.000160808502562156, "loss": 7.867, "step": 20650 }, { "epoch": 1.96, "learning_rate": 0.00016078952362877206, "loss": 7.8678, "step": 20660 }, { "epoch": 1.96, "learning_rate": 0.00016077054469538813, "loss": 7.8488, "step": 20670 }, { "epoch": 1.96, "learning_rate": 0.0001607515657620042, "loss": 7.8678, "step": 20680 }, { "epoch": 1.96, "learning_rate": 0.00016073258682862025, "loss": 7.8862, "step": 20690 }, { "epoch": 1.96, "learning_rate": 0.00016071360789523628, "loss": 7.862, "step": 20700 }, { "epoch": 1.97, "learning_rate": 0.00016069462896185234, "loss": 7.7964, "step": 20710 }, { "epoch": 1.97, "learning_rate": 0.0001606756500284684, "loss": 7.8565, "step": 20720 }, { "epoch": 1.97, "learning_rate": 0.00016065667109508447, "loss": 7.9738, "step": 20730 }, { "epoch": 1.97, "learning_rate": 0.00016063769216170053, "loss": 7.9275, "step": 20740 }, { "epoch": 1.97, "learning_rate": 0.0001606187132283166, "loss": 7.8584, "step": 20750 }, { "epoch": 1.97, "learning_rate": 0.00016059973429493262, "loss": 7.8415, "step": 20760 }, { "epoch": 1.97, "learning_rate": 0.00016058075536154868, "loss": 7.9181, "step": 20770 }, { "epoch": 1.97, "learning_rate": 0.00016056177642816475, "loss": 7.7571, "step": 20780 }, { "epoch": 1.97, "learning_rate": 0.0001605427974947808, "loss": 7.9947, "step": 20790 }, { "epoch": 1.97, "learning_rate": 0.00016052381856139687, "loss": 7.9309, "step": 20800 }, { "epoch": 1.97, "learning_rate": 0.0001605048396280129, "loss": 7.788, "step": 20810 }, { "epoch": 1.98, "learning_rate": 0.00016048586069462896, "loss": 7.9506, "step": 20820 }, { "epoch": 1.98, "learning_rate": 0.00016046688176124502, "loss": 7.9019, "step": 20830 }, { "epoch": 1.98, "learning_rate": 0.00016044790282786109, "loss": 7.9267, "step": 20840 }, { "epoch": 1.98, "learning_rate": 0.00016042892389447715, "loss": 7.8396, "step": 20850 }, { "epoch": 1.98, "learning_rate": 0.00016040994496109318, "loss": 7.8808, "step": 20860 }, { "epoch": 1.98, "learning_rate": 0.00016039096602770924, "loss": 7.8719, "step": 20870 }, { "epoch": 1.98, "learning_rate": 0.0001603719870943253, "loss": 7.8729, "step": 20880 }, { "epoch": 1.98, "learning_rate": 0.00016035300816094137, "loss": 7.8936, "step": 20890 }, { "epoch": 1.98, "learning_rate": 0.00016033402922755743, "loss": 7.8068, "step": 20900 }, { "epoch": 1.98, "learning_rate": 0.0001603150502941735, "loss": 7.8761, "step": 20910 }, { "epoch": 1.99, "learning_rate": 0.00016029607136078952, "loss": 7.8925, "step": 20920 }, { "epoch": 1.99, "learning_rate": 0.00016027709242740558, "loss": 7.7976, "step": 20930 }, { "epoch": 1.99, "learning_rate": 0.00016025811349402164, "loss": 7.9489, "step": 20940 }, { "epoch": 1.99, "learning_rate": 0.0001602391345606377, "loss": 7.9128, "step": 20950 }, { "epoch": 1.99, "learning_rate": 0.00016022015562725377, "loss": 7.8393, "step": 20960 }, { "epoch": 1.99, "learning_rate": 0.0001602011766938698, "loss": 7.7641, "step": 20970 }, { "epoch": 1.99, "learning_rate": 0.00016018219776048586, "loss": 7.946, "step": 20980 }, { "epoch": 1.99, "learning_rate": 0.00016016321882710192, "loss": 7.8674, "step": 20990 }, { "epoch": 1.99, "learning_rate": 0.00016014423989371799, "loss": 7.889, "step": 21000 }, { "epoch": 1.99, "learning_rate": 0.00016012526096033405, "loss": 7.9576, "step": 21010 }, { "epoch": 1.99, "learning_rate": 0.00016010628202695008, "loss": 7.7703, "step": 21020 }, { "epoch": 2.0, "learning_rate": 0.00016008730309356614, "loss": 7.921, "step": 21030 }, { "epoch": 2.0, "learning_rate": 0.0001600683241601822, "loss": 7.906, "step": 21040 }, { "epoch": 2.0, "learning_rate": 0.00016004934522679826, "loss": 7.8525, "step": 21050 }, { "epoch": 2.0, "learning_rate": 0.00016003036629341433, "loss": 7.8645, "step": 21060 }, { "epoch": 2.0, "learning_rate": 0.0001600113873600304, "loss": 7.8351, "step": 21070 }, { "epoch": 2.0, "learning_rate": 0.00015999240842664642, "loss": 7.9053, "step": 21080 }, { "epoch": 2.0, "learning_rate": 0.00015997342949326248, "loss": 7.9325, "step": 21090 }, { "epoch": 2.0, "learning_rate": 0.00015995445055987854, "loss": 7.9268, "step": 21100 }, { "epoch": 2.0, "learning_rate": 0.0001599354716264946, "loss": 7.8665, "step": 21110 }, { "epoch": 2.0, "learning_rate": 0.00015991649269311067, "loss": 7.876, "step": 21120 }, { "epoch": 2.01, "learning_rate": 0.0001598975137597267, "loss": 7.7425, "step": 21130 }, { "epoch": 2.01, "learning_rate": 0.00015987853482634276, "loss": 7.8238, "step": 21140 }, { "epoch": 2.01, "learning_rate": 0.00015985955589295882, "loss": 7.8337, "step": 21150 }, { "epoch": 2.01, "learning_rate": 0.00015984057695957488, "loss": 7.8954, "step": 21160 }, { "epoch": 2.01, "learning_rate": 0.00015982159802619095, "loss": 7.9015, "step": 21170 }, { "epoch": 2.01, "learning_rate": 0.00015980261909280698, "loss": 7.8966, "step": 21180 }, { "epoch": 2.01, "learning_rate": 0.00015978364015942304, "loss": 7.9607, "step": 21190 }, { "epoch": 2.01, "learning_rate": 0.0001597646612260391, "loss": 8.0079, "step": 21200 }, { "epoch": 2.01, "learning_rate": 0.00015974568229265516, "loss": 7.8708, "step": 21210 }, { "epoch": 2.01, "learning_rate": 0.00015972670335927123, "loss": 7.9371, "step": 21220 }, { "epoch": 2.01, "learning_rate": 0.00015970772442588726, "loss": 7.8218, "step": 21230 }, { "epoch": 2.02, "learning_rate": 0.00015968874549250332, "loss": 7.819, "step": 21240 }, { "epoch": 2.02, "learning_rate": 0.00015966976655911938, "loss": 7.8379, "step": 21250 }, { "epoch": 2.02, "learning_rate": 0.00015965078762573544, "loss": 7.9082, "step": 21260 }, { "epoch": 2.02, "learning_rate": 0.0001596318086923515, "loss": 7.7326, "step": 21270 }, { "epoch": 2.02, "learning_rate": 0.00015961282975896757, "loss": 7.8548, "step": 21280 }, { "epoch": 2.02, "learning_rate": 0.0001595938508255836, "loss": 7.9232, "step": 21290 }, { "epoch": 2.02, "learning_rate": 0.00015957487189219966, "loss": 8.0124, "step": 21300 }, { "epoch": 2.02, "learning_rate": 0.00015955589295881572, "loss": 7.7575, "step": 21310 }, { "epoch": 2.02, "learning_rate": 0.00015953691402543178, "loss": 7.7923, "step": 21320 }, { "epoch": 2.02, "learning_rate": 0.00015951793509204784, "loss": 7.8825, "step": 21330 }, { "epoch": 2.03, "learning_rate": 0.00015949895615866388, "loss": 7.9135, "step": 21340 }, { "epoch": 2.03, "learning_rate": 0.00015947997722527994, "loss": 7.8871, "step": 21350 }, { "epoch": 2.03, "learning_rate": 0.000159460998291896, "loss": 7.8631, "step": 21360 }, { "epoch": 2.03, "learning_rate": 0.00015944201935851206, "loss": 7.9616, "step": 21370 }, { "epoch": 2.03, "learning_rate": 0.00015942304042512812, "loss": 7.8579, "step": 21380 }, { "epoch": 2.03, "learning_rate": 0.00015940406149174416, "loss": 7.8539, "step": 21390 }, { "epoch": 2.03, "learning_rate": 0.00015938508255836022, "loss": 7.8704, "step": 21400 }, { "epoch": 2.03, "learning_rate": 0.00015936610362497628, "loss": 7.8644, "step": 21410 }, { "epoch": 2.03, "learning_rate": 0.00015934712469159234, "loss": 7.8173, "step": 21420 }, { "epoch": 2.03, "learning_rate": 0.0001593281457582084, "loss": 7.8461, "step": 21430 }, { "epoch": 2.03, "learning_rate": 0.00015930916682482446, "loss": 7.8491, "step": 21440 }, { "epoch": 2.04, "learning_rate": 0.0001592901878914405, "loss": 7.8831, "step": 21450 }, { "epoch": 2.04, "learning_rate": 0.00015927120895805656, "loss": 7.878, "step": 21460 }, { "epoch": 2.04, "learning_rate": 0.00015925223002467262, "loss": 7.7975, "step": 21470 }, { "epoch": 2.04, "learning_rate": 0.00015923325109128868, "loss": 7.8669, "step": 21480 }, { "epoch": 2.04, "learning_rate": 0.00015921427215790474, "loss": 7.8426, "step": 21490 }, { "epoch": 2.04, "learning_rate": 0.00015919529322452078, "loss": 7.7905, "step": 21500 }, { "epoch": 2.04, "learning_rate": 0.00015917631429113684, "loss": 7.8559, "step": 21510 }, { "epoch": 2.04, "learning_rate": 0.0001591573353577529, "loss": 7.9196, "step": 21520 }, { "epoch": 2.04, "learning_rate": 0.00015913835642436896, "loss": 7.8966, "step": 21530 }, { "epoch": 2.04, "learning_rate": 0.00015911937749098502, "loss": 7.9587, "step": 21540 }, { "epoch": 2.04, "learning_rate": 0.00015910039855760106, "loss": 7.904, "step": 21550 }, { "epoch": 2.05, "learning_rate": 0.00015908141962421712, "loss": 7.8236, "step": 21560 }, { "epoch": 2.05, "learning_rate": 0.00015906244069083318, "loss": 7.9078, "step": 21570 }, { "epoch": 2.05, "learning_rate": 0.00015904346175744924, "loss": 7.8783, "step": 21580 }, { "epoch": 2.05, "learning_rate": 0.0001590244828240653, "loss": 7.767, "step": 21590 }, { "epoch": 2.05, "learning_rate": 0.00015900550389068136, "loss": 7.8145, "step": 21600 }, { "epoch": 2.05, "learning_rate": 0.0001589865249572974, "loss": 7.8772, "step": 21610 }, { "epoch": 2.05, "learning_rate": 0.00015896754602391346, "loss": 7.8538, "step": 21620 }, { "epoch": 2.05, "learning_rate": 0.00015894856709052952, "loss": 7.8231, "step": 21630 }, { "epoch": 2.05, "learning_rate": 0.00015892958815714558, "loss": 7.9159, "step": 21640 }, { "epoch": 2.05, "learning_rate": 0.00015891060922376164, "loss": 7.9374, "step": 21650 }, { "epoch": 2.06, "learning_rate": 0.00015889163029037768, "loss": 7.8821, "step": 21660 }, { "epoch": 2.06, "learning_rate": 0.00015887265135699374, "loss": 7.9214, "step": 21670 }, { "epoch": 2.06, "learning_rate": 0.0001588536724236098, "loss": 8.002, "step": 21680 }, { "epoch": 2.06, "learning_rate": 0.00015883469349022586, "loss": 7.8611, "step": 21690 }, { "epoch": 2.06, "learning_rate": 0.00015881571455684192, "loss": 8.0108, "step": 21700 }, { "epoch": 2.06, "learning_rate": 0.00015879673562345796, "loss": 7.8643, "step": 21710 }, { "epoch": 2.06, "learning_rate": 0.00015877775669007402, "loss": 7.9021, "step": 21720 }, { "epoch": 2.06, "learning_rate": 0.00015875877775669008, "loss": 7.8611, "step": 21730 }, { "epoch": 2.06, "learning_rate": 0.00015873979882330614, "loss": 7.7276, "step": 21740 }, { "epoch": 2.06, "learning_rate": 0.0001587208198899222, "loss": 7.9215, "step": 21750 }, { "epoch": 2.06, "learning_rate": 0.00015870184095653824, "loss": 7.8625, "step": 21760 }, { "epoch": 2.07, "learning_rate": 0.0001586828620231543, "loss": 7.9357, "step": 21770 }, { "epoch": 2.07, "learning_rate": 0.00015866388308977036, "loss": 7.89, "step": 21780 }, { "epoch": 2.07, "learning_rate": 0.00015864490415638642, "loss": 7.6989, "step": 21790 }, { "epoch": 2.07, "learning_rate": 0.00015862592522300248, "loss": 7.7832, "step": 21800 }, { "epoch": 2.07, "learning_rate": 0.00015860694628961854, "loss": 7.9075, "step": 21810 }, { "epoch": 2.07, "learning_rate": 0.00015858796735623458, "loss": 7.9715, "step": 21820 }, { "epoch": 2.07, "learning_rate": 0.00015856898842285064, "loss": 7.8661, "step": 21830 }, { "epoch": 2.07, "learning_rate": 0.0001585500094894667, "loss": 7.8871, "step": 21840 }, { "epoch": 2.07, "learning_rate": 0.00015853103055608276, "loss": 7.9332, "step": 21850 }, { "epoch": 2.07, "learning_rate": 0.00015851205162269882, "loss": 7.839, "step": 21860 }, { "epoch": 2.08, "learning_rate": 0.00015849307268931486, "loss": 7.9071, "step": 21870 }, { "epoch": 2.08, "learning_rate": 0.00015847409375593092, "loss": 7.8392, "step": 21880 }, { "epoch": 2.08, "learning_rate": 0.00015845511482254698, "loss": 7.8378, "step": 21890 }, { "epoch": 2.08, "learning_rate": 0.00015843613588916304, "loss": 7.895, "step": 21900 }, { "epoch": 2.08, "learning_rate": 0.0001584171569557791, "loss": 7.895, "step": 21910 }, { "epoch": 2.08, "learning_rate": 0.00015839817802239513, "loss": 7.9746, "step": 21920 }, { "epoch": 2.08, "learning_rate": 0.0001583791990890112, "loss": 7.7522, "step": 21930 }, { "epoch": 2.08, "learning_rate": 0.00015836022015562726, "loss": 7.9543, "step": 21940 }, { "epoch": 2.08, "learning_rate": 0.00015834124122224332, "loss": 7.9075, "step": 21950 }, { "epoch": 2.08, "learning_rate": 0.00015832226228885938, "loss": 7.8838, "step": 21960 }, { "epoch": 2.08, "learning_rate": 0.00015830328335547544, "loss": 7.8334, "step": 21970 }, { "epoch": 2.09, "learning_rate": 0.00015828430442209148, "loss": 7.888, "step": 21980 }, { "epoch": 2.09, "learning_rate": 0.00015826532548870754, "loss": 7.853, "step": 21990 }, { "epoch": 2.09, "learning_rate": 0.0001582463465553236, "loss": 8.0296, "step": 22000 }, { "epoch": 2.09, "learning_rate": 0.00015822736762193966, "loss": 7.8432, "step": 22010 }, { "epoch": 2.09, "learning_rate": 0.00015820838868855572, "loss": 7.8741, "step": 22020 }, { "epoch": 2.09, "learning_rate": 0.00015818940975517175, "loss": 7.8237, "step": 22030 }, { "epoch": 2.09, "learning_rate": 0.00015817043082178782, "loss": 7.9621, "step": 22040 }, { "epoch": 2.09, "learning_rate": 0.00015815145188840388, "loss": 7.8859, "step": 22050 }, { "epoch": 2.09, "learning_rate": 0.00015813247295501994, "loss": 7.9003, "step": 22060 }, { "epoch": 2.09, "learning_rate": 0.000158113494021636, "loss": 7.9417, "step": 22070 }, { "epoch": 2.1, "learning_rate": 0.00015809451508825203, "loss": 7.9236, "step": 22080 }, { "epoch": 2.1, "learning_rate": 0.0001580755361548681, "loss": 7.7871, "step": 22090 }, { "epoch": 2.1, "learning_rate": 0.00015805655722148416, "loss": 7.8612, "step": 22100 }, { "epoch": 2.1, "learning_rate": 0.00015803757828810022, "loss": 8.0152, "step": 22110 }, { "epoch": 2.1, "learning_rate": 0.00015801859935471628, "loss": 7.8189, "step": 22120 }, { "epoch": 2.1, "learning_rate": 0.00015799962042133234, "loss": 7.9239, "step": 22130 }, { "epoch": 2.1, "learning_rate": 0.00015798064148794837, "loss": 7.8071, "step": 22140 }, { "epoch": 2.1, "learning_rate": 0.00015796166255456444, "loss": 7.8181, "step": 22150 }, { "epoch": 2.1, "learning_rate": 0.0001579426836211805, "loss": 7.847, "step": 22160 }, { "epoch": 2.1, "learning_rate": 0.00015792370468779656, "loss": 7.8419, "step": 22170 }, { "epoch": 2.1, "learning_rate": 0.00015790472575441262, "loss": 7.9681, "step": 22180 }, { "epoch": 2.11, "learning_rate": 0.00015788574682102865, "loss": 7.8276, "step": 22190 }, { "epoch": 2.11, "learning_rate": 0.00015786676788764472, "loss": 7.8943, "step": 22200 }, { "epoch": 2.11, "learning_rate": 0.00015784778895426078, "loss": 7.8348, "step": 22210 }, { "epoch": 2.11, "learning_rate": 0.00015782881002087684, "loss": 7.8609, "step": 22220 }, { "epoch": 2.11, "learning_rate": 0.0001578098310874929, "loss": 7.8044, "step": 22230 }, { "epoch": 2.11, "learning_rate": 0.00015779085215410893, "loss": 7.9309, "step": 22240 }, { "epoch": 2.11, "learning_rate": 0.000157771873220725, "loss": 7.8456, "step": 22250 }, { "epoch": 2.11, "learning_rate": 0.00015775289428734106, "loss": 7.8255, "step": 22260 }, { "epoch": 2.11, "learning_rate": 0.00015773391535395712, "loss": 7.9197, "step": 22270 }, { "epoch": 2.11, "learning_rate": 0.00015771493642057318, "loss": 7.8745, "step": 22280 }, { "epoch": 2.12, "learning_rate": 0.0001576959574871892, "loss": 7.8433, "step": 22290 }, { "epoch": 2.12, "learning_rate": 0.00015767697855380527, "loss": 7.8798, "step": 22300 }, { "epoch": 2.12, "learning_rate": 0.00015765799962042134, "loss": 7.8917, "step": 22310 }, { "epoch": 2.12, "learning_rate": 0.0001576390206870374, "loss": 7.884, "step": 22320 }, { "epoch": 2.12, "learning_rate": 0.00015762004175365346, "loss": 7.8985, "step": 22330 }, { "epoch": 2.12, "learning_rate": 0.00015760106282026952, "loss": 7.9394, "step": 22340 }, { "epoch": 2.12, "learning_rate": 0.00015758208388688555, "loss": 7.8019, "step": 22350 }, { "epoch": 2.12, "learning_rate": 0.00015756310495350161, "loss": 7.8047, "step": 22360 }, { "epoch": 2.12, "learning_rate": 0.00015754412602011768, "loss": 7.8143, "step": 22370 }, { "epoch": 2.12, "learning_rate": 0.00015752514708673374, "loss": 7.81, "step": 22380 }, { "epoch": 2.12, "learning_rate": 0.0001575061681533498, "loss": 7.8116, "step": 22390 }, { "epoch": 2.13, "learning_rate": 0.00015748718921996583, "loss": 7.8917, "step": 22400 }, { "epoch": 2.13, "learning_rate": 0.0001574682102865819, "loss": 7.849, "step": 22410 }, { "epoch": 2.13, "learning_rate": 0.00015744923135319795, "loss": 7.8409, "step": 22420 }, { "epoch": 2.13, "learning_rate": 0.00015743025241981402, "loss": 7.8352, "step": 22430 }, { "epoch": 2.13, "learning_rate": 0.00015741127348643008, "loss": 7.8681, "step": 22440 }, { "epoch": 2.13, "learning_rate": 0.0001573922945530461, "loss": 7.9063, "step": 22450 }, { "epoch": 2.13, "learning_rate": 0.00015737331561966217, "loss": 7.8869, "step": 22460 }, { "epoch": 2.13, "learning_rate": 0.00015735433668627823, "loss": 7.9133, "step": 22470 }, { "epoch": 2.13, "learning_rate": 0.0001573353577528943, "loss": 7.9333, "step": 22480 }, { "epoch": 2.13, "learning_rate": 0.00015731637881951036, "loss": 7.9291, "step": 22490 }, { "epoch": 2.14, "learning_rate": 0.00015729739988612642, "loss": 7.9055, "step": 22500 }, { "epoch": 2.14, "learning_rate": 0.00015727842095274245, "loss": 7.7936, "step": 22510 }, { "epoch": 2.14, "learning_rate": 0.0001572594420193585, "loss": 7.8572, "step": 22520 }, { "epoch": 2.14, "learning_rate": 0.00015724046308597457, "loss": 7.7666, "step": 22530 }, { "epoch": 2.14, "learning_rate": 0.00015722148415259064, "loss": 7.8616, "step": 22540 }, { "epoch": 2.14, "learning_rate": 0.0001572025052192067, "loss": 7.8381, "step": 22550 }, { "epoch": 2.14, "learning_rate": 0.00015718352628582273, "loss": 7.902, "step": 22560 }, { "epoch": 2.14, "learning_rate": 0.0001571645473524388, "loss": 7.8894, "step": 22570 }, { "epoch": 2.14, "learning_rate": 0.00015714556841905485, "loss": 7.8568, "step": 22580 }, { "epoch": 2.14, "learning_rate": 0.00015712658948567092, "loss": 7.8792, "step": 22590 }, { "epoch": 2.14, "learning_rate": 0.00015710761055228698, "loss": 7.8798, "step": 22600 }, { "epoch": 2.15, "learning_rate": 0.000157088631618903, "loss": 7.8514, "step": 22610 }, { "epoch": 2.15, "learning_rate": 0.00015706965268551907, "loss": 7.8087, "step": 22620 }, { "epoch": 2.15, "learning_rate": 0.00015705067375213513, "loss": 7.8658, "step": 22630 }, { "epoch": 2.15, "learning_rate": 0.0001570316948187512, "loss": 7.9174, "step": 22640 }, { "epoch": 2.15, "learning_rate": 0.00015701271588536726, "loss": 7.8491, "step": 22650 }, { "epoch": 2.15, "learning_rate": 0.00015699373695198332, "loss": 7.7947, "step": 22660 }, { "epoch": 2.15, "learning_rate": 0.00015697475801859935, "loss": 7.7663, "step": 22670 }, { "epoch": 2.15, "learning_rate": 0.0001569557790852154, "loss": 7.8722, "step": 22680 }, { "epoch": 2.15, "learning_rate": 0.00015693680015183147, "loss": 7.8574, "step": 22690 }, { "epoch": 2.15, "learning_rate": 0.00015691782121844754, "loss": 7.706, "step": 22700 }, { "epoch": 2.16, "learning_rate": 0.0001568988422850636, "loss": 7.8818, "step": 22710 }, { "epoch": 2.16, "learning_rate": 0.00015687986335167963, "loss": 7.8492, "step": 22720 }, { "epoch": 2.16, "learning_rate": 0.0001568608844182957, "loss": 7.8555, "step": 22730 }, { "epoch": 2.16, "learning_rate": 0.00015684190548491175, "loss": 7.8188, "step": 22740 }, { "epoch": 2.16, "learning_rate": 0.00015682292655152781, "loss": 7.8021, "step": 22750 }, { "epoch": 2.16, "learning_rate": 0.00015680394761814388, "loss": 7.9007, "step": 22760 }, { "epoch": 2.16, "learning_rate": 0.0001567849686847599, "loss": 7.7355, "step": 22770 }, { "epoch": 2.16, "learning_rate": 0.00015676598975137597, "loss": 7.7312, "step": 22780 }, { "epoch": 2.16, "learning_rate": 0.00015674701081799203, "loss": 7.9311, "step": 22790 }, { "epoch": 2.16, "learning_rate": 0.0001567280318846081, "loss": 7.879, "step": 22800 }, { "epoch": 2.16, "learning_rate": 0.00015670905295122415, "loss": 7.8623, "step": 22810 }, { "epoch": 2.17, "learning_rate": 0.0001566900740178402, "loss": 7.8358, "step": 22820 }, { "epoch": 2.17, "learning_rate": 0.00015667109508445625, "loss": 7.8962, "step": 22830 }, { "epoch": 2.17, "learning_rate": 0.0001566521161510723, "loss": 7.8288, "step": 22840 }, { "epoch": 2.17, "learning_rate": 0.00015663313721768837, "loss": 7.9741, "step": 22850 }, { "epoch": 2.17, "learning_rate": 0.00015661415828430443, "loss": 7.8187, "step": 22860 }, { "epoch": 2.17, "learning_rate": 0.0001565951793509205, "loss": 7.8735, "step": 22870 }, { "epoch": 2.17, "learning_rate": 0.00015657620041753653, "loss": 7.9464, "step": 22880 }, { "epoch": 2.17, "learning_rate": 0.0001565572214841526, "loss": 7.8947, "step": 22890 }, { "epoch": 2.17, "learning_rate": 0.00015653824255076865, "loss": 7.8647, "step": 22900 }, { "epoch": 2.17, "learning_rate": 0.0001565192636173847, "loss": 7.8879, "step": 22910 }, { "epoch": 2.17, "learning_rate": 0.00015650028468400077, "loss": 7.8699, "step": 22920 }, { "epoch": 2.18, "learning_rate": 0.0001564813057506168, "loss": 7.8284, "step": 22930 }, { "epoch": 2.18, "learning_rate": 0.00015646232681723287, "loss": 7.826, "step": 22940 }, { "epoch": 2.18, "learning_rate": 0.00015644334788384893, "loss": 7.9656, "step": 22950 }, { "epoch": 2.18, "learning_rate": 0.000156424368950465, "loss": 7.748, "step": 22960 }, { "epoch": 2.18, "learning_rate": 0.00015640539001708105, "loss": 7.7614, "step": 22970 }, { "epoch": 2.18, "learning_rate": 0.0001563864110836971, "loss": 7.8216, "step": 22980 }, { "epoch": 2.18, "learning_rate": 0.00015636743215031315, "loss": 8.0235, "step": 22990 }, { "epoch": 2.18, "learning_rate": 0.0001563484532169292, "loss": 7.8968, "step": 23000 }, { "epoch": 2.18, "learning_rate": 0.00015632947428354527, "loss": 7.9184, "step": 23010 }, { "epoch": 2.18, "learning_rate": 0.00015631049535016133, "loss": 7.8753, "step": 23020 }, { "epoch": 2.19, "learning_rate": 0.0001562915164167774, "loss": 7.9102, "step": 23030 }, { "epoch": 2.19, "learning_rate": 0.00015627253748339343, "loss": 7.7806, "step": 23040 }, { "epoch": 2.19, "learning_rate": 0.0001562535585500095, "loss": 7.8738, "step": 23050 }, { "epoch": 2.19, "learning_rate": 0.00015623457961662555, "loss": 7.8276, "step": 23060 }, { "epoch": 2.19, "learning_rate": 0.0001562156006832416, "loss": 7.8367, "step": 23070 }, { "epoch": 2.19, "learning_rate": 0.00015619662174985767, "loss": 7.8343, "step": 23080 }, { "epoch": 2.19, "learning_rate": 0.0001561776428164737, "loss": 7.863, "step": 23090 }, { "epoch": 2.19, "learning_rate": 0.00015615866388308977, "loss": 7.8564, "step": 23100 }, { "epoch": 2.19, "learning_rate": 0.00015613968494970583, "loss": 7.8091, "step": 23110 }, { "epoch": 2.19, "learning_rate": 0.0001561207060163219, "loss": 7.881, "step": 23120 }, { "epoch": 2.19, "learning_rate": 0.00015610172708293795, "loss": 7.9338, "step": 23130 }, { "epoch": 2.2, "learning_rate": 0.000156082748149554, "loss": 7.8681, "step": 23140 }, { "epoch": 2.2, "learning_rate": 0.00015606376921617005, "loss": 7.7641, "step": 23150 }, { "epoch": 2.2, "learning_rate": 0.0001560447902827861, "loss": 7.9572, "step": 23160 }, { "epoch": 2.2, "learning_rate": 0.00015602581134940217, "loss": 7.793, "step": 23170 }, { "epoch": 2.2, "learning_rate": 0.00015600683241601823, "loss": 7.8405, "step": 23180 }, { "epoch": 2.2, "learning_rate": 0.0001559878534826343, "loss": 7.8318, "step": 23190 }, { "epoch": 2.2, "learning_rate": 0.00015596887454925033, "loss": 7.8334, "step": 23200 }, { "epoch": 2.2, "learning_rate": 0.0001559498956158664, "loss": 7.7095, "step": 23210 }, { "epoch": 2.2, "learning_rate": 0.00015593091668248245, "loss": 7.885, "step": 23220 }, { "epoch": 2.2, "learning_rate": 0.0001559119377490985, "loss": 7.904, "step": 23230 }, { "epoch": 2.21, "learning_rate": 0.00015589295881571457, "loss": 7.8512, "step": 23240 }, { "epoch": 2.21, "learning_rate": 0.0001558739798823306, "loss": 7.7914, "step": 23250 }, { "epoch": 2.21, "learning_rate": 0.00015585500094894667, "loss": 7.9344, "step": 23260 }, { "epoch": 2.21, "learning_rate": 0.00015583602201556273, "loss": 7.8801, "step": 23270 }, { "epoch": 2.21, "learning_rate": 0.0001558170430821788, "loss": 7.8102, "step": 23280 }, { "epoch": 2.21, "learning_rate": 0.00015579806414879485, "loss": 7.8134, "step": 23290 }, { "epoch": 2.21, "learning_rate": 0.0001557790852154109, "loss": 7.857, "step": 23300 }, { "epoch": 2.21, "learning_rate": 0.00015576010628202695, "loss": 7.902, "step": 23310 }, { "epoch": 2.21, "learning_rate": 0.000155741127348643, "loss": 7.8886, "step": 23320 }, { "epoch": 2.21, "learning_rate": 0.00015572214841525907, "loss": 7.8732, "step": 23330 }, { "epoch": 2.21, "learning_rate": 0.00015570316948187513, "loss": 7.8585, "step": 23340 }, { "epoch": 2.22, "learning_rate": 0.0001556841905484912, "loss": 7.8644, "step": 23350 }, { "epoch": 2.22, "learning_rate": 0.00015566521161510723, "loss": 7.8146, "step": 23360 }, { "epoch": 2.22, "learning_rate": 0.0001556462326817233, "loss": 7.8164, "step": 23370 }, { "epoch": 2.22, "learning_rate": 0.00015562725374833935, "loss": 7.7867, "step": 23380 }, { "epoch": 2.22, "learning_rate": 0.0001556082748149554, "loss": 7.9057, "step": 23390 }, { "epoch": 2.22, "learning_rate": 0.00015558929588157147, "loss": 7.8249, "step": 23400 }, { "epoch": 2.22, "learning_rate": 0.0001555703169481875, "loss": 7.9146, "step": 23410 }, { "epoch": 2.22, "learning_rate": 0.00015555133801480357, "loss": 7.8243, "step": 23420 }, { "epoch": 2.22, "learning_rate": 0.00015553235908141963, "loss": 7.9238, "step": 23430 }, { "epoch": 2.22, "learning_rate": 0.0001555133801480357, "loss": 7.8485, "step": 23440 }, { "epoch": 2.23, "learning_rate": 0.00015549440121465175, "loss": 7.821, "step": 23450 }, { "epoch": 2.23, "learning_rate": 0.00015547542228126779, "loss": 7.9004, "step": 23460 }, { "epoch": 2.23, "learning_rate": 0.00015545644334788385, "loss": 7.8125, "step": 23470 }, { "epoch": 2.23, "learning_rate": 0.0001554374644144999, "loss": 7.9106, "step": 23480 }, { "epoch": 2.23, "learning_rate": 0.00015541848548111597, "loss": 7.947, "step": 23490 }, { "epoch": 2.23, "learning_rate": 0.00015539950654773203, "loss": 7.8984, "step": 23500 }, { "epoch": 2.23, "learning_rate": 0.00015538052761434806, "loss": 7.9675, "step": 23510 }, { "epoch": 2.23, "learning_rate": 0.00015536154868096413, "loss": 7.8223, "step": 23520 }, { "epoch": 2.23, "learning_rate": 0.0001553425697475802, "loss": 7.8708, "step": 23530 }, { "epoch": 2.23, "learning_rate": 0.00015532359081419625, "loss": 7.9586, "step": 23540 }, { "epoch": 2.23, "learning_rate": 0.0001553046118808123, "loss": 7.818, "step": 23550 }, { "epoch": 2.24, "learning_rate": 0.00015528563294742837, "loss": 7.8387, "step": 23560 }, { "epoch": 2.24, "learning_rate": 0.0001552666540140444, "loss": 7.8902, "step": 23570 }, { "epoch": 2.24, "learning_rate": 0.00015524767508066047, "loss": 7.8054, "step": 23580 }, { "epoch": 2.24, "learning_rate": 0.00015522869614727653, "loss": 7.8994, "step": 23590 }, { "epoch": 2.24, "learning_rate": 0.0001552097172138926, "loss": 7.9191, "step": 23600 }, { "epoch": 2.24, "learning_rate": 0.00015519073828050865, "loss": 7.8364, "step": 23610 }, { "epoch": 2.24, "learning_rate": 0.00015517175934712468, "loss": 7.9712, "step": 23620 }, { "epoch": 2.24, "learning_rate": 0.00015515278041374075, "loss": 7.8791, "step": 23630 }, { "epoch": 2.24, "learning_rate": 0.0001551338014803568, "loss": 7.8106, "step": 23640 }, { "epoch": 2.24, "learning_rate": 0.00015511482254697287, "loss": 7.8697, "step": 23650 }, { "epoch": 2.25, "learning_rate": 0.00015509584361358893, "loss": 7.9453, "step": 23660 }, { "epoch": 2.25, "learning_rate": 0.00015507686468020496, "loss": 7.8686, "step": 23670 }, { "epoch": 2.25, "learning_rate": 0.00015505788574682103, "loss": 7.9112, "step": 23680 }, { "epoch": 2.25, "learning_rate": 0.0001550389068134371, "loss": 7.8276, "step": 23690 }, { "epoch": 2.25, "learning_rate": 0.00015501992788005315, "loss": 7.8023, "step": 23700 }, { "epoch": 2.25, "learning_rate": 0.0001550009489466692, "loss": 7.9167, "step": 23710 }, { "epoch": 2.25, "learning_rate": 0.00015498197001328527, "loss": 7.9369, "step": 23720 }, { "epoch": 2.25, "learning_rate": 0.0001549629910799013, "loss": 7.7976, "step": 23730 }, { "epoch": 2.25, "learning_rate": 0.00015494401214651737, "loss": 7.8442, "step": 23740 }, { "epoch": 2.25, "learning_rate": 0.00015492503321313343, "loss": 7.8399, "step": 23750 }, { "epoch": 2.25, "learning_rate": 0.0001549060542797495, "loss": 7.8819, "step": 23760 }, { "epoch": 2.26, "learning_rate": 0.00015488707534636555, "loss": 7.9336, "step": 23770 }, { "epoch": 2.26, "learning_rate": 0.00015486809641298158, "loss": 7.8861, "step": 23780 }, { "epoch": 2.26, "learning_rate": 0.00015484911747959765, "loss": 7.875, "step": 23790 }, { "epoch": 2.26, "learning_rate": 0.0001548301385462137, "loss": 7.8272, "step": 23800 }, { "epoch": 2.26, "learning_rate": 0.00015481115961282977, "loss": 7.8513, "step": 23810 }, { "epoch": 2.26, "learning_rate": 0.00015479218067944583, "loss": 7.8837, "step": 23820 }, { "epoch": 2.26, "learning_rate": 0.00015477320174606186, "loss": 7.851, "step": 23830 }, { "epoch": 2.26, "learning_rate": 0.00015475422281267792, "loss": 7.8773, "step": 23840 }, { "epoch": 2.26, "learning_rate": 0.00015473524387929399, "loss": 7.9477, "step": 23850 }, { "epoch": 2.26, "learning_rate": 0.00015471626494591005, "loss": 7.9506, "step": 23860 }, { "epoch": 2.27, "learning_rate": 0.0001546972860125261, "loss": 7.8128, "step": 23870 }, { "epoch": 2.27, "learning_rate": 0.00015467830707914217, "loss": 7.9312, "step": 23880 }, { "epoch": 2.27, "learning_rate": 0.0001546593281457582, "loss": 7.9325, "step": 23890 }, { "epoch": 2.27, "learning_rate": 0.00015464034921237426, "loss": 7.8281, "step": 23900 }, { "epoch": 2.27, "learning_rate": 0.00015462137027899033, "loss": 8.0098, "step": 23910 }, { "epoch": 2.27, "learning_rate": 0.0001546023913456064, "loss": 7.7144, "step": 23920 }, { "epoch": 2.27, "learning_rate": 0.00015458341241222245, "loss": 7.9032, "step": 23930 }, { "epoch": 2.27, "learning_rate": 0.00015456443347883848, "loss": 7.7935, "step": 23940 }, { "epoch": 2.27, "learning_rate": 0.00015454545454545454, "loss": 7.8352, "step": 23950 }, { "epoch": 2.27, "learning_rate": 0.0001545264756120706, "loss": 7.8817, "step": 23960 }, { "epoch": 2.27, "learning_rate": 0.00015450749667868667, "loss": 7.8525, "step": 23970 }, { "epoch": 2.28, "learning_rate": 0.00015448851774530273, "loss": 7.8043, "step": 23980 }, { "epoch": 2.28, "learning_rate": 0.00015446953881191876, "loss": 7.908, "step": 23990 }, { "epoch": 2.28, "learning_rate": 0.00015445055987853482, "loss": 7.8206, "step": 24000 } ], "max_steps": 105380, "num_train_epochs": 10, "total_flos": 2.5273221210952704e+16, "trial_name": null, "trial_params": null }