{ "best_metric": 23.848435348153973, "best_model_checkpoint": "/content/XLMFinetune/model-bin2/test/checkpoint-26168", "epoch": 2.0, "eval_steps": 500, "global_step": 26168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.111535523300229e-07, "loss": 5.3672, "step": 5 }, { "epoch": 0.0, "learning_rate": 1.3750954927425516e-06, "loss": 5.4479, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.1390374331550802e-06, "loss": 5.3383, "step": 15 }, { "epoch": 0.0, "learning_rate": 2.902979373567609e-06, "loss": 4.952, "step": 20 }, { "epoch": 0.0, "learning_rate": 3.666921313980138e-06, "loss": 4.4477, "step": 25 }, { "epoch": 0.0, "learning_rate": 4.430863254392666e-06, "loss": 4.4078, "step": 30 }, { "epoch": 0.0, "learning_rate": 5.194805194805195e-06, "loss": 4.5465, "step": 35 }, { "epoch": 0.0, "learning_rate": 5.958747135217724e-06, "loss": 4.3111, "step": 40 }, { "epoch": 0.0, "learning_rate": 6.722689075630252e-06, "loss": 4.3164, "step": 45 }, { "epoch": 0.0, "learning_rate": 7.4866310160427806e-06, "loss": 4.1561, "step": 50 }, { "epoch": 0.0, "learning_rate": 8.25057295645531e-06, "loss": 5.8121, "step": 55 }, { "epoch": 0.0, "learning_rate": 9.014514896867839e-06, "loss": 5.9082, "step": 60 }, { "epoch": 0.0, "learning_rate": 9.778456837280367e-06, "loss": 5.732, "step": 65 }, { "epoch": 0.01, "learning_rate": 1.0542398777692896e-05, "loss": 5.7656, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.1306340718105425e-05, "loss": 5.3199, "step": 75 }, { "epoch": 0.01, "learning_rate": 1.2070282658517953e-05, "loss": 5.1668, "step": 80 }, { "epoch": 0.01, "learning_rate": 1.2834224598930484e-05, "loss": 5.2023, "step": 85 }, { "epoch": 0.01, "learning_rate": 1.359816653934301e-05, "loss": 4.1394, "step": 90 }, { "epoch": 0.01, "learning_rate": 1.436210847975554e-05, "loss": 3.974, "step": 95 }, { "epoch": 0.01, "learning_rate": 1.5126050420168067e-05, "loss": 3.4523, "step": 100 }, { "epoch": 0.01, "learning_rate": 1.5889992360580598e-05, "loss": 5.4578, "step": 105 }, { "epoch": 0.01, "learning_rate": 1.6653934300993127e-05, "loss": 5.7855, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.7417876241405653e-05, "loss": 5.0931, "step": 115 }, { "epoch": 0.01, "learning_rate": 1.8181818181818182e-05, "loss": 5.3789, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.894576012223071e-05, "loss": 5.2285, "step": 125 }, { "epoch": 0.01, "learning_rate": 1.970970206264324e-05, "loss": 4.8857, "step": 130 }, { "epoch": 0.01, "learning_rate": 2.047364400305577e-05, "loss": 4.5129, "step": 135 }, { "epoch": 0.01, "learning_rate": 2.1237585943468296e-05, "loss": 4.0876, "step": 140 }, { "epoch": 0.01, "learning_rate": 2.2001527883880825e-05, "loss": 4.2938, "step": 145 }, { "epoch": 0.01, "learning_rate": 2.2765469824293358e-05, "loss": 3.9873, "step": 150 }, { "epoch": 0.01, "learning_rate": 2.3529411764705884e-05, "loss": 5.5092, "step": 155 }, { "epoch": 0.01, "learning_rate": 2.4293353705118413e-05, "loss": 5.9281, "step": 160 }, { "epoch": 0.01, "learning_rate": 2.5057295645530942e-05, "loss": 5.6781, "step": 165 }, { "epoch": 0.01, "learning_rate": 2.5821237585943468e-05, "loss": 4.7973, "step": 170 }, { "epoch": 0.01, "learning_rate": 2.6585179526355997e-05, "loss": 5.1039, "step": 175 }, { "epoch": 0.01, "learning_rate": 2.734912146676853e-05, "loss": 5.259, "step": 180 }, { "epoch": 0.01, "learning_rate": 2.8113063407181056e-05, "loss": 5.0793, "step": 185 }, { "epoch": 0.01, "learning_rate": 2.8877005347593582e-05, "loss": 4.8102, "step": 190 }, { "epoch": 0.01, "learning_rate": 2.9640947288006115e-05, "loss": 4.2803, "step": 195 }, { "epoch": 0.02, "learning_rate": 3.0404889228418644e-05, "loss": 3.758, "step": 200 }, { "epoch": 0.02, "learning_rate": 3.1168831168831166e-05, "loss": 5.957, "step": 205 }, { "epoch": 0.02, "learning_rate": 3.1932773109243696e-05, "loss": 5.9391, "step": 210 }, { "epoch": 0.02, "learning_rate": 3.269671504965623e-05, "loss": 5.4367, "step": 215 }, { "epoch": 0.02, "learning_rate": 3.3460656990068754e-05, "loss": 5.3895, "step": 220 }, { "epoch": 0.02, "learning_rate": 3.4224598930481284e-05, "loss": 5.2668, "step": 225 }, { "epoch": 0.02, "learning_rate": 3.498854087089381e-05, "loss": 4.9992, "step": 230 }, { "epoch": 0.02, "learning_rate": 3.575248281130634e-05, "loss": 4.3316, "step": 235 }, { "epoch": 0.02, "learning_rate": 3.651642475171887e-05, "loss": 4.4064, "step": 240 }, { "epoch": 0.02, "learning_rate": 3.72803666921314e-05, "loss": 3.9166, "step": 245 }, { "epoch": 0.02, "learning_rate": 3.804430863254393e-05, "loss": 3.5941, "step": 250 }, { "epoch": 0.02, "learning_rate": 3.880825057295646e-05, "loss": 5.6594, "step": 255 }, { "epoch": 0.02, "learning_rate": 3.957219251336899e-05, "loss": 5.5082, "step": 260 }, { "epoch": 0.02, "learning_rate": 4.033613445378152e-05, "loss": 5.4387, "step": 265 }, { "epoch": 0.02, "learning_rate": 4.110007639419404e-05, "loss": 5.1539, "step": 270 }, { "epoch": 0.02, "learning_rate": 4.186401833460657e-05, "loss": 5.0574, "step": 275 }, { "epoch": 0.02, "learning_rate": 4.26279602750191e-05, "loss": 4.8289, "step": 280 }, { "epoch": 0.02, "learning_rate": 4.339190221543163e-05, "loss": 4.8527, "step": 285 }, { "epoch": 0.02, "learning_rate": 4.415584415584416e-05, "loss": 4.3766, "step": 290 }, { "epoch": 0.02, "learning_rate": 4.491978609625669e-05, "loss": 3.7943, "step": 295 }, { "epoch": 0.02, "learning_rate": 4.5683728036669216e-05, "loss": 3.2498, "step": 300 }, { "epoch": 0.02, "learning_rate": 4.6447669977081745e-05, "loss": 5.7168, "step": 305 }, { "epoch": 0.02, "learning_rate": 4.7211611917494275e-05, "loss": 5.8766, "step": 310 }, { "epoch": 0.02, "learning_rate": 4.7975553857906804e-05, "loss": 5.7383, "step": 315 }, { "epoch": 0.02, "learning_rate": 4.8739495798319326e-05, "loss": 5.4395, "step": 320 }, { "epoch": 0.02, "learning_rate": 4.950343773873186e-05, "loss": 5.3113, "step": 325 }, { "epoch": 0.03, "learning_rate": 5.026737967914439e-05, "loss": 4.9613, "step": 330 }, { "epoch": 0.03, "learning_rate": 5.1031321619556914e-05, "loss": 4.5619, "step": 335 }, { "epoch": 0.03, "learning_rate": 5.1795263559969444e-05, "loss": 4.3414, "step": 340 }, { "epoch": 0.03, "learning_rate": 5.255920550038197e-05, "loss": 4.4539, "step": 345 }, { "epoch": 0.03, "learning_rate": 5.332314744079451e-05, "loss": 3.3832, "step": 350 }, { "epoch": 0.03, "learning_rate": 5.408708938120703e-05, "loss": 5.9297, "step": 355 }, { "epoch": 0.03, "learning_rate": 5.485103132161956e-05, "loss": 5.4016, "step": 360 }, { "epoch": 0.03, "learning_rate": 5.561497326203209e-05, "loss": 4.8334, "step": 365 }, { "epoch": 0.03, "learning_rate": 5.637891520244461e-05, "loss": 5.2273, "step": 370 }, { "epoch": 0.03, "learning_rate": 5.714285714285714e-05, "loss": 4.9238, "step": 375 }, { "epoch": 0.03, "learning_rate": 5.7754010695187164e-05, "loss": 4.3672, "step": 380 }, { "epoch": 0.03, "learning_rate": 5.851795263559969e-05, "loss": 4.2797, "step": 385 }, { "epoch": 0.03, "learning_rate": 5.928189457601223e-05, "loss": 4.1217, "step": 390 }, { "epoch": 0.03, "learning_rate": 6.004583651642476e-05, "loss": 4.0648, "step": 395 }, { "epoch": 0.03, "learning_rate": 6.080977845683729e-05, "loss": 3.3252, "step": 400 }, { "epoch": 0.03, "learning_rate": 6.157372039724982e-05, "loss": 5.5906, "step": 405 }, { "epoch": 0.03, "learning_rate": 6.233766233766233e-05, "loss": 5.3297, "step": 410 }, { "epoch": 0.03, "learning_rate": 6.310160427807486e-05, "loss": 5.5746, "step": 415 }, { "epoch": 0.03, "learning_rate": 6.386554621848739e-05, "loss": 5.2078, "step": 420 }, { "epoch": 0.03, "learning_rate": 6.462948815889993e-05, "loss": 4.8525, "step": 425 }, { "epoch": 0.03, "learning_rate": 6.539343009931246e-05, "loss": 4.7914, "step": 430 }, { "epoch": 0.03, "learning_rate": 6.615737203972498e-05, "loss": 4.6715, "step": 435 }, { "epoch": 0.03, "learning_rate": 6.692131398013751e-05, "loss": 4.5393, "step": 440 }, { "epoch": 0.03, "learning_rate": 6.753246753246754e-05, "loss": 4.075, "step": 445 }, { "epoch": 0.03, "learning_rate": 6.829640947288007e-05, "loss": 3.6494, "step": 450 }, { "epoch": 0.03, "learning_rate": 6.90603514132926e-05, "loss": 5.5176, "step": 455 }, { "epoch": 0.04, "learning_rate": 6.967150496562261e-05, "loss": 6.3191, "step": 460 }, { "epoch": 0.04, "learning_rate": 7.043544690603514e-05, "loss": 4.6111, "step": 465 }, { "epoch": 0.04, "learning_rate": 7.119938884644768e-05, "loss": 5.2383, "step": 470 }, { "epoch": 0.04, "learning_rate": 7.181054239877769e-05, "loss": 5.5373, "step": 475 }, { "epoch": 0.04, "learning_rate": 7.257448433919023e-05, "loss": 4.9678, "step": 480 }, { "epoch": 0.04, "learning_rate": 7.333842627960276e-05, "loss": 4.3086, "step": 485 }, { "epoch": 0.04, "learning_rate": 7.410236822001529e-05, "loss": 4.2645, "step": 490 }, { "epoch": 0.04, "learning_rate": 7.486631016042782e-05, "loss": 3.6471, "step": 495 }, { "epoch": 0.04, "learning_rate": 7.563025210084033e-05, "loss": 2.734, "step": 500 }, { "epoch": 0.04, "learning_rate": 7.639419404125286e-05, "loss": 5.6322, "step": 505 }, { "epoch": 0.04, "learning_rate": 7.71581359816654e-05, "loss": 6.4516, "step": 510 }, { "epoch": 0.04, "learning_rate": 7.792207792207793e-05, "loss": 6.141, "step": 515 }, { "epoch": 0.04, "learning_rate": 7.868601986249046e-05, "loss": 4.4348, "step": 520 }, { "epoch": 0.04, "learning_rate": 7.944996180290298e-05, "loss": 4.6922, "step": 525 }, { "epoch": 0.04, "learning_rate": 8.021390374331551e-05, "loss": 4.8404, "step": 530 }, { "epoch": 0.04, "learning_rate": 8.097784568372804e-05, "loss": 5.326, "step": 535 }, { "epoch": 0.04, "learning_rate": 8.158899923605807e-05, "loss": 4.1762, "step": 540 }, { "epoch": 0.04, "learning_rate": 8.23529411764706e-05, "loss": 2.7068, "step": 545 }, { "epoch": 0.04, "learning_rate": 8.311688311688312e-05, "loss": 2.9233, "step": 550 }, { "epoch": 0.04, "learning_rate": 8.388082505729565e-05, "loss": 5.177, "step": 555 }, { "epoch": 0.04, "learning_rate": 8.464476699770818e-05, "loss": 5.1857, "step": 560 }, { "epoch": 0.04, "learning_rate": 8.540870893812071e-05, "loss": 4.8047, "step": 565 }, { "epoch": 0.04, "learning_rate": 8.617265087853324e-05, "loss": 5.1225, "step": 570 }, { "epoch": 0.04, "learning_rate": 8.693659281894576e-05, "loss": 3.5857, "step": 575 }, { "epoch": 0.04, "learning_rate": 8.770053475935829e-05, "loss": 5.016, "step": 580 }, { "epoch": 0.04, "learning_rate": 8.846447669977083e-05, "loss": 4.1475, "step": 585 }, { "epoch": 0.05, "learning_rate": 8.922841864018336e-05, "loss": 3.4462, "step": 590 }, { "epoch": 0.05, "learning_rate": 8.999236058059587e-05, "loss": 2.926, "step": 595 }, { "epoch": 0.05, "learning_rate": 9.07563025210084e-05, "loss": 3.36, "step": 600 }, { "epoch": 0.05, "learning_rate": 9.152024446142093e-05, "loss": 5.6203, "step": 605 }, { "epoch": 0.05, "learning_rate": 9.228418640183346e-05, "loss": 5.2746, "step": 610 }, { "epoch": 0.05, "learning_rate": 9.3048128342246e-05, "loss": 6.3246, "step": 615 }, { "epoch": 0.05, "learning_rate": 9.381207028265852e-05, "loss": 4.7615, "step": 620 }, { "epoch": 0.05, "learning_rate": 9.457601222307105e-05, "loss": 4.4709, "step": 625 }, { "epoch": 0.05, "learning_rate": 9.533995416348358e-05, "loss": 4.0367, "step": 630 }, { "epoch": 0.05, "learning_rate": 9.610389610389611e-05, "loss": 4.5443, "step": 635 }, { "epoch": 0.05, "learning_rate": 9.686783804430864e-05, "loss": 2.754, "step": 640 }, { "epoch": 0.05, "learning_rate": 9.763177998472117e-05, "loss": 2.841, "step": 645 }, { "epoch": 0.05, "learning_rate": 9.83957219251337e-05, "loss": 2.6588, "step": 650 }, { "epoch": 0.05, "learning_rate": 9.915966386554623e-05, "loss": 5.3617, "step": 655 }, { "epoch": 0.05, "learning_rate": 9.992360580595875e-05, "loss": 6.0061, "step": 660 }, { "epoch": 0.05, "learning_rate": 0.00010068754774637128, "loss": 5.8316, "step": 665 }, { "epoch": 0.05, "learning_rate": 0.00010145148968678381, "loss": 5.1043, "step": 670 }, { "epoch": 0.05, "learning_rate": 0.00010221543162719633, "loss": 3.8354, "step": 675 }, { "epoch": 0.05, "learning_rate": 0.00010297937356760887, "loss": 3.8439, "step": 680 }, { "epoch": 0.05, "learning_rate": 0.00010374331550802139, "loss": 5.7764, "step": 685 }, { "epoch": 0.05, "learning_rate": 0.00010450725744843393, "loss": 3.8581, "step": 690 }, { "epoch": 0.05, "learning_rate": 0.00010527119938884645, "loss": 2.841, "step": 695 }, { "epoch": 0.05, "learning_rate": 0.00010603514132925898, "loss": 2.4475, "step": 700 }, { "epoch": 0.05, "learning_rate": 0.00010679908326967152, "loss": 5.5125, "step": 705 }, { "epoch": 0.05, "learning_rate": 0.00010756302521008403, "loss": 5.4578, "step": 710 }, { "epoch": 0.05, "learning_rate": 0.00010832696715049658, "loss": 5.5195, "step": 715 }, { "epoch": 0.06, "learning_rate": 0.00010909090909090909, "loss": 4.5291, "step": 720 }, { "epoch": 0.06, "learning_rate": 0.00010985485103132162, "loss": 4.5887, "step": 725 }, { "epoch": 0.06, "learning_rate": 0.00011061879297173416, "loss": 5.2873, "step": 730 }, { "epoch": 0.06, "learning_rate": 0.00011138273491214668, "loss": 3.6059, "step": 735 }, { "epoch": 0.06, "learning_rate": 0.00011214667685255922, "loss": 3.4224, "step": 740 }, { "epoch": 0.06, "learning_rate": 0.00011291061879297174, "loss": 3.1507, "step": 745 }, { "epoch": 0.06, "learning_rate": 0.00011367456073338427, "loss": 2.7281, "step": 750 }, { "epoch": 0.06, "learning_rate": 0.00011443850267379678, "loss": 4.7617, "step": 755 }, { "epoch": 0.06, "learning_rate": 0.00011520244461420933, "loss": 5.1021, "step": 760 }, { "epoch": 0.06, "learning_rate": 0.00011596638655462187, "loss": 4.8506, "step": 765 }, { "epoch": 0.06, "learning_rate": 0.00011673032849503439, "loss": 4.4602, "step": 770 }, { "epoch": 0.06, "learning_rate": 0.00011749427043544691, "loss": 3.5974, "step": 775 }, { "epoch": 0.06, "learning_rate": 0.00011825821237585943, "loss": 3.5879, "step": 780 }, { "epoch": 0.06, "learning_rate": 0.00011902215431627197, "loss": 4.2986, "step": 785 }, { "epoch": 0.06, "learning_rate": 0.00011978609625668449, "loss": 2.5129, "step": 790 }, { "epoch": 0.06, "learning_rate": 0.00012055003819709703, "loss": 3.3521, "step": 795 }, { "epoch": 0.06, "learning_rate": 0.00012131398013750956, "loss": 1.9732, "step": 800 }, { "epoch": 0.06, "learning_rate": 0.00012207792207792208, "loss": 4.8342, "step": 805 }, { "epoch": 0.06, "learning_rate": 0.0001228418640183346, "loss": 5.3076, "step": 810 }, { "epoch": 0.06, "learning_rate": 0.00012360580595874714, "loss": 4.8811, "step": 815 }, { "epoch": 0.06, "learning_rate": 0.00012436974789915966, "loss": 5.3889, "step": 820 }, { "epoch": 0.06, "learning_rate": 0.0001251336898395722, "loss": 3.7623, "step": 825 }, { "epoch": 0.06, "learning_rate": 0.00012589763177998472, "loss": 4.561, "step": 830 }, { "epoch": 0.06, "learning_rate": 0.00012666157372039725, "loss": 4.0161, "step": 835 }, { "epoch": 0.06, "learning_rate": 0.00012742551566080978, "loss": 3.5828, "step": 840 }, { "epoch": 0.06, "learning_rate": 0.0001281894576012223, "loss": 2.6784, "step": 845 }, { "epoch": 0.06, "learning_rate": 0.00012895339954163484, "loss": 2.1759, "step": 850 }, { "epoch": 0.07, "learning_rate": 0.00012971734148204737, "loss": 5.285, "step": 855 }, { "epoch": 0.07, "learning_rate": 0.0001304812834224599, "loss": 4.3326, "step": 860 }, { "epoch": 0.07, "learning_rate": 0.00013124522536287243, "loss": 4.8477, "step": 865 }, { "epoch": 0.07, "learning_rate": 0.00013200916730328496, "loss": 3.8988, "step": 870 }, { "epoch": 0.07, "learning_rate": 0.0001327731092436975, "loss": 3.8404, "step": 875 }, { "epoch": 0.07, "learning_rate": 0.00013353705118411002, "loss": 2.9827, "step": 880 }, { "epoch": 0.07, "learning_rate": 0.00013430099312452255, "loss": 3.5191, "step": 885 }, { "epoch": 0.07, "learning_rate": 0.00013506493506493507, "loss": 3.0866, "step": 890 }, { "epoch": 0.07, "learning_rate": 0.0001358288770053476, "loss": 3.6331, "step": 895 }, { "epoch": 0.07, "learning_rate": 0.00013659281894576013, "loss": 2.1087, "step": 900 }, { "epoch": 0.07, "learning_rate": 0.00013735676088617266, "loss": 4.9768, "step": 905 }, { "epoch": 0.07, "learning_rate": 0.0001381207028265852, "loss": 4.974, "step": 910 }, { "epoch": 0.07, "learning_rate": 0.00013888464476699772, "loss": 5.1777, "step": 915 }, { "epoch": 0.07, "learning_rate": 0.00013964858670741022, "loss": 5.3486, "step": 920 }, { "epoch": 0.07, "learning_rate": 0.00014041252864782278, "loss": 4.2877, "step": 925 }, { "epoch": 0.07, "learning_rate": 0.0001411764705882353, "loss": 3.8871, "step": 930 }, { "epoch": 0.07, "learning_rate": 0.00014194041252864784, "loss": 3.4246, "step": 935 }, { "epoch": 0.07, "learning_rate": 0.00014270435446906037, "loss": 4.7645, "step": 940 }, { "epoch": 0.07, "learning_rate": 0.00014346829640947287, "loss": 2.9158, "step": 945 }, { "epoch": 0.07, "learning_rate": 0.00014423223834988543, "loss": 1.6789, "step": 950 }, { "epoch": 0.07, "learning_rate": 0.00014499618029029793, "loss": 4.8162, "step": 955 }, { "epoch": 0.07, "learning_rate": 0.00014576012223071048, "loss": 4.7348, "step": 960 }, { "epoch": 0.07, "learning_rate": 0.00014652406417112301, "loss": 4.8879, "step": 965 }, { "epoch": 0.07, "learning_rate": 0.00014728800611153552, "loss": 6.2023, "step": 970 }, { "epoch": 0.07, "learning_rate": 0.00014805194805194807, "loss": 3.7848, "step": 975 }, { "epoch": 0.07, "learning_rate": 0.00014881588999236057, "loss": 3.9518, "step": 980 }, { "epoch": 0.08, "learning_rate": 0.00014957983193277313, "loss": 5.0244, "step": 985 }, { "epoch": 0.08, "learning_rate": 0.00015034377387318566, "loss": 3.1289, "step": 990 }, { "epoch": 0.08, "learning_rate": 0.00015110771581359816, "loss": 2.6522, "step": 995 }, { "epoch": 0.08, "learning_rate": 0.00015187165775401072, "loss": 2.3406, "step": 1000 }, { "epoch": 0.08, "learning_rate": 0.00015263559969442322, "loss": 5.4945, "step": 1005 }, { "epoch": 0.08, "learning_rate": 0.00015339954163483578, "loss": 6.2398, "step": 1010 }, { "epoch": 0.08, "learning_rate": 0.00015416348357524828, "loss": 4.6941, "step": 1015 }, { "epoch": 0.08, "learning_rate": 0.0001549274255156608, "loss": 5.5852, "step": 1020 }, { "epoch": 0.08, "learning_rate": 0.00015569136745607337, "loss": 4.1475, "step": 1025 }, { "epoch": 0.08, "learning_rate": 0.00015645530939648587, "loss": 3.8572, "step": 1030 }, { "epoch": 0.08, "learning_rate": 0.0001572192513368984, "loss": 3.3049, "step": 1035 }, { "epoch": 0.08, "learning_rate": 0.00015798319327731093, "loss": 2.8052, "step": 1040 }, { "epoch": 0.08, "learning_rate": 0.00015874713521772346, "loss": 3.4445, "step": 1045 }, { "epoch": 0.08, "learning_rate": 0.00015951107715813598, "loss": 1.85, "step": 1050 }, { "epoch": 0.08, "learning_rate": 0.00016027501909854851, "loss": 5.0895, "step": 1055 }, { "epoch": 0.08, "learning_rate": 0.00016103896103896104, "loss": 4.1975, "step": 1060 }, { "epoch": 0.08, "learning_rate": 0.00016180290297937357, "loss": 4.8666, "step": 1065 }, { "epoch": 0.08, "learning_rate": 0.0001625668449197861, "loss": 5.3641, "step": 1070 }, { "epoch": 0.08, "learning_rate": 0.00016333078686019863, "loss": 4.3744, "step": 1075 }, { "epoch": 0.08, "learning_rate": 0.00016409472880061116, "loss": 4.8926, "step": 1080 }, { "epoch": 0.08, "learning_rate": 0.0001648586707410237, "loss": 3.8107, "step": 1085 }, { "epoch": 0.08, "learning_rate": 0.00016562261268143622, "loss": 2.8379, "step": 1090 }, { "epoch": 0.08, "learning_rate": 0.00016638655462184875, "loss": 2.4593, "step": 1095 }, { "epoch": 0.08, "learning_rate": 0.00016715049656226128, "loss": 2.2865, "step": 1100 }, { "epoch": 0.08, "learning_rate": 0.0001679144385026738, "loss": 5.5463, "step": 1105 }, { "epoch": 0.08, "learning_rate": 0.00016867838044308634, "loss": 5.9701, "step": 1110 }, { "epoch": 0.09, "learning_rate": 0.00016944232238349887, "loss": 4.5203, "step": 1115 }, { "epoch": 0.09, "learning_rate": 0.0001702062643239114, "loss": 4.7477, "step": 1120 }, { "epoch": 0.09, "learning_rate": 0.00017097020626432392, "loss": 3.3791, "step": 1125 }, { "epoch": 0.09, "learning_rate": 0.00017173414820473645, "loss": 3.7066, "step": 1130 }, { "epoch": 0.09, "learning_rate": 0.00017249809014514898, "loss": 4.1764, "step": 1135 }, { "epoch": 0.09, "learning_rate": 0.0001732620320855615, "loss": 3.2178, "step": 1140 }, { "epoch": 0.09, "learning_rate": 0.00017402597402597401, "loss": 2.541, "step": 1145 }, { "epoch": 0.09, "learning_rate": 0.00017478991596638657, "loss": 3.3618, "step": 1150 }, { "epoch": 0.09, "learning_rate": 0.0001755538579067991, "loss": 4.0588, "step": 1155 }, { "epoch": 0.09, "learning_rate": 0.00017631779984721163, "loss": 5.4387, "step": 1160 }, { "epoch": 0.09, "learning_rate": 0.00017708174178762416, "loss": 4.2572, "step": 1165 }, { "epoch": 0.09, "learning_rate": 0.00017784568372803666, "loss": 5.133, "step": 1170 }, { "epoch": 0.09, "learning_rate": 0.00017860962566844922, "loss": 4.4044, "step": 1175 }, { "epoch": 0.09, "learning_rate": 0.00017937356760886172, "loss": 3.218, "step": 1180 }, { "epoch": 0.09, "learning_rate": 0.00018013750954927428, "loss": 3.2953, "step": 1185 }, { "epoch": 0.09, "learning_rate": 0.0001809014514896868, "loss": 2.4731, "step": 1190 }, { "epoch": 0.09, "learning_rate": 0.0001816653934300993, "loss": 3.5734, "step": 1195 }, { "epoch": 0.09, "learning_rate": 0.00018242933537051186, "loss": 1.5107, "step": 1200 }, { "epoch": 0.09, "learning_rate": 0.00018319327731092437, "loss": 4.7984, "step": 1205 }, { "epoch": 0.09, "learning_rate": 0.00018395721925133692, "loss": 5.5371, "step": 1210 }, { "epoch": 0.09, "learning_rate": 0.00018472116119174942, "loss": 3.9248, "step": 1215 }, { "epoch": 0.09, "learning_rate": 0.00018548510313216195, "loss": 4.2715, "step": 1220 }, { "epoch": 0.09, "learning_rate": 0.0001862490450725745, "loss": 4.9904, "step": 1225 }, { "epoch": 0.09, "learning_rate": 0.000187012987012987, "loss": 3.2785, "step": 1230 }, { "epoch": 0.09, "learning_rate": 0.00018777692895339957, "loss": 3.5854, "step": 1235 }, { "epoch": 0.09, "learning_rate": 0.00018854087089381207, "loss": 3.5434, "step": 1240 }, { "epoch": 0.1, "learning_rate": 0.0001893048128342246, "loss": 2.4767, "step": 1245 }, { "epoch": 0.1, "learning_rate": 0.00019006875477463716, "loss": 1.1776, "step": 1250 }, { "epoch": 0.1, "learning_rate": 0.00019083269671504966, "loss": 5.5906, "step": 1255 }, { "epoch": 0.1, "learning_rate": 0.00019159663865546221, "loss": 5.1738, "step": 1260 }, { "epoch": 0.1, "learning_rate": 0.00019236058059587472, "loss": 4.7447, "step": 1265 }, { "epoch": 0.1, "learning_rate": 0.00019312452253628725, "loss": 4.5672, "step": 1270 }, { "epoch": 0.1, "learning_rate": 0.00019388846447669978, "loss": 3.4076, "step": 1275 }, { "epoch": 0.1, "learning_rate": 0.0001946524064171123, "loss": 3.9701, "step": 1280 }, { "epoch": 0.1, "learning_rate": 0.00019541634835752486, "loss": 3.6115, "step": 1285 }, { "epoch": 0.1, "learning_rate": 0.00019618029029793736, "loss": 3.1557, "step": 1290 }, { "epoch": 0.1, "learning_rate": 0.0001969442322383499, "loss": 2.698, "step": 1295 }, { "epoch": 0.1, "learning_rate": 0.00019770817417876242, "loss": 1.5998, "step": 1300 }, { "epoch": 0.1, "learning_rate": 0.00019847211611917495, "loss": 5.1922, "step": 1305 }, { "epoch": 0.1, "learning_rate": 0.00019923605805958748, "loss": 4.9844, "step": 1310 }, { "epoch": 0.1, "learning_rate": 0.0002, "loss": 3.5535, "step": 1315 }, { "epoch": 0.1, "learning_rate": 0.00019995977312039907, "loss": 3.9018, "step": 1320 }, { "epoch": 0.1, "learning_rate": 0.00019991954624079813, "loss": 4.1891, "step": 1325 }, { "epoch": 0.1, "learning_rate": 0.00019987931936119716, "loss": 3.2686, "step": 1330 }, { "epoch": 0.1, "learning_rate": 0.0001998390924815962, "loss": 3.498, "step": 1335 }, { "epoch": 0.1, "learning_rate": 0.00019979886560199525, "loss": 1.9607, "step": 1340 }, { "epoch": 0.1, "learning_rate": 0.0001997586387223943, "loss": 2.2703, "step": 1345 }, { "epoch": 0.1, "learning_rate": 0.00019971841184279336, "loss": 2.7828, "step": 1350 }, { "epoch": 0.1, "learning_rate": 0.00019967818496319242, "loss": 5.5039, "step": 1355 }, { "epoch": 0.1, "learning_rate": 0.00019963795808359148, "loss": 5.6221, "step": 1360 }, { "epoch": 0.1, "learning_rate": 0.0001995977312039905, "loss": 5.0553, "step": 1365 }, { "epoch": 0.1, "learning_rate": 0.00019955750432438957, "loss": 4.3732, "step": 1370 }, { "epoch": 0.11, "learning_rate": 0.0001995172774447886, "loss": 4.2107, "step": 1375 }, { "epoch": 0.11, "learning_rate": 0.00019947705056518766, "loss": 3.9906, "step": 1380 }, { "epoch": 0.11, "learning_rate": 0.00019943682368558672, "loss": 3.2902, "step": 1385 }, { "epoch": 0.11, "learning_rate": 0.00019939659680598577, "loss": 3.6535, "step": 1390 }, { "epoch": 0.11, "learning_rate": 0.00019935636992638483, "loss": 3.9422, "step": 1395 }, { "epoch": 0.11, "learning_rate": 0.00019931614304678386, "loss": 1.6013, "step": 1400 }, { "epoch": 0.11, "learning_rate": 0.00019927591616718292, "loss": 5.3186, "step": 1405 }, { "epoch": 0.11, "learning_rate": 0.00019923568928758195, "loss": 4.8236, "step": 1410 }, { "epoch": 0.11, "learning_rate": 0.000199195462407981, "loss": 4.2941, "step": 1415 }, { "epoch": 0.11, "learning_rate": 0.00019915523552838007, "loss": 3.9504, "step": 1420 }, { "epoch": 0.11, "learning_rate": 0.00019911500864877913, "loss": 3.9039, "step": 1425 }, { "epoch": 0.11, "learning_rate": 0.0001990747817691782, "loss": 3.1057, "step": 1430 }, { "epoch": 0.11, "learning_rate": 0.00019903455488957722, "loss": 3.3202, "step": 1435 }, { "epoch": 0.11, "learning_rate": 0.00019899432800997628, "loss": 4.9787, "step": 1440 }, { "epoch": 0.11, "learning_rate": 0.00019895410113037533, "loss": 2.4636, "step": 1445 }, { "epoch": 0.11, "learning_rate": 0.00019891387425077437, "loss": 2.2541, "step": 1450 }, { "epoch": 0.11, "learning_rate": 0.00019887364737117342, "loss": 4.0754, "step": 1455 }, { "epoch": 0.11, "learning_rate": 0.00019883342049157248, "loss": 5.0287, "step": 1460 }, { "epoch": 0.11, "learning_rate": 0.00019879319361197154, "loss": 4.4176, "step": 1465 }, { "epoch": 0.11, "learning_rate": 0.00019875296673237057, "loss": 4.0273, "step": 1470 }, { "epoch": 0.11, "learning_rate": 0.00019871273985276963, "loss": 4.6488, "step": 1475 }, { "epoch": 0.11, "learning_rate": 0.0001986725129731687, "loss": 3.7554, "step": 1480 }, { "epoch": 0.11, "learning_rate": 0.00019863228609356772, "loss": 4.065, "step": 1485 }, { "epoch": 0.11, "learning_rate": 0.00019859205921396678, "loss": 3.813, "step": 1490 }, { "epoch": 0.11, "learning_rate": 0.00019855183233436584, "loss": 2.2622, "step": 1495 }, { "epoch": 0.11, "learning_rate": 0.0001985116054547649, "loss": 1.2104, "step": 1500 }, { "epoch": 0.12, "learning_rate": 0.00019847137857516392, "loss": 4.7545, "step": 1505 }, { "epoch": 0.12, "learning_rate": 0.00019843115169556298, "loss": 5.3623, "step": 1510 }, { "epoch": 0.12, "learning_rate": 0.00019839092481596204, "loss": 3.8675, "step": 1515 }, { "epoch": 0.12, "learning_rate": 0.0001983506979363611, "loss": 3.4828, "step": 1520 }, { "epoch": 0.12, "learning_rate": 0.00019831047105676013, "loss": 3.7339, "step": 1525 }, { "epoch": 0.12, "learning_rate": 0.0001982702441771592, "loss": 3.509, "step": 1530 }, { "epoch": 0.12, "learning_rate": 0.00019823001729755825, "loss": 3.3487, "step": 1535 }, { "epoch": 0.12, "learning_rate": 0.00019818979041795728, "loss": 5.2811, "step": 1540 }, { "epoch": 0.12, "learning_rate": 0.00019814956353835634, "loss": 2.0317, "step": 1545 }, { "epoch": 0.12, "learning_rate": 0.0001981093366587554, "loss": 1.7232, "step": 1550 }, { "epoch": 0.12, "learning_rate": 0.00019806910977915445, "loss": 3.9217, "step": 1555 }, { "epoch": 0.12, "learning_rate": 0.00019802888289955348, "loss": 4.3631, "step": 1560 }, { "epoch": 0.12, "learning_rate": 0.00019798865601995254, "loss": 6.2506, "step": 1565 }, { "epoch": 0.12, "learning_rate": 0.00019795647451627176, "loss": 4.4891, "step": 1570 }, { "epoch": 0.12, "learning_rate": 0.00019791624763667082, "loss": 3.79, "step": 1575 }, { "epoch": 0.12, "learning_rate": 0.00019787602075706988, "loss": 4.5018, "step": 1580 }, { "epoch": 0.12, "learning_rate": 0.00019783579387746894, "loss": 3.6576, "step": 1585 }, { "epoch": 0.12, "learning_rate": 0.000197795566997868, "loss": 3.0058, "step": 1590 }, { "epoch": 0.12, "learning_rate": 0.00019775534011826705, "loss": 3.6505, "step": 1595 }, { "epoch": 0.12, "learning_rate": 0.00019771511323866608, "loss": 1.5467, "step": 1600 }, { "epoch": 0.12, "learning_rate": 0.00019767488635906514, "loss": 4.4375, "step": 1605 }, { "epoch": 0.12, "learning_rate": 0.00019763465947946417, "loss": 4.4432, "step": 1610 }, { "epoch": 0.12, "learning_rate": 0.00019759443259986323, "loss": 4.8396, "step": 1615 }, { "epoch": 0.12, "learning_rate": 0.0001975542057202623, "loss": 4.9068, "step": 1620 }, { "epoch": 0.12, "learning_rate": 0.00019751397884066135, "loss": 3.3321, "step": 1625 }, { "epoch": 0.12, "learning_rate": 0.0001974737519610604, "loss": 3.0243, "step": 1630 }, { "epoch": 0.12, "learning_rate": 0.00019743352508145944, "loss": 3.6723, "step": 1635 }, { "epoch": 0.13, "learning_rate": 0.0001973932982018585, "loss": 3.564, "step": 1640 }, { "epoch": 0.13, "learning_rate": 0.00019735307132225753, "loss": 1.2411, "step": 1645 }, { "epoch": 0.13, "learning_rate": 0.00019731284444265659, "loss": 0.7007, "step": 1650 }, { "epoch": 0.13, "learning_rate": 0.00019727261756305564, "loss": 4.9119, "step": 1655 }, { "epoch": 0.13, "learning_rate": 0.0001972323906834547, "loss": 6.3875, "step": 1660 }, { "epoch": 0.13, "learning_rate": 0.00019719216380385376, "loss": 4.9604, "step": 1665 }, { "epoch": 0.13, "learning_rate": 0.0001971519369242528, "loss": 5.1688, "step": 1670 }, { "epoch": 0.13, "learning_rate": 0.00019711171004465185, "loss": 3.3456, "step": 1675 }, { "epoch": 0.13, "learning_rate": 0.0001970714831650509, "loss": 4.251, "step": 1680 }, { "epoch": 0.13, "learning_rate": 0.00019703125628544994, "loss": 2.4675, "step": 1685 }, { "epoch": 0.13, "learning_rate": 0.000196991029405849, "loss": 3.2748, "step": 1690 }, { "epoch": 0.13, "learning_rate": 0.00019695080252624806, "loss": 2.7843, "step": 1695 }, { "epoch": 0.13, "learning_rate": 0.00019691057564664709, "loss": 2.922, "step": 1700 }, { "epoch": 0.13, "learning_rate": 0.00019687034876704614, "loss": 4.8305, "step": 1705 }, { "epoch": 0.13, "learning_rate": 0.0001968301218874452, "loss": 4.2068, "step": 1710 }, { "epoch": 0.13, "learning_rate": 0.00019678989500784426, "loss": 4.2574, "step": 1715 }, { "epoch": 0.13, "learning_rate": 0.0001967496681282433, "loss": 3.9824, "step": 1720 }, { "epoch": 0.13, "learning_rate": 0.00019670944124864235, "loss": 4.5187, "step": 1725 }, { "epoch": 0.13, "learning_rate": 0.0001966692143690414, "loss": 3.1695, "step": 1730 }, { "epoch": 0.13, "learning_rate": 0.00019662898748944044, "loss": 2.4495, "step": 1735 }, { "epoch": 0.13, "learning_rate": 0.0001965887606098395, "loss": 3.9741, "step": 1740 }, { "epoch": 0.13, "learning_rate": 0.00019654853373023856, "loss": 3.6335, "step": 1745 }, { "epoch": 0.13, "learning_rate": 0.00019650830685063761, "loss": 1.5263, "step": 1750 }, { "epoch": 0.13, "learning_rate": 0.00019646807997103667, "loss": 4.2363, "step": 1755 }, { "epoch": 0.13, "learning_rate": 0.0001964278530914357, "loss": 4.9076, "step": 1760 }, { "epoch": 0.13, "learning_rate": 0.00019638762621183476, "loss": 4.7342, "step": 1765 }, { "epoch": 0.14, "learning_rate": 0.0001963473993322338, "loss": 6.3031, "step": 1770 }, { "epoch": 0.14, "learning_rate": 0.00019630717245263285, "loss": 4.618, "step": 1775 }, { "epoch": 0.14, "learning_rate": 0.0001962669455730319, "loss": 3.8465, "step": 1780 }, { "epoch": 0.14, "learning_rate": 0.00019622671869343097, "loss": 2.1621, "step": 1785 }, { "epoch": 0.14, "learning_rate": 0.00019618649181383003, "loss": 2.1442, "step": 1790 }, { "epoch": 0.14, "learning_rate": 0.00019614626493422906, "loss": 2.3709, "step": 1795 }, { "epoch": 0.14, "learning_rate": 0.00019610603805462812, "loss": 3.6523, "step": 1800 }, { "epoch": 0.14, "learning_rate": 0.00019606581117502715, "loss": 5.8377, "step": 1805 }, { "epoch": 0.14, "learning_rate": 0.0001960255842954262, "loss": 4.6049, "step": 1810 }, { "epoch": 0.14, "learning_rate": 0.00019598535741582526, "loss": 4.4676, "step": 1815 }, { "epoch": 0.14, "learning_rate": 0.00019594513053622432, "loss": 4.2148, "step": 1820 }, { "epoch": 0.14, "learning_rate": 0.00019590490365662338, "loss": 4.3605, "step": 1825 }, { "epoch": 0.14, "learning_rate": 0.00019586467677702244, "loss": 3.4217, "step": 1830 }, { "epoch": 0.14, "learning_rate": 0.00019582444989742147, "loss": 4.5377, "step": 1835 }, { "epoch": 0.14, "learning_rate": 0.0001957842230178205, "loss": 2.128, "step": 1840 }, { "epoch": 0.14, "learning_rate": 0.00019574399613821956, "loss": 2.0646, "step": 1845 }, { "epoch": 0.14, "learning_rate": 0.00019570376925861862, "loss": 2.895, "step": 1850 }, { "epoch": 0.14, "learning_rate": 0.00019566354237901768, "loss": 4.5703, "step": 1855 }, { "epoch": 0.14, "learning_rate": 0.00019562331549941673, "loss": 4.5592, "step": 1860 }, { "epoch": 0.14, "learning_rate": 0.0001955830886198158, "loss": 5.3285, "step": 1865 }, { "epoch": 0.14, "learning_rate": 0.00019554286174021482, "loss": 4.4393, "step": 1870 }, { "epoch": 0.14, "learning_rate": 0.00019550263486061385, "loss": 4.2725, "step": 1875 }, { "epoch": 0.14, "learning_rate": 0.0001954624079810129, "loss": 2.5464, "step": 1880 }, { "epoch": 0.14, "learning_rate": 0.00019542218110141197, "loss": 3.3585, "step": 1885 }, { "epoch": 0.14, "learning_rate": 0.00019538195422181103, "loss": 2.9919, "step": 1890 }, { "epoch": 0.14, "learning_rate": 0.0001953417273422101, "loss": 1.8249, "step": 1895 }, { "epoch": 0.15, "learning_rate": 0.00019530150046260915, "loss": 2.5888, "step": 1900 }, { "epoch": 0.15, "learning_rate": 0.00019526127358300818, "loss": 5.8953, "step": 1905 }, { "epoch": 0.15, "learning_rate": 0.0001952210467034072, "loss": 5.2256, "step": 1910 }, { "epoch": 0.15, "learning_rate": 0.00019518081982380627, "loss": 4.6908, "step": 1915 }, { "epoch": 0.15, "learning_rate": 0.00019514059294420532, "loss": 3.9816, "step": 1920 }, { "epoch": 0.15, "learning_rate": 0.00019510036606460438, "loss": 3.4984, "step": 1925 }, { "epoch": 0.15, "learning_rate": 0.00019506013918500344, "loss": 2.786, "step": 1930 }, { "epoch": 0.15, "learning_rate": 0.0001950199123054025, "loss": 2.0924, "step": 1935 }, { "epoch": 0.15, "learning_rate": 0.00019497968542580153, "loss": 2.1778, "step": 1940 }, { "epoch": 0.15, "learning_rate": 0.00019493945854620056, "loss": 3.1165, "step": 1945 }, { "epoch": 0.15, "learning_rate": 0.00019489923166659962, "loss": 1.3559, "step": 1950 }, { "epoch": 0.15, "learning_rate": 0.00019485900478699868, "loss": 4.5992, "step": 1955 }, { "epoch": 0.15, "learning_rate": 0.00019481877790739774, "loss": 4.9736, "step": 1960 }, { "epoch": 0.15, "learning_rate": 0.0001947785510277968, "loss": 5.0469, "step": 1965 }, { "epoch": 0.15, "learning_rate": 0.00019473832414819585, "loss": 4.3723, "step": 1970 }, { "epoch": 0.15, "learning_rate": 0.00019469809726859488, "loss": 3.779, "step": 1975 }, { "epoch": 0.15, "learning_rate": 0.00019465787038899394, "loss": 4.1662, "step": 1980 }, { "epoch": 0.15, "learning_rate": 0.00019461764350939297, "loss": 3.2694, "step": 1985 }, { "epoch": 0.15, "learning_rate": 0.00019457741662979203, "loss": 2.2375, "step": 1990 }, { "epoch": 0.15, "learning_rate": 0.0001945371897501911, "loss": 2.9865, "step": 1995 }, { "epoch": 0.15, "learning_rate": 0.00019449696287059015, "loss": 1.6985, "step": 2000 }, { "epoch": 0.15, "learning_rate": 0.00019445673599098918, "loss": 4.2811, "step": 2005 }, { "epoch": 0.15, "learning_rate": 0.00019441650911138824, "loss": 4.726, "step": 2010 }, { "epoch": 0.15, "learning_rate": 0.0001943762822317873, "loss": 4.2904, "step": 2015 }, { "epoch": 0.15, "learning_rate": 0.00019433605535218633, "loss": 3.7736, "step": 2020 }, { "epoch": 0.15, "learning_rate": 0.00019429582847258538, "loss": 3.3697, "step": 2025 }, { "epoch": 0.16, "learning_rate": 0.00019425560159298444, "loss": 3.1179, "step": 2030 }, { "epoch": 0.16, "learning_rate": 0.0001942153747133835, "loss": 2.9869, "step": 2035 }, { "epoch": 0.16, "learning_rate": 0.00019417514783378253, "loss": 3.0633, "step": 2040 }, { "epoch": 0.16, "learning_rate": 0.0001941349209541816, "loss": 3.2251, "step": 2045 }, { "epoch": 0.16, "learning_rate": 0.00019409469407458065, "loss": 0.7779, "step": 2050 }, { "epoch": 0.16, "learning_rate": 0.0001940544671949797, "loss": 4.3598, "step": 2055 }, { "epoch": 0.16, "learning_rate": 0.00019401424031537874, "loss": 4.9221, "step": 2060 }, { "epoch": 0.16, "learning_rate": 0.0001939740134357778, "loss": 4.2301, "step": 2065 }, { "epoch": 0.16, "learning_rate": 0.00019393378655617685, "loss": 5.2469, "step": 2070 }, { "epoch": 0.16, "learning_rate": 0.00019389355967657589, "loss": 3.3051, "step": 2075 }, { "epoch": 0.16, "learning_rate": 0.00019385333279697494, "loss": 3.6274, "step": 2080 }, { "epoch": 0.16, "learning_rate": 0.000193813105917374, "loss": 4.6274, "step": 2085 }, { "epoch": 0.16, "learning_rate": 0.00019377287903777306, "loss": 2.9797, "step": 2090 }, { "epoch": 0.16, "learning_rate": 0.0001937326521581721, "loss": 2.6226, "step": 2095 }, { "epoch": 0.16, "learning_rate": 0.00019369242527857115, "loss": 2.4438, "step": 2100 }, { "epoch": 0.16, "learning_rate": 0.0001936521983989702, "loss": 4.2717, "step": 2105 }, { "epoch": 0.16, "learning_rate": 0.00019361197151936924, "loss": 4.4154, "step": 2110 }, { "epoch": 0.16, "learning_rate": 0.0001935717446397683, "loss": 5.1223, "step": 2115 }, { "epoch": 0.16, "learning_rate": 0.00019353151776016736, "loss": 4.3166, "step": 2120 }, { "epoch": 0.16, "learning_rate": 0.00019349129088056641, "loss": 3.5363, "step": 2125 }, { "epoch": 0.16, "learning_rate": 0.00019345106400096547, "loss": 4.075, "step": 2130 }, { "epoch": 0.16, "learning_rate": 0.0001934108371213645, "loss": 2.5993, "step": 2135 }, { "epoch": 0.16, "learning_rate": 0.00019337061024176356, "loss": 2.8878, "step": 2140 }, { "epoch": 0.16, "learning_rate": 0.0001933303833621626, "loss": 2.9542, "step": 2145 }, { "epoch": 0.16, "learning_rate": 0.00019329015648256165, "loss": 3.0859, "step": 2150 }, { "epoch": 0.16, "learning_rate": 0.0001932499296029607, "loss": 5.251, "step": 2155 }, { "epoch": 0.17, "learning_rate": 0.00019320970272335977, "loss": 4.7705, "step": 2160 }, { "epoch": 0.17, "learning_rate": 0.00019316947584375883, "loss": 4.5398, "step": 2165 }, { "epoch": 0.17, "learning_rate": 0.00019312924896415786, "loss": 4.2553, "step": 2170 }, { "epoch": 0.17, "learning_rate": 0.00019308902208455691, "loss": 5.5578, "step": 2175 }, { "epoch": 0.17, "learning_rate": 0.00019304879520495595, "loss": 3.7627, "step": 2180 }, { "epoch": 0.17, "learning_rate": 0.000193008568325355, "loss": 3.8887, "step": 2185 }, { "epoch": 0.17, "learning_rate": 0.00019296834144575406, "loss": 3.8691, "step": 2190 }, { "epoch": 0.17, "learning_rate": 0.00019292811456615312, "loss": 2.8983, "step": 2195 }, { "epoch": 0.17, "learning_rate": 0.00019288788768655218, "loss": 1.3964, "step": 2200 }, { "epoch": 0.17, "learning_rate": 0.00019284766080695124, "loss": 5.0994, "step": 2205 }, { "epoch": 0.17, "learning_rate": 0.00019280743392735027, "loss": 5.5555, "step": 2210 }, { "epoch": 0.17, "learning_rate": 0.0001927672070477493, "loss": 4.6605, "step": 2215 }, { "epoch": 0.17, "learning_rate": 0.00019272698016814836, "loss": 3.9756, "step": 2220 }, { "epoch": 0.17, "learning_rate": 0.00019268675328854742, "loss": 4.1566, "step": 2225 }, { "epoch": 0.17, "learning_rate": 0.00019264652640894647, "loss": 3.3558, "step": 2230 }, { "epoch": 0.17, "learning_rate": 0.00019260629952934553, "loss": 4.5201, "step": 2235 }, { "epoch": 0.17, "learning_rate": 0.0001925660726497446, "loss": 3.4633, "step": 2240 }, { "epoch": 0.17, "learning_rate": 0.00019252584577014362, "loss": 3.0613, "step": 2245 }, { "epoch": 0.17, "learning_rate": 0.00019248561889054265, "loss": 1.4768, "step": 2250 }, { "epoch": 0.17, "learning_rate": 0.0001924453920109417, "loss": 5.6229, "step": 2255 }, { "epoch": 0.17, "learning_rate": 0.00019240516513134077, "loss": 4.9016, "step": 2260 }, { "epoch": 0.17, "learning_rate": 0.00019236493825173983, "loss": 4.9314, "step": 2265 }, { "epoch": 0.17, "learning_rate": 0.00019232471137213889, "loss": 3.6584, "step": 2270 }, { "epoch": 0.17, "learning_rate": 0.00019228448449253794, "loss": 2.9309, "step": 2275 }, { "epoch": 0.17, "learning_rate": 0.00019224425761293698, "loss": 5.1328, "step": 2280 }, { "epoch": 0.17, "learning_rate": 0.000192204030733336, "loss": 1.9352, "step": 2285 }, { "epoch": 0.18, "learning_rate": 0.00019216380385373506, "loss": 2.3981, "step": 2290 }, { "epoch": 0.18, "learning_rate": 0.00019212357697413412, "loss": 4.707, "step": 2295 }, { "epoch": 0.18, "learning_rate": 0.00019208335009453318, "loss": 0.5809, "step": 2300 }, { "epoch": 0.18, "learning_rate": 0.00019204312321493224, "loss": 5.2449, "step": 2305 }, { "epoch": 0.18, "learning_rate": 0.00019200289633533127, "loss": 4.7281, "step": 2310 }, { "epoch": 0.18, "learning_rate": 0.00019196266945573033, "loss": 4.5684, "step": 2315 }, { "epoch": 0.18, "learning_rate": 0.00019192244257612936, "loss": 4.249, "step": 2320 }, { "epoch": 0.18, "learning_rate": 0.00019188221569652842, "loss": 3.5209, "step": 2325 }, { "epoch": 0.18, "learning_rate": 0.00019184198881692748, "loss": 3.7265, "step": 2330 }, { "epoch": 0.18, "learning_rate": 0.00019180176193732653, "loss": 3.7072, "step": 2335 }, { "epoch": 0.18, "learning_rate": 0.0001917615350577256, "loss": 2.195, "step": 2340 }, { "epoch": 0.18, "learning_rate": 0.00019172130817812462, "loss": 3.1261, "step": 2345 }, { "epoch": 0.18, "learning_rate": 0.00019168108129852368, "loss": 1.413, "step": 2350 }, { "epoch": 0.18, "learning_rate": 0.00019164085441892274, "loss": 4.6439, "step": 2355 }, { "epoch": 0.18, "learning_rate": 0.00019160062753932177, "loss": 6.491, "step": 2360 }, { "epoch": 0.18, "learning_rate": 0.00019156040065972083, "loss": 4.1846, "step": 2365 }, { "epoch": 0.18, "learning_rate": 0.0001915201737801199, "loss": 4.8385, "step": 2370 }, { "epoch": 0.18, "learning_rate": 0.00019147994690051895, "loss": 4.0875, "step": 2375 }, { "epoch": 0.18, "learning_rate": 0.00019143972002091798, "loss": 3.4586, "step": 2380 }, { "epoch": 0.18, "learning_rate": 0.00019139949314131704, "loss": 3.9514, "step": 2385 }, { "epoch": 0.18, "learning_rate": 0.0001913592662617161, "loss": 1.9388, "step": 2390 }, { "epoch": 0.18, "learning_rate": 0.00019131903938211513, "loss": 3.4899, "step": 2395 }, { "epoch": 0.18, "learning_rate": 0.00019127881250251418, "loss": 0.8589, "step": 2400 }, { "epoch": 0.18, "learning_rate": 0.00019123858562291324, "loss": 5.1305, "step": 2405 }, { "epoch": 0.18, "learning_rate": 0.0001911983587433123, "loss": 5.6215, "step": 2410 }, { "epoch": 0.18, "learning_rate": 0.00019115813186371133, "loss": 5.152, "step": 2415 }, { "epoch": 0.18, "learning_rate": 0.0001911179049841104, "loss": 4.2031, "step": 2420 }, { "epoch": 0.19, "learning_rate": 0.00019107767810450945, "loss": 3.8862, "step": 2425 }, { "epoch": 0.19, "learning_rate": 0.0001910374512249085, "loss": 3.2398, "step": 2430 }, { "epoch": 0.19, "learning_rate": 0.00019099722434530754, "loss": 3.3589, "step": 2435 }, { "epoch": 0.19, "learning_rate": 0.0001909569974657066, "loss": 1.6699, "step": 2440 }, { "epoch": 0.19, "learning_rate": 0.00019091677058610565, "loss": 2.6013, "step": 2445 }, { "epoch": 0.19, "learning_rate": 0.00019087654370650468, "loss": 1.346, "step": 2450 }, { "epoch": 0.19, "learning_rate": 0.00019083631682690374, "loss": 5.7719, "step": 2455 }, { "epoch": 0.19, "learning_rate": 0.0001907960899473028, "loss": 4.8369, "step": 2460 }, { "epoch": 0.19, "learning_rate": 0.00019075586306770186, "loss": 3.7029, "step": 2465 }, { "epoch": 0.19, "learning_rate": 0.0001907156361881009, "loss": 2.6815, "step": 2470 }, { "epoch": 0.19, "learning_rate": 0.00019067540930849995, "loss": 4.5318, "step": 2475 }, { "epoch": 0.19, "learning_rate": 0.000190635182428899, "loss": 4.4757, "step": 2480 }, { "epoch": 0.19, "learning_rate": 0.00019059495554929804, "loss": 3.0317, "step": 2485 }, { "epoch": 0.19, "learning_rate": 0.0001905547286696971, "loss": 3.3823, "step": 2490 }, { "epoch": 0.19, "learning_rate": 0.00019051450179009615, "loss": 2.1039, "step": 2495 }, { "epoch": 0.19, "learning_rate": 0.0001904742749104952, "loss": 1.3054, "step": 2500 }, { "epoch": 0.19, "learning_rate": 0.00019043404803089427, "loss": 4.5176, "step": 2505 }, { "epoch": 0.19, "learning_rate": 0.0001903938211512933, "loss": 5.5738, "step": 2510 }, { "epoch": 0.19, "learning_rate": 0.00019035359427169236, "loss": 4.4791, "step": 2515 }, { "epoch": 0.19, "learning_rate": 0.0001903133673920914, "loss": 3.3446, "step": 2520 }, { "epoch": 0.19, "learning_rate": 0.00019027314051249045, "loss": 3.6072, "step": 2525 }, { "epoch": 0.19, "learning_rate": 0.0001902329136328895, "loss": 3.7518, "step": 2530 }, { "epoch": 0.19, "learning_rate": 0.00019019268675328857, "loss": 3.9649, "step": 2535 }, { "epoch": 0.19, "learning_rate": 0.00019015245987368762, "loss": 2.0974, "step": 2540 }, { "epoch": 0.19, "learning_rate": 0.00019011223299408666, "loss": 1.7716, "step": 2545 }, { "epoch": 0.19, "learning_rate": 0.00019007200611448571, "loss": 2.7839, "step": 2550 }, { "epoch": 0.2, "learning_rate": 0.00019003177923488474, "loss": 5.41, "step": 2555 }, { "epoch": 0.2, "learning_rate": 0.0001899915523552838, "loss": 4.9215, "step": 2560 }, { "epoch": 0.2, "learning_rate": 0.00018995132547568286, "loss": 3.7002, "step": 2565 }, { "epoch": 0.2, "learning_rate": 0.00018991109859608192, "loss": 4.8951, "step": 2570 }, { "epoch": 0.2, "learning_rate": 0.00018987087171648098, "loss": 2.5297, "step": 2575 }, { "epoch": 0.2, "learning_rate": 0.00018983064483688004, "loss": 2.9542, "step": 2580 }, { "epoch": 0.2, "learning_rate": 0.00018979041795727907, "loss": 2.4865, "step": 2585 }, { "epoch": 0.2, "learning_rate": 0.0001897501910776781, "loss": 2.3631, "step": 2590 }, { "epoch": 0.2, "learning_rate": 0.00018970996419807716, "loss": 2.8949, "step": 2595 }, { "epoch": 0.2, "learning_rate": 0.00018966973731847621, "loss": 0.6757, "step": 2600 }, { "epoch": 0.2, "learning_rate": 0.00018962951043887527, "loss": 4.5512, "step": 2605 }, { "epoch": 0.2, "learning_rate": 0.00018958928355927433, "loss": 5.1891, "step": 2610 }, { "epoch": 0.2, "learning_rate": 0.00018954905667967336, "loss": 5.5706, "step": 2615 }, { "epoch": 0.2, "learning_rate": 0.0001895088298000724, "loss": 6.3279, "step": 2620 }, { "epoch": 0.2, "learning_rate": 0.00018946860292047145, "loss": 4.7213, "step": 2625 }, { "epoch": 0.2, "learning_rate": 0.0001894283760408705, "loss": 3.6586, "step": 2630 }, { "epoch": 0.2, "learning_rate": 0.00018938814916126957, "loss": 3.4731, "step": 2635 }, { "epoch": 0.2, "learning_rate": 0.00018934792228166863, "loss": 2.9237, "step": 2640 }, { "epoch": 0.2, "learning_rate": 0.00018930769540206768, "loss": 1.2635, "step": 2645 }, { "epoch": 0.2, "learning_rate": 0.00018926746852246672, "loss": 2.0249, "step": 2650 }, { "epoch": 0.2, "learning_rate": 0.00018922724164286577, "loss": 4.6764, "step": 2655 }, { "epoch": 0.2, "learning_rate": 0.0001891870147632648, "loss": 5.0914, "step": 2660 }, { "epoch": 0.2, "learning_rate": 0.00018914678788366386, "loss": 4.6342, "step": 2665 }, { "epoch": 0.2, "learning_rate": 0.00018910656100406292, "loss": 4.3582, "step": 2670 }, { "epoch": 0.2, "learning_rate": 0.00018906633412446198, "loss": 3.7621, "step": 2675 }, { "epoch": 0.2, "learning_rate": 0.00018902610724486104, "loss": 3.5162, "step": 2680 }, { "epoch": 0.21, "learning_rate": 0.00018898588036526007, "loss": 2.9408, "step": 2685 }, { "epoch": 0.21, "learning_rate": 0.00018894565348565913, "loss": 2.3332, "step": 2690 }, { "epoch": 0.21, "learning_rate": 0.00018890542660605816, "loss": 2.6293, "step": 2695 }, { "epoch": 0.21, "learning_rate": 0.00018886519972645722, "loss": 0.7619, "step": 2700 }, { "epoch": 0.21, "learning_rate": 0.00018882497284685628, "loss": 5.459, "step": 2705 }, { "epoch": 0.21, "learning_rate": 0.00018878474596725533, "loss": 4.375, "step": 2710 }, { "epoch": 0.21, "learning_rate": 0.0001887445190876544, "loss": 5.0764, "step": 2715 }, { "epoch": 0.21, "learning_rate": 0.00018870429220805342, "loss": 3.6271, "step": 2720 }, { "epoch": 0.21, "learning_rate": 0.00018866406532845248, "loss": 3.8543, "step": 2725 }, { "epoch": 0.21, "learning_rate": 0.00018862383844885154, "loss": 2.9905, "step": 2730 }, { "epoch": 0.21, "learning_rate": 0.00018858361156925057, "loss": 3.3131, "step": 2735 }, { "epoch": 0.21, "learning_rate": 0.00018854338468964963, "loss": 2.6086, "step": 2740 }, { "epoch": 0.21, "learning_rate": 0.0001885031578100487, "loss": 1.4445, "step": 2745 }, { "epoch": 0.21, "learning_rate": 0.00018846293093044775, "loss": 1.3326, "step": 2750 }, { "epoch": 0.21, "learning_rate": 0.00018842270405084678, "loss": 3.508, "step": 2755 }, { "epoch": 0.21, "learning_rate": 0.00018838247717124583, "loss": 4.6805, "step": 2760 }, { "epoch": 0.21, "learning_rate": 0.0001883422502916449, "loss": 5.0881, "step": 2765 }, { "epoch": 0.21, "learning_rate": 0.00018830202341204392, "loss": 4.0855, "step": 2770 }, { "epoch": 0.21, "learning_rate": 0.00018826179653244298, "loss": 2.9991, "step": 2775 }, { "epoch": 0.21, "learning_rate": 0.00018822156965284204, "loss": 2.9354, "step": 2780 }, { "epoch": 0.21, "learning_rate": 0.0001881813427732411, "loss": 1.5435, "step": 2785 }, { "epoch": 0.21, "learning_rate": 0.00018814111589364013, "loss": 2.5249, "step": 2790 }, { "epoch": 0.21, "learning_rate": 0.0001881008890140392, "loss": 1.2265, "step": 2795 }, { "epoch": 0.21, "learning_rate": 0.00018806066213443825, "loss": 0.833, "step": 2800 }, { "epoch": 0.21, "learning_rate": 0.0001880204352548373, "loss": 5.0906, "step": 2805 }, { "epoch": 0.21, "learning_rate": 0.00018798020837523634, "loss": 5.0311, "step": 2810 }, { "epoch": 0.22, "learning_rate": 0.0001879399814956354, "loss": 5.8641, "step": 2815 }, { "epoch": 0.22, "learning_rate": 0.00018789975461603445, "loss": 4.1654, "step": 2820 }, { "epoch": 0.22, "learning_rate": 0.00018785952773643348, "loss": 3.4861, "step": 2825 }, { "epoch": 0.22, "learning_rate": 0.00018781930085683254, "loss": 3.3804, "step": 2830 }, { "epoch": 0.22, "learning_rate": 0.0001877790739772316, "loss": 4.5379, "step": 2835 }, { "epoch": 0.22, "learning_rate": 0.00018773884709763066, "loss": 1.8242, "step": 2840 }, { "epoch": 0.22, "learning_rate": 0.0001876986202180297, "loss": 3.9177, "step": 2845 }, { "epoch": 0.22, "learning_rate": 0.00018765839333842875, "loss": 0.7359, "step": 2850 }, { "epoch": 0.22, "learning_rate": 0.0001876181664588278, "loss": 6.2848, "step": 2855 }, { "epoch": 0.22, "learning_rate": 0.00018757793957922684, "loss": 4.1291, "step": 2860 }, { "epoch": 0.22, "learning_rate": 0.0001875377126996259, "loss": 4.0441, "step": 2865 }, { "epoch": 0.22, "learning_rate": 0.00018749748582002495, "loss": 3.3439, "step": 2870 }, { "epoch": 0.22, "learning_rate": 0.000187457258940424, "loss": 3.2429, "step": 2875 }, { "epoch": 0.22, "learning_rate": 0.00018741703206082307, "loss": 3.4903, "step": 2880 }, { "epoch": 0.22, "learning_rate": 0.0001873768051812221, "loss": 3.8007, "step": 2885 }, { "epoch": 0.22, "learning_rate": 0.00018733657830162116, "loss": 2.9526, "step": 2890 }, { "epoch": 0.22, "learning_rate": 0.0001872963514220202, "loss": 3.0772, "step": 2895 }, { "epoch": 0.22, "learning_rate": 0.00018725612454241925, "loss": 0.8114, "step": 2900 }, { "epoch": 0.22, "learning_rate": 0.0001872158976628183, "loss": 5.1357, "step": 2905 }, { "epoch": 0.22, "learning_rate": 0.00018717567078321737, "loss": 5.2641, "step": 2910 }, { "epoch": 0.22, "learning_rate": 0.00018713544390361642, "loss": 4.9559, "step": 2915 }, { "epoch": 0.22, "learning_rate": 0.00018709521702401545, "loss": 4.3744, "step": 2920 }, { "epoch": 0.22, "learning_rate": 0.00018705499014441449, "loss": 4.2383, "step": 2925 }, { "epoch": 0.22, "learning_rate": 0.00018701476326481354, "loss": 3.8998, "step": 2930 }, { "epoch": 0.22, "learning_rate": 0.0001869745363852126, "loss": 2.7238, "step": 2935 }, { "epoch": 0.22, "learning_rate": 0.00018693430950561166, "loss": 2.415, "step": 2940 }, { "epoch": 0.23, "learning_rate": 0.00018689408262601072, "loss": 2.7549, "step": 2945 }, { "epoch": 0.23, "learning_rate": 0.00018685385574640978, "loss": 2.9196, "step": 2950 }, { "epoch": 0.23, "learning_rate": 0.0001868136288668088, "loss": 4.4637, "step": 2955 }, { "epoch": 0.23, "learning_rate": 0.00018677340198720784, "loss": 4.441, "step": 2960 }, { "epoch": 0.23, "learning_rate": 0.0001867331751076069, "loss": 4.4455, "step": 2965 }, { "epoch": 0.23, "learning_rate": 0.00018669294822800596, "loss": 3.5803, "step": 2970 }, { "epoch": 0.23, "learning_rate": 0.00018665272134840501, "loss": 3.7873, "step": 2975 }, { "epoch": 0.23, "learning_rate": 0.00018661249446880407, "loss": 4.2402, "step": 2980 }, { "epoch": 0.23, "learning_rate": 0.00018657226758920313, "loss": 4.591, "step": 2985 }, { "epoch": 0.23, "learning_rate": 0.00018653204070960216, "loss": 2.1384, "step": 2990 }, { "epoch": 0.23, "learning_rate": 0.0001864918138300012, "loss": 1.5015, "step": 2995 }, { "epoch": 0.23, "learning_rate": 0.00018645158695040025, "loss": 1.4467, "step": 3000 }, { "epoch": 0.23, "learning_rate": 0.0001864113600707993, "loss": 5.4559, "step": 3005 }, { "epoch": 0.23, "learning_rate": 0.00018637113319119837, "loss": 4.7643, "step": 3010 }, { "epoch": 0.23, "learning_rate": 0.00018633090631159743, "loss": 5.4004, "step": 3015 }, { "epoch": 0.23, "learning_rate": 0.00018629067943199648, "loss": 3.9115, "step": 3020 }, { "epoch": 0.23, "learning_rate": 0.00018625045255239552, "loss": 3.2608, "step": 3025 }, { "epoch": 0.23, "learning_rate": 0.00018621022567279457, "loss": 3.4641, "step": 3030 }, { "epoch": 0.23, "learning_rate": 0.0001861699987931936, "loss": 3.2434, "step": 3035 }, { "epoch": 0.23, "learning_rate": 0.00018612977191359266, "loss": 2.1317, "step": 3040 }, { "epoch": 0.23, "learning_rate": 0.00018608954503399172, "loss": 2.0085, "step": 3045 }, { "epoch": 0.23, "learning_rate": 0.00018604931815439078, "loss": 0.4546, "step": 3050 }, { "epoch": 0.23, "learning_rate": 0.00018600909127478984, "loss": 4.4299, "step": 3055 }, { "epoch": 0.23, "learning_rate": 0.00018596886439518887, "loss": 5.439, "step": 3060 }, { "epoch": 0.23, "learning_rate": 0.00018592863751558793, "loss": 5.1666, "step": 3065 }, { "epoch": 0.23, "learning_rate": 0.00018588841063598696, "loss": 4.3708, "step": 3070 }, { "epoch": 0.24, "learning_rate": 0.00018584818375638602, "loss": 4.1246, "step": 3075 }, { "epoch": 0.24, "learning_rate": 0.00018580795687678507, "loss": 4.082, "step": 3080 }, { "epoch": 0.24, "learning_rate": 0.00018576772999718413, "loss": 2.4525, "step": 3085 }, { "epoch": 0.24, "learning_rate": 0.0001857275031175832, "loss": 3.7855, "step": 3090 }, { "epoch": 0.24, "learning_rate": 0.00018568727623798222, "loss": 1.7568, "step": 3095 }, { "epoch": 0.24, "learning_rate": 0.00018564704935838128, "loss": 1.5382, "step": 3100 }, { "epoch": 0.24, "learning_rate": 0.00018560682247878034, "loss": 4.3197, "step": 3105 }, { "epoch": 0.24, "learning_rate": 0.00018556659559917937, "loss": 5.0633, "step": 3110 }, { "epoch": 0.24, "learning_rate": 0.00018552636871957843, "loss": 4.0307, "step": 3115 }, { "epoch": 0.24, "learning_rate": 0.00018548614183997749, "loss": 3.3434, "step": 3120 }, { "epoch": 0.24, "learning_rate": 0.00018544591496037654, "loss": 4.3059, "step": 3125 }, { "epoch": 0.24, "learning_rate": 0.00018540568808077558, "loss": 4.701, "step": 3130 }, { "epoch": 0.24, "learning_rate": 0.00018536546120117463, "loss": 2.9802, "step": 3135 }, { "epoch": 0.24, "learning_rate": 0.0001853252343215737, "loss": 1.449, "step": 3140 }, { "epoch": 0.24, "learning_rate": 0.00018528500744197272, "loss": 3.2532, "step": 3145 }, { "epoch": 0.24, "learning_rate": 0.00018524478056237178, "loss": 0.6617, "step": 3150 }, { "epoch": 0.24, "learning_rate": 0.00018520455368277084, "loss": 4.2574, "step": 3155 }, { "epoch": 0.24, "learning_rate": 0.0001851643268031699, "loss": 4.36, "step": 3160 }, { "epoch": 0.24, "learning_rate": 0.00018512409992356893, "loss": 4.2436, "step": 3165 }, { "epoch": 0.24, "learning_rate": 0.000185083873043968, "loss": 4.8123, "step": 3170 }, { "epoch": 0.24, "learning_rate": 0.00018504364616436705, "loss": 5.1578, "step": 3175 }, { "epoch": 0.24, "learning_rate": 0.0001850034192847661, "loss": 4.4125, "step": 3180 }, { "epoch": 0.24, "learning_rate": 0.00018496319240516513, "loss": 2.5215, "step": 3185 }, { "epoch": 0.24, "learning_rate": 0.0001849229655255642, "loss": 3.3153, "step": 3190 }, { "epoch": 0.24, "learning_rate": 0.00018488273864596325, "loss": 3.2267, "step": 3195 }, { "epoch": 0.24, "learning_rate": 0.00018484251176636228, "loss": 2.41, "step": 3200 }, { "epoch": 0.24, "learning_rate": 0.00018480228488676134, "loss": 5.0113, "step": 3205 }, { "epoch": 0.25, "learning_rate": 0.0001847620580071604, "loss": 4.7652, "step": 3210 }, { "epoch": 0.25, "learning_rate": 0.00018472183112755946, "loss": 4.9766, "step": 3215 }, { "epoch": 0.25, "learning_rate": 0.0001846816042479585, "loss": 4.8104, "step": 3220 }, { "epoch": 0.25, "learning_rate": 0.00018464137736835755, "loss": 4.0664, "step": 3225 }, { "epoch": 0.25, "learning_rate": 0.00018460115048875658, "loss": 3.1479, "step": 3230 }, { "epoch": 0.25, "learning_rate": 0.00018456092360915564, "loss": 3.6193, "step": 3235 }, { "epoch": 0.25, "learning_rate": 0.0001845206967295547, "loss": 2.3463, "step": 3240 }, { "epoch": 0.25, "learning_rate": 0.00018448046984995375, "loss": 2.5716, "step": 3245 }, { "epoch": 0.25, "learning_rate": 0.0001844402429703528, "loss": 1.1213, "step": 3250 }, { "epoch": 0.25, "learning_rate": 0.00018440001609075187, "loss": 5.2396, "step": 3255 }, { "epoch": 0.25, "learning_rate": 0.0001843597892111509, "loss": 4.9785, "step": 3260 }, { "epoch": 0.25, "learning_rate": 0.00018431956233154993, "loss": 4.757, "step": 3265 }, { "epoch": 0.25, "learning_rate": 0.000184279335451949, "loss": 4.6502, "step": 3270 }, { "epoch": 0.25, "learning_rate": 0.00018423910857234805, "loss": 4.1502, "step": 3275 }, { "epoch": 0.25, "learning_rate": 0.0001841988816927471, "loss": 3.6057, "step": 3280 }, { "epoch": 0.25, "learning_rate": 0.00018415865481314616, "loss": 4.0316, "step": 3285 }, { "epoch": 0.25, "learning_rate": 0.00018411842793354522, "loss": 3.9748, "step": 3290 }, { "epoch": 0.25, "learning_rate": 0.00018407820105394425, "loss": 2.0564, "step": 3295 }, { "epoch": 0.25, "learning_rate": 0.00018403797417434328, "loss": 1.9967, "step": 3300 }, { "epoch": 0.25, "learning_rate": 0.00018399774729474234, "loss": 5.2789, "step": 3305 }, { "epoch": 0.25, "learning_rate": 0.0001839575204151414, "loss": 4.1012, "step": 3310 }, { "epoch": 0.25, "learning_rate": 0.00018391729353554046, "loss": 4.8074, "step": 3315 }, { "epoch": 0.25, "learning_rate": 0.00018387706665593952, "loss": 2.8919, "step": 3320 }, { "epoch": 0.25, "learning_rate": 0.00018383683977633858, "loss": 3.7766, "step": 3325 }, { "epoch": 0.25, "learning_rate": 0.0001837966128967376, "loss": 5.0438, "step": 3330 }, { "epoch": 0.25, "learning_rate": 0.00018375638601713664, "loss": 2.7931, "step": 3335 }, { "epoch": 0.26, "learning_rate": 0.0001837161591375357, "loss": 2.0797, "step": 3340 }, { "epoch": 0.26, "learning_rate": 0.00018367593225793475, "loss": 2.7992, "step": 3345 }, { "epoch": 0.26, "learning_rate": 0.0001836357053783338, "loss": 0.7685, "step": 3350 }, { "epoch": 0.26, "learning_rate": 0.00018359547849873287, "loss": 4.7795, "step": 3355 }, { "epoch": 0.26, "learning_rate": 0.00018355525161913193, "loss": 4.2709, "step": 3360 }, { "epoch": 0.26, "learning_rate": 0.00018351502473953096, "loss": 5.034, "step": 3365 }, { "epoch": 0.26, "learning_rate": 0.00018347479785993, "loss": 4.5508, "step": 3370 }, { "epoch": 0.26, "learning_rate": 0.00018343457098032905, "loss": 3.427, "step": 3375 }, { "epoch": 0.26, "learning_rate": 0.0001833943441007281, "loss": 3.2379, "step": 3380 }, { "epoch": 0.26, "learning_rate": 0.00018335411722112717, "loss": 3.8982, "step": 3385 }, { "epoch": 0.26, "learning_rate": 0.00018331389034152622, "loss": 2.4782, "step": 3390 }, { "epoch": 0.26, "learning_rate": 0.00018327366346192528, "loss": 3.1309, "step": 3395 }, { "epoch": 0.26, "learning_rate": 0.00018323343658232431, "loss": 1.8897, "step": 3400 }, { "epoch": 0.26, "learning_rate": 0.00018319320970272337, "loss": 5.2643, "step": 3405 }, { "epoch": 0.26, "learning_rate": 0.0001831529828231224, "loss": 4.215, "step": 3410 }, { "epoch": 0.26, "learning_rate": 0.00018311275594352146, "loss": 4.032, "step": 3415 }, { "epoch": 0.26, "learning_rate": 0.00018307252906392052, "loss": 3.9859, "step": 3420 }, { "epoch": 0.26, "learning_rate": 0.00018303230218431958, "loss": 3.4547, "step": 3425 }, { "epoch": 0.26, "learning_rate": 0.00018299207530471864, "loss": 3.0241, "step": 3430 }, { "epoch": 0.26, "learning_rate": 0.00018295184842511767, "loss": 2.1052, "step": 3435 }, { "epoch": 0.26, "learning_rate": 0.00018291162154551673, "loss": 1.5382, "step": 3440 }, { "epoch": 0.26, "learning_rate": 0.00018287139466591576, "loss": 4.3531, "step": 3445 }, { "epoch": 0.26, "learning_rate": 0.00018283116778631482, "loss": 1.5439, "step": 3450 }, { "epoch": 0.26, "learning_rate": 0.00018279094090671387, "loss": 4.5682, "step": 3455 }, { "epoch": 0.26, "learning_rate": 0.00018275071402711293, "loss": 4.4668, "step": 3460 }, { "epoch": 0.26, "learning_rate": 0.000182710487147512, "loss": 5.041, "step": 3465 }, { "epoch": 0.27, "learning_rate": 0.00018267026026791102, "loss": 4.1639, "step": 3470 }, { "epoch": 0.27, "learning_rate": 0.00018263003338831008, "loss": 4.3855, "step": 3475 }, { "epoch": 0.27, "learning_rate": 0.00018258980650870914, "loss": 3.7016, "step": 3480 }, { "epoch": 0.27, "learning_rate": 0.00018254957962910817, "loss": 3.9585, "step": 3485 }, { "epoch": 0.27, "learning_rate": 0.00018250935274950723, "loss": 2.0294, "step": 3490 }, { "epoch": 0.27, "learning_rate": 0.00018246912586990629, "loss": 2.6052, "step": 3495 }, { "epoch": 0.27, "learning_rate": 0.00018242889899030534, "loss": 1.1796, "step": 3500 }, { "epoch": 0.27, "learning_rate": 0.00018238867211070437, "loss": 3.7271, "step": 3505 }, { "epoch": 0.27, "learning_rate": 0.00018234844523110343, "loss": 4.1547, "step": 3510 }, { "epoch": 0.27, "learning_rate": 0.0001823082183515025, "loss": 4.6098, "step": 3515 }, { "epoch": 0.27, "learning_rate": 0.00018226799147190152, "loss": 5.2176, "step": 3520 }, { "epoch": 0.27, "learning_rate": 0.00018222776459230058, "loss": 4.0721, "step": 3525 }, { "epoch": 0.27, "learning_rate": 0.00018218753771269964, "loss": 4.3234, "step": 3530 }, { "epoch": 0.27, "learning_rate": 0.00018214731083309867, "loss": 3.1592, "step": 3535 }, { "epoch": 0.27, "learning_rate": 0.00018210708395349773, "loss": 3.2439, "step": 3540 }, { "epoch": 0.27, "learning_rate": 0.00018206685707389679, "loss": 2.8346, "step": 3545 }, { "epoch": 0.27, "learning_rate": 0.00018202663019429584, "loss": 1.0432, "step": 3550 }, { "epoch": 0.27, "learning_rate": 0.0001819864033146949, "loss": 4.7693, "step": 3555 }, { "epoch": 0.27, "learning_rate": 0.00018194617643509393, "loss": 3.7051, "step": 3560 }, { "epoch": 0.27, "learning_rate": 0.000181905949555493, "loss": 4.0299, "step": 3565 }, { "epoch": 0.27, "learning_rate": 0.00018186572267589202, "loss": 4.5971, "step": 3570 }, { "epoch": 0.27, "learning_rate": 0.00018182549579629108, "loss": 3.5709, "step": 3575 }, { "epoch": 0.27, "learning_rate": 0.00018178526891669014, "loss": 3.5243, "step": 3580 }, { "epoch": 0.27, "learning_rate": 0.0001817450420370892, "loss": 2.8105, "step": 3585 }, { "epoch": 0.27, "learning_rate": 0.00018170481515748826, "loss": 1.6252, "step": 3590 }, { "epoch": 0.27, "learning_rate": 0.0001816645882778873, "loss": 1.5084, "step": 3595 }, { "epoch": 0.28, "learning_rate": 0.00018162436139828635, "loss": 1.4158, "step": 3600 }, { "epoch": 0.28, "learning_rate": 0.00018158413451868538, "loss": 4.3527, "step": 3605 }, { "epoch": 0.28, "learning_rate": 0.00018154390763908444, "loss": 6.2471, "step": 3610 }, { "epoch": 0.28, "learning_rate": 0.0001815036807594835, "loss": 3.4996, "step": 3615 }, { "epoch": 0.28, "learning_rate": 0.00018146345387988255, "loss": 3.4791, "step": 3620 }, { "epoch": 0.28, "learning_rate": 0.0001814232270002816, "loss": 3.9434, "step": 3625 }, { "epoch": 0.28, "learning_rate": 0.00018138300012068067, "loss": 3.8335, "step": 3630 }, { "epoch": 0.28, "learning_rate": 0.0001813427732410797, "loss": 2.7479, "step": 3635 }, { "epoch": 0.28, "learning_rate": 0.00018130254636147873, "loss": 3.481, "step": 3640 }, { "epoch": 0.28, "learning_rate": 0.0001812623194818778, "loss": 2.2178, "step": 3645 }, { "epoch": 0.28, "learning_rate": 0.00018122209260227685, "loss": 0.5455, "step": 3650 }, { "epoch": 0.28, "learning_rate": 0.0001811818657226759, "loss": 4.8406, "step": 3655 }, { "epoch": 0.28, "learning_rate": 0.00018114163884307496, "loss": 4.449, "step": 3660 }, { "epoch": 0.28, "learning_rate": 0.00018110141196347402, "loss": 5.3301, "step": 3665 }, { "epoch": 0.28, "learning_rate": 0.00018106118508387305, "loss": 5.1273, "step": 3670 }, { "epoch": 0.28, "learning_rate": 0.00018102095820427208, "loss": 3.4895, "step": 3675 }, { "epoch": 0.28, "learning_rate": 0.00018098073132467114, "loss": 2.6304, "step": 3680 }, { "epoch": 0.28, "learning_rate": 0.0001809405044450702, "loss": 2.2214, "step": 3685 }, { "epoch": 0.28, "learning_rate": 0.00018090027756546926, "loss": 2.3575, "step": 3690 }, { "epoch": 0.28, "learning_rate": 0.00018086005068586832, "loss": 1.2417, "step": 3695 }, { "epoch": 0.28, "learning_rate": 0.00018081982380626738, "loss": 2.804, "step": 3700 }, { "epoch": 0.28, "learning_rate": 0.0001807795969266664, "loss": 4.5449, "step": 3705 }, { "epoch": 0.28, "learning_rate": 0.00018073937004706544, "loss": 4.7197, "step": 3710 }, { "epoch": 0.28, "learning_rate": 0.0001806991431674645, "loss": 5.1338, "step": 3715 }, { "epoch": 0.28, "learning_rate": 0.00018065891628786355, "loss": 4.2385, "step": 3720 }, { "epoch": 0.28, "learning_rate": 0.0001806186894082626, "loss": 3.7326, "step": 3725 }, { "epoch": 0.29, "learning_rate": 0.00018057846252866167, "loss": 3.5176, "step": 3730 }, { "epoch": 0.29, "learning_rate": 0.00018053823564906073, "loss": 2.6052, "step": 3735 }, { "epoch": 0.29, "learning_rate": 0.00018049800876945976, "loss": 2.286, "step": 3740 }, { "epoch": 0.29, "learning_rate": 0.0001804577818898588, "loss": 3.0982, "step": 3745 }, { "epoch": 0.29, "learning_rate": 0.00018041755501025785, "loss": 2.8813, "step": 3750 }, { "epoch": 0.29, "learning_rate": 0.0001803773281306569, "loss": 4.1352, "step": 3755 }, { "epoch": 0.29, "learning_rate": 0.00018033710125105597, "loss": 4.5643, "step": 3760 }, { "epoch": 0.29, "learning_rate": 0.00018029687437145502, "loss": 4.6842, "step": 3765 }, { "epoch": 0.29, "learning_rate": 0.00018025664749185408, "loss": 3.4552, "step": 3770 }, { "epoch": 0.29, "learning_rate": 0.0001802164206122531, "loss": 3.9146, "step": 3775 }, { "epoch": 0.29, "learning_rate": 0.00018017619373265217, "loss": 3.2099, "step": 3780 }, { "epoch": 0.29, "learning_rate": 0.0001801359668530512, "loss": 4.366, "step": 3785 }, { "epoch": 0.29, "learning_rate": 0.00018009573997345026, "loss": 2.5956, "step": 3790 }, { "epoch": 0.29, "learning_rate": 0.00018005551309384932, "loss": 1.6513, "step": 3795 }, { "epoch": 0.29, "learning_rate": 0.00018001528621424838, "loss": 2.3943, "step": 3800 }, { "epoch": 0.29, "learning_rate": 0.00017997505933464744, "loss": 4.4051, "step": 3805 }, { "epoch": 0.29, "learning_rate": 0.00017993483245504647, "loss": 4.1432, "step": 3810 }, { "epoch": 0.29, "learning_rate": 0.00017989460557544552, "loss": 4.6814, "step": 3815 }, { "epoch": 0.29, "learning_rate": 0.00017985437869584456, "loss": 3.7793, "step": 3820 }, { "epoch": 0.29, "learning_rate": 0.00017981415181624361, "loss": 3.6339, "step": 3825 }, { "epoch": 0.29, "learning_rate": 0.00017977392493664267, "loss": 3.716, "step": 3830 }, { "epoch": 0.29, "learning_rate": 0.00017973369805704173, "loss": 3.7518, "step": 3835 }, { "epoch": 0.29, "learning_rate": 0.00017969347117744076, "loss": 3.3349, "step": 3840 }, { "epoch": 0.29, "learning_rate": 0.00017965324429783982, "loss": 1.3231, "step": 3845 }, { "epoch": 0.29, "learning_rate": 0.00017961301741823888, "loss": 2.5204, "step": 3850 }, { "epoch": 0.29, "learning_rate": 0.00017957279053863794, "loss": 5.1436, "step": 3855 }, { "epoch": 0.3, "learning_rate": 0.00017953256365903697, "loss": 5.2109, "step": 3860 }, { "epoch": 0.3, "learning_rate": 0.00017949233677943603, "loss": 3.9754, "step": 3865 }, { "epoch": 0.3, "learning_rate": 0.00017945210989983508, "loss": 3.8529, "step": 3870 }, { "epoch": 0.3, "learning_rate": 0.00017941188302023412, "loss": 4.5021, "step": 3875 }, { "epoch": 0.3, "learning_rate": 0.00017937165614063317, "loss": 3.8877, "step": 3880 }, { "epoch": 0.3, "learning_rate": 0.00017933142926103223, "loss": 1.8866, "step": 3885 }, { "epoch": 0.3, "learning_rate": 0.0001792912023814313, "loss": 4.2512, "step": 3890 }, { "epoch": 0.3, "learning_rate": 0.00017925097550183032, "loss": 2.6688, "step": 3895 }, { "epoch": 0.3, "learning_rate": 0.00017921074862222938, "loss": 3.2995, "step": 3900 }, { "epoch": 0.3, "learning_rate": 0.00017917052174262844, "loss": 4.2304, "step": 3905 }, { "epoch": 0.3, "learning_rate": 0.00017913029486302747, "loss": 4.9676, "step": 3910 }, { "epoch": 0.3, "learning_rate": 0.00017909006798342653, "loss": 4.2441, "step": 3915 }, { "epoch": 0.3, "learning_rate": 0.00017904984110382559, "loss": 3.8947, "step": 3920 }, { "epoch": 0.3, "learning_rate": 0.00017900961422422464, "loss": 4.1439, "step": 3925 }, { "epoch": 0.3, "learning_rate": 0.0001789693873446237, "loss": 3.5044, "step": 3930 }, { "epoch": 0.3, "learning_rate": 0.00017892916046502273, "loss": 2.705, "step": 3935 }, { "epoch": 0.3, "learning_rate": 0.0001788889335854218, "loss": 0.9807, "step": 3940 }, { "epoch": 0.3, "learning_rate": 0.00017884870670582082, "loss": 2.8294, "step": 3945 }, { "epoch": 0.3, "learning_rate": 0.00017880847982621988, "loss": 0.8584, "step": 3950 }, { "epoch": 0.3, "learning_rate": 0.00017876825294661894, "loss": 4.865, "step": 3955 }, { "epoch": 0.3, "learning_rate": 0.000178728026067018, "loss": 4.4303, "step": 3960 }, { "epoch": 0.3, "learning_rate": 0.00017868779918741706, "loss": 4.9551, "step": 3965 }, { "epoch": 0.3, "learning_rate": 0.00017864757230781611, "loss": 4.6406, "step": 3970 }, { "epoch": 0.3, "learning_rate": 0.00017860734542821514, "loss": 3.2033, "step": 3975 }, { "epoch": 0.3, "learning_rate": 0.00017856711854861418, "loss": 3.6012, "step": 3980 }, { "epoch": 0.3, "learning_rate": 0.00017852689166901323, "loss": 1.4554, "step": 3985 }, { "epoch": 0.3, "learning_rate": 0.0001784866647894123, "loss": 2.4178, "step": 3990 }, { "epoch": 0.31, "learning_rate": 0.00017844643790981135, "loss": 1.7368, "step": 3995 }, { "epoch": 0.31, "learning_rate": 0.0001784062110302104, "loss": 1.9992, "step": 4000 }, { "epoch": 0.31, "learning_rate": 0.00017836598415060947, "loss": 3.7744, "step": 4005 }, { "epoch": 0.31, "learning_rate": 0.0001783257572710085, "loss": 4.1831, "step": 4010 }, { "epoch": 0.31, "learning_rate": 0.00017828553039140753, "loss": 3.9318, "step": 4015 }, { "epoch": 0.31, "learning_rate": 0.0001782453035118066, "loss": 3.6878, "step": 4020 }, { "epoch": 0.31, "learning_rate": 0.00017820507663220565, "loss": 4.0703, "step": 4025 }, { "epoch": 0.31, "learning_rate": 0.0001781648497526047, "loss": 3.9203, "step": 4030 }, { "epoch": 0.31, "learning_rate": 0.00017812462287300376, "loss": 2.88, "step": 4035 }, { "epoch": 0.31, "learning_rate": 0.00017808439599340282, "loss": 2.49, "step": 4040 }, { "epoch": 0.31, "learning_rate": 0.00017804416911380185, "loss": 1.8029, "step": 4045 }, { "epoch": 0.31, "learning_rate": 0.00017800394223420088, "loss": 1.4607, "step": 4050 }, { "epoch": 0.31, "learning_rate": 0.00017796371535459994, "loss": 4.866, "step": 4055 }, { "epoch": 0.31, "learning_rate": 0.000177923488474999, "loss": 4.9695, "step": 4060 }, { "epoch": 0.31, "learning_rate": 0.00017788326159539806, "loss": 4.073, "step": 4065 }, { "epoch": 0.31, "learning_rate": 0.00017784303471579712, "loss": 4.3596, "step": 4070 }, { "epoch": 0.31, "learning_rate": 0.00017780280783619617, "loss": 3.5149, "step": 4075 }, { "epoch": 0.31, "learning_rate": 0.0001777625809565952, "loss": 3.9059, "step": 4080 }, { "epoch": 0.31, "learning_rate": 0.00017772235407699424, "loss": 2.6699, "step": 4085 }, { "epoch": 0.31, "learning_rate": 0.0001776821271973933, "loss": 3.2956, "step": 4090 }, { "epoch": 0.31, "learning_rate": 0.00017764190031779235, "loss": 2.8765, "step": 4095 }, { "epoch": 0.31, "learning_rate": 0.0001776016734381914, "loss": 2.6877, "step": 4100 }, { "epoch": 0.31, "learning_rate": 0.00017756144655859047, "loss": 4.8078, "step": 4105 }, { "epoch": 0.31, "learning_rate": 0.00017752121967898953, "loss": 4.3077, "step": 4110 }, { "epoch": 0.31, "learning_rate": 0.00017748099279938856, "loss": 3.7305, "step": 4115 }, { "epoch": 0.31, "learning_rate": 0.00017744076591978762, "loss": 3.5543, "step": 4120 }, { "epoch": 0.32, "learning_rate": 0.00017740053904018665, "loss": 4.3678, "step": 4125 }, { "epoch": 0.32, "learning_rate": 0.0001773603121605857, "loss": 3.1074, "step": 4130 }, { "epoch": 0.32, "learning_rate": 0.00017732008528098476, "loss": 3.485, "step": 4135 }, { "epoch": 0.32, "learning_rate": 0.00017727985840138382, "loss": 2.4198, "step": 4140 }, { "epoch": 0.32, "learning_rate": 0.00017723963152178285, "loss": 2.7068, "step": 4145 }, { "epoch": 0.32, "learning_rate": 0.0001771994046421819, "loss": 2.6594, "step": 4150 }, { "epoch": 0.32, "learning_rate": 0.00017715917776258097, "loss": 3.9896, "step": 4155 }, { "epoch": 0.32, "learning_rate": 0.00017711895088298, "loss": 4.7062, "step": 4160 }, { "epoch": 0.32, "learning_rate": 0.00017707872400337906, "loss": 5.0463, "step": 4165 }, { "epoch": 0.32, "learning_rate": 0.00017703849712377812, "loss": 3.9547, "step": 4170 }, { "epoch": 0.32, "learning_rate": 0.00017699827024417718, "loss": 4.6994, "step": 4175 }, { "epoch": 0.32, "learning_rate": 0.0001769580433645762, "loss": 4.1545, "step": 4180 }, { "epoch": 0.32, "learning_rate": 0.00017691781648497527, "loss": 3.0788, "step": 4185 }, { "epoch": 0.32, "learning_rate": 0.00017687758960537432, "loss": 2.744, "step": 4190 }, { "epoch": 0.32, "learning_rate": 0.00017683736272577338, "loss": 2.6146, "step": 4195 }, { "epoch": 0.32, "learning_rate": 0.00017680518122209263, "loss": 3.9939, "step": 4200 }, { "epoch": 0.32, "learning_rate": 0.00017676495434249166, "loss": 4.5344, "step": 4205 }, { "epoch": 0.32, "learning_rate": 0.0001767247274628907, "loss": 4.7307, "step": 4210 }, { "epoch": 0.32, "learning_rate": 0.00017668450058328975, "loss": 4.5513, "step": 4215 }, { "epoch": 0.32, "learning_rate": 0.0001766442737036888, "loss": 3.2362, "step": 4220 }, { "epoch": 0.32, "learning_rate": 0.00017660404682408787, "loss": 3.5146, "step": 4225 }, { "epoch": 0.32, "learning_rate": 0.00017656381994448692, "loss": 2.4402, "step": 4230 }, { "epoch": 0.32, "learning_rate": 0.00017652359306488598, "loss": 2.7357, "step": 4235 }, { "epoch": 0.32, "learning_rate": 0.000176483366185285, "loss": 2.2243, "step": 4240 }, { "epoch": 0.32, "learning_rate": 0.00017644313930568404, "loss": 2.6992, "step": 4245 }, { "epoch": 0.32, "learning_rate": 0.0001764029124260831, "loss": 2.3734, "step": 4250 }, { "epoch": 0.33, "learning_rate": 0.00017636268554648216, "loss": 4.1707, "step": 4255 }, { "epoch": 0.33, "learning_rate": 0.00017632245866688122, "loss": 5.4875, "step": 4260 }, { "epoch": 0.33, "learning_rate": 0.00017628223178728028, "loss": 4.6299, "step": 4265 }, { "epoch": 0.33, "learning_rate": 0.00017624200490767934, "loss": 3.0701, "step": 4270 }, { "epoch": 0.33, "learning_rate": 0.00017620177802807837, "loss": 3.775, "step": 4275 }, { "epoch": 0.33, "learning_rate": 0.0001761615511484774, "loss": 2.9104, "step": 4280 }, { "epoch": 0.33, "learning_rate": 0.00017612132426887646, "loss": 2.7511, "step": 4285 }, { "epoch": 0.33, "learning_rate": 0.00017608109738927551, "loss": 2.603, "step": 4290 }, { "epoch": 0.33, "learning_rate": 0.00017604087050967457, "loss": 2.2994, "step": 4295 }, { "epoch": 0.33, "learning_rate": 0.00017600064363007363, "loss": 2.7078, "step": 4300 }, { "epoch": 0.33, "learning_rate": 0.0001759604167504727, "loss": 4.4588, "step": 4305 }, { "epoch": 0.33, "learning_rate": 0.00017592018987087172, "loss": 4.8812, "step": 4310 }, { "epoch": 0.33, "learning_rate": 0.00017587996299127078, "loss": 4.8076, "step": 4315 }, { "epoch": 0.33, "learning_rate": 0.0001758397361116698, "loss": 3.1662, "step": 4320 }, { "epoch": 0.33, "learning_rate": 0.00017579950923206887, "loss": 4.4642, "step": 4325 }, { "epoch": 0.33, "learning_rate": 0.00017575928235246793, "loss": 2.0759, "step": 4330 }, { "epoch": 0.33, "learning_rate": 0.00017571905547286698, "loss": 2.2219, "step": 4335 }, { "epoch": 0.33, "learning_rate": 0.00017567882859326604, "loss": 1.4699, "step": 4340 }, { "epoch": 0.33, "learning_rate": 0.00017563860171366507, "loss": 1.0181, "step": 4345 }, { "epoch": 0.33, "learning_rate": 0.00017559837483406413, "loss": 1.4557, "step": 4350 }, { "epoch": 0.33, "learning_rate": 0.00017555814795446316, "loss": 5.0143, "step": 4355 }, { "epoch": 0.33, "learning_rate": 0.00017551792107486222, "loss": 4.6441, "step": 4360 }, { "epoch": 0.33, "learning_rate": 0.00017547769419526128, "loss": 4.8084, "step": 4365 }, { "epoch": 0.33, "learning_rate": 0.00017543746731566034, "loss": 5.1527, "step": 4370 }, { "epoch": 0.33, "learning_rate": 0.0001753972404360594, "loss": 4.5236, "step": 4375 }, { "epoch": 0.33, "learning_rate": 0.00017535701355645843, "loss": 4.1115, "step": 4380 }, { "epoch": 0.34, "learning_rate": 0.00017531678667685749, "loss": 3.2589, "step": 4385 }, { "epoch": 0.34, "learning_rate": 0.00017527655979725654, "loss": 3.387, "step": 4390 }, { "epoch": 0.34, "learning_rate": 0.00017523633291765558, "loss": 1.2526, "step": 4395 }, { "epoch": 0.34, "learning_rate": 0.00017519610603805463, "loss": 0.799, "step": 4400 }, { "epoch": 0.34, "learning_rate": 0.0001751558791584537, "loss": 5.3492, "step": 4405 }, { "epoch": 0.34, "learning_rate": 0.00017511565227885275, "loss": 4.1848, "step": 4410 }, { "epoch": 0.34, "learning_rate": 0.00017507542539925178, "loss": 5.2502, "step": 4415 }, { "epoch": 0.34, "learning_rate": 0.00017503519851965084, "loss": 4.8549, "step": 4420 }, { "epoch": 0.34, "learning_rate": 0.0001749949716400499, "loss": 4.3082, "step": 4425 }, { "epoch": 0.34, "learning_rate": 0.00017495474476044893, "loss": 3.2034, "step": 4430 }, { "epoch": 0.34, "learning_rate": 0.000174914517880848, "loss": 3.5008, "step": 4435 }, { "epoch": 0.34, "learning_rate": 0.00017487429100124705, "loss": 2.2585, "step": 4440 }, { "epoch": 0.34, "learning_rate": 0.0001748340641216461, "loss": 1.9539, "step": 4445 }, { "epoch": 0.34, "learning_rate": 0.00017479383724204513, "loss": 1.5011, "step": 4450 }, { "epoch": 0.34, "learning_rate": 0.0001747536103624442, "loss": 4.0939, "step": 4455 }, { "epoch": 0.34, "learning_rate": 0.00017471338348284325, "loss": 4.458, "step": 4460 }, { "epoch": 0.34, "learning_rate": 0.0001746731566032423, "loss": 6.1611, "step": 4465 }, { "epoch": 0.34, "learning_rate": 0.00017463292972364134, "loss": 4.3582, "step": 4470 }, { "epoch": 0.34, "learning_rate": 0.0001745927028440404, "loss": 3.2851, "step": 4475 }, { "epoch": 0.34, "learning_rate": 0.00017455247596443946, "loss": 2.6692, "step": 4480 }, { "epoch": 0.34, "learning_rate": 0.0001745122490848385, "loss": 3.419, "step": 4485 }, { "epoch": 0.34, "learning_rate": 0.00017447202220523755, "loss": 3.178, "step": 4490 }, { "epoch": 0.34, "learning_rate": 0.0001744317953256366, "loss": 2.9248, "step": 4495 }, { "epoch": 0.34, "learning_rate": 0.00017439156844603566, "loss": 2.8947, "step": 4500 }, { "epoch": 0.34, "learning_rate": 0.0001743513415664347, "loss": 5.7676, "step": 4505 }, { "epoch": 0.34, "learning_rate": 0.00017431111468683375, "loss": 4.4447, "step": 4510 }, { "epoch": 0.35, "learning_rate": 0.00017427088780723278, "loss": 4.7262, "step": 4515 }, { "epoch": 0.35, "learning_rate": 0.00017423066092763184, "loss": 3.8962, "step": 4520 }, { "epoch": 0.35, "learning_rate": 0.0001741904340480309, "loss": 3.9734, "step": 4525 }, { "epoch": 0.35, "learning_rate": 0.00017415020716842996, "loss": 3.832, "step": 4530 }, { "epoch": 0.35, "learning_rate": 0.00017410998028882902, "loss": 2.866, "step": 4535 }, { "epoch": 0.35, "learning_rate": 0.00017406975340922807, "loss": 2.5358, "step": 4540 }, { "epoch": 0.35, "learning_rate": 0.0001740295265296271, "loss": 1.7431, "step": 4545 }, { "epoch": 0.35, "learning_rate": 0.00017398929965002614, "loss": 2.6781, "step": 4550 }, { "epoch": 0.35, "learning_rate": 0.0001739490727704252, "loss": 4.4768, "step": 4555 }, { "epoch": 0.35, "learning_rate": 0.00017390884589082425, "loss": 3.8754, "step": 4560 }, { "epoch": 0.35, "learning_rate": 0.0001738686190112233, "loss": 3.7852, "step": 4565 }, { "epoch": 0.35, "learning_rate": 0.00017382839213162237, "loss": 4.2292, "step": 4570 }, { "epoch": 0.35, "learning_rate": 0.00017378816525202143, "loss": 3.6535, "step": 4575 }, { "epoch": 0.35, "learning_rate": 0.00017374793837242046, "loss": 4.4597, "step": 4580 }, { "epoch": 0.35, "learning_rate": 0.0001737077114928195, "loss": 2.1313, "step": 4585 }, { "epoch": 0.35, "learning_rate": 0.00017366748461321855, "loss": 2.3775, "step": 4590 }, { "epoch": 0.35, "learning_rate": 0.0001736272577336176, "loss": 1.5729, "step": 4595 }, { "epoch": 0.35, "learning_rate": 0.00017358703085401666, "loss": 1.3116, "step": 4600 }, { "epoch": 0.35, "learning_rate": 0.00017354680397441572, "loss": 5.5291, "step": 4605 }, { "epoch": 0.35, "learning_rate": 0.00017350657709481478, "loss": 4.3365, "step": 4610 }, { "epoch": 0.35, "learning_rate": 0.0001734663502152138, "loss": 3.6343, "step": 4615 }, { "epoch": 0.35, "learning_rate": 0.00017342612333561284, "loss": 4.5385, "step": 4620 }, { "epoch": 0.35, "learning_rate": 0.0001733858964560119, "loss": 3.4747, "step": 4625 }, { "epoch": 0.35, "learning_rate": 0.00017334566957641096, "loss": 2.505, "step": 4630 }, { "epoch": 0.35, "learning_rate": 0.00017330544269681002, "loss": 2.2642, "step": 4635 }, { "epoch": 0.35, "learning_rate": 0.00017326521581720908, "loss": 1.7137, "step": 4640 }, { "epoch": 0.36, "learning_rate": 0.00017322498893760813, "loss": 2.2715, "step": 4645 }, { "epoch": 0.36, "learning_rate": 0.00017318476205800717, "loss": 2.6712, "step": 4650 }, { "epoch": 0.36, "learning_rate": 0.0001731445351784062, "loss": 4.7334, "step": 4655 }, { "epoch": 0.36, "learning_rate": 0.00017310430829880526, "loss": 3.4988, "step": 4660 }, { "epoch": 0.36, "learning_rate": 0.00017306408141920431, "loss": 3.4859, "step": 4665 }, { "epoch": 0.36, "learning_rate": 0.00017302385453960337, "loss": 2.9528, "step": 4670 }, { "epoch": 0.36, "learning_rate": 0.00017298362766000243, "loss": 4.2112, "step": 4675 }, { "epoch": 0.36, "learning_rate": 0.0001729434007804015, "loss": 4.345, "step": 4680 }, { "epoch": 0.36, "learning_rate": 0.00017290317390080052, "loss": 2.7534, "step": 4685 }, { "epoch": 0.36, "learning_rate": 0.00017286294702119958, "loss": 1.9547, "step": 4690 }, { "epoch": 0.36, "learning_rate": 0.0001728227201415986, "loss": 2.9516, "step": 4695 }, { "epoch": 0.36, "learning_rate": 0.00017278249326199767, "loss": 0.7848, "step": 4700 }, { "epoch": 0.36, "learning_rate": 0.00017274226638239673, "loss": 5.2662, "step": 4705 }, { "epoch": 0.36, "learning_rate": 0.00017270203950279578, "loss": 5.1359, "step": 4710 }, { "epoch": 0.36, "learning_rate": 0.00017266181262319484, "loss": 4.5707, "step": 4715 }, { "epoch": 0.36, "learning_rate": 0.00017262158574359387, "loss": 3.8627, "step": 4720 }, { "epoch": 0.36, "learning_rate": 0.00017258135886399293, "loss": 5.6158, "step": 4725 }, { "epoch": 0.36, "learning_rate": 0.00017254113198439196, "loss": 4.3295, "step": 4730 }, { "epoch": 0.36, "learning_rate": 0.00017250090510479102, "loss": 2.5796, "step": 4735 }, { "epoch": 0.36, "learning_rate": 0.00017246067822519008, "loss": 2.2212, "step": 4740 }, { "epoch": 0.36, "learning_rate": 0.00017242045134558914, "loss": 1.7833, "step": 4745 }, { "epoch": 0.36, "learning_rate": 0.0001723802244659882, "loss": 1.0688, "step": 4750 }, { "epoch": 0.36, "learning_rate": 0.00017233999758638723, "loss": 5.7943, "step": 4755 }, { "epoch": 0.36, "learning_rate": 0.00017229977070678628, "loss": 4.3023, "step": 4760 }, { "epoch": 0.36, "learning_rate": 0.00017225954382718534, "loss": 4.1447, "step": 4765 }, { "epoch": 0.36, "learning_rate": 0.00017221931694758437, "loss": 4.5957, "step": 4770 }, { "epoch": 0.36, "learning_rate": 0.00017217909006798343, "loss": 4.5724, "step": 4775 }, { "epoch": 0.37, "learning_rate": 0.0001721388631883825, "loss": 3.3936, "step": 4780 }, { "epoch": 0.37, "learning_rate": 0.00017209863630878155, "loss": 1.8421, "step": 4785 }, { "epoch": 0.37, "learning_rate": 0.00017205840942918058, "loss": 2.0833, "step": 4790 }, { "epoch": 0.37, "learning_rate": 0.00017201818254957964, "loss": 2.1733, "step": 4795 }, { "epoch": 0.37, "learning_rate": 0.0001719779556699787, "loss": 1.4903, "step": 4800 }, { "epoch": 0.37, "learning_rate": 0.00017193772879037773, "loss": 5.8072, "step": 4805 }, { "epoch": 0.37, "learning_rate": 0.00017189750191077679, "loss": 4.2612, "step": 4810 }, { "epoch": 0.37, "learning_rate": 0.00017185727503117584, "loss": 3.9973, "step": 4815 }, { "epoch": 0.37, "learning_rate": 0.00017181704815157488, "loss": 4.2791, "step": 4820 }, { "epoch": 0.37, "learning_rate": 0.00017177682127197393, "loss": 4.2564, "step": 4825 }, { "epoch": 0.37, "learning_rate": 0.000171736594392373, "loss": 4.493, "step": 4830 }, { "epoch": 0.37, "learning_rate": 0.00017169636751277205, "loss": 3.7284, "step": 4835 }, { "epoch": 0.37, "learning_rate": 0.0001716561406331711, "loss": 2.7577, "step": 4840 }, { "epoch": 0.37, "learning_rate": 0.00017161591375357014, "loss": 1.5587, "step": 4845 }, { "epoch": 0.37, "learning_rate": 0.0001715756868739692, "loss": 1.974, "step": 4850 }, { "epoch": 0.37, "learning_rate": 0.00017153545999436823, "loss": 4.9592, "step": 4855 }, { "epoch": 0.37, "learning_rate": 0.0001714952331147673, "loss": 3.9775, "step": 4860 }, { "epoch": 0.37, "learning_rate": 0.00017145500623516635, "loss": 4.7291, "step": 4865 }, { "epoch": 0.37, "learning_rate": 0.0001714147793555654, "loss": 4.1361, "step": 4870 }, { "epoch": 0.37, "learning_rate": 0.00017137455247596446, "loss": 4.0131, "step": 4875 }, { "epoch": 0.37, "learning_rate": 0.0001713343255963635, "loss": 4.918, "step": 4880 }, { "epoch": 0.37, "learning_rate": 0.00017129409871676255, "loss": 3.039, "step": 4885 }, { "epoch": 0.37, "learning_rate": 0.00017125387183716158, "loss": 2.5513, "step": 4890 }, { "epoch": 0.37, "learning_rate": 0.00017121364495756064, "loss": 1.4615, "step": 4895 }, { "epoch": 0.37, "learning_rate": 0.0001711734180779597, "loss": 1.9534, "step": 4900 }, { "epoch": 0.37, "learning_rate": 0.00017113319119835876, "loss": 4.6439, "step": 4905 }, { "epoch": 0.38, "learning_rate": 0.00017109296431875782, "loss": 4.8441, "step": 4910 }, { "epoch": 0.38, "learning_rate": 0.00017105273743915687, "loss": 3.5526, "step": 4915 }, { "epoch": 0.38, "learning_rate": 0.0001710125105595559, "loss": 3.2357, "step": 4920 }, { "epoch": 0.38, "learning_rate": 0.00017097228367995494, "loss": 3.4221, "step": 4925 }, { "epoch": 0.38, "learning_rate": 0.000170932056800354, "loss": 3.1509, "step": 4930 }, { "epoch": 0.38, "learning_rate": 0.00017089182992075305, "loss": 1.6275, "step": 4935 }, { "epoch": 0.38, "learning_rate": 0.0001708516030411521, "loss": 2.1762, "step": 4940 }, { "epoch": 0.38, "learning_rate": 0.00017081137616155117, "loss": 1.9439, "step": 4945 }, { "epoch": 0.38, "learning_rate": 0.00017077114928195023, "loss": 0.8666, "step": 4950 }, { "epoch": 0.38, "learning_rate": 0.00017073092240234926, "loss": 5.1297, "step": 4955 }, { "epoch": 0.38, "learning_rate": 0.0001706906955227483, "loss": 4.4209, "step": 4960 }, { "epoch": 0.38, "learning_rate": 0.00017065046864314735, "loss": 4.9455, "step": 4965 }, { "epoch": 0.38, "learning_rate": 0.0001706102417635464, "loss": 4.8725, "step": 4970 }, { "epoch": 0.38, "learning_rate": 0.00017057001488394546, "loss": 3.9418, "step": 4975 }, { "epoch": 0.38, "learning_rate": 0.00017052978800434452, "loss": 4.1444, "step": 4980 }, { "epoch": 0.38, "learning_rate": 0.00017048956112474358, "loss": 3.5028, "step": 4985 }, { "epoch": 0.38, "learning_rate": 0.0001704493342451426, "loss": 2.5432, "step": 4990 }, { "epoch": 0.38, "learning_rate": 0.00017040910736554164, "loss": 1.4747, "step": 4995 }, { "epoch": 0.38, "learning_rate": 0.0001703688804859407, "loss": 0.9428, "step": 5000 }, { "epoch": 0.38, "learning_rate": 0.00017032865360633976, "loss": 5.8418, "step": 5005 }, { "epoch": 0.38, "learning_rate": 0.00017028842672673882, "loss": 3.3178, "step": 5010 }, { "epoch": 0.38, "learning_rate": 0.00017024819984713788, "loss": 4.2133, "step": 5015 }, { "epoch": 0.38, "learning_rate": 0.00017020797296753693, "loss": 3.9502, "step": 5020 }, { "epoch": 0.38, "learning_rate": 0.00017016774608793597, "loss": 3.731, "step": 5025 }, { "epoch": 0.38, "learning_rate": 0.000170127519208335, "loss": 4.6752, "step": 5030 }, { "epoch": 0.38, "learning_rate": 0.00017008729232873405, "loss": 2.7182, "step": 5035 }, { "epoch": 0.39, "learning_rate": 0.0001700470654491331, "loss": 2.9108, "step": 5040 }, { "epoch": 0.39, "learning_rate": 0.00017000683856953217, "loss": 3.8828, "step": 5045 }, { "epoch": 0.39, "learning_rate": 0.00016996661168993123, "loss": 1.53, "step": 5050 }, { "epoch": 0.39, "learning_rate": 0.0001699263848103303, "loss": 5.5035, "step": 5055 }, { "epoch": 0.39, "learning_rate": 0.00016988615793072932, "loss": 3.9553, "step": 5060 }, { "epoch": 0.39, "learning_rate": 0.00016984593105112838, "loss": 4.5656, "step": 5065 }, { "epoch": 0.39, "learning_rate": 0.0001698057041715274, "loss": 4.4553, "step": 5070 }, { "epoch": 0.39, "learning_rate": 0.00016976547729192647, "loss": 4.3305, "step": 5075 }, { "epoch": 0.39, "learning_rate": 0.00016972525041232552, "loss": 3.2616, "step": 5080 }, { "epoch": 0.39, "learning_rate": 0.00016968502353272458, "loss": 4.177, "step": 5085 }, { "epoch": 0.39, "learning_rate": 0.00016964479665312364, "loss": 3.3635, "step": 5090 }, { "epoch": 0.39, "learning_rate": 0.00016960456977352267, "loss": 0.7162, "step": 5095 }, { "epoch": 0.39, "learning_rate": 0.00016956434289392173, "loss": 3.3646, "step": 5100 }, { "epoch": 0.39, "learning_rate": 0.00016952411601432076, "loss": 3.9, "step": 5105 }, { "epoch": 0.39, "learning_rate": 0.00016948388913471982, "loss": 5.1976, "step": 5110 }, { "epoch": 0.39, "learning_rate": 0.00016944366225511888, "loss": 5.4234, "step": 5115 }, { "epoch": 0.39, "learning_rate": 0.00016940343537551794, "loss": 3.2449, "step": 5120 }, { "epoch": 0.39, "learning_rate": 0.00016936320849591697, "loss": 3.8414, "step": 5125 }, { "epoch": 0.39, "learning_rate": 0.00016932298161631603, "loss": 3.5964, "step": 5130 }, { "epoch": 0.39, "learning_rate": 0.00016928275473671508, "loss": 3.0642, "step": 5135 }, { "epoch": 0.39, "learning_rate": 0.00016924252785711414, "loss": 2.8784, "step": 5140 }, { "epoch": 0.39, "learning_rate": 0.00016920230097751317, "loss": 2.0424, "step": 5145 }, { "epoch": 0.39, "learning_rate": 0.00016916207409791223, "loss": 0.0858, "step": 5150 }, { "epoch": 0.39, "learning_rate": 0.0001691218472183113, "loss": 4.6318, "step": 5155 }, { "epoch": 0.39, "learning_rate": 0.00016908162033871032, "loss": 5.0166, "step": 5160 }, { "epoch": 0.39, "learning_rate": 0.00016904139345910938, "loss": 3.9994, "step": 5165 }, { "epoch": 0.4, "learning_rate": 0.00016900116657950844, "loss": 4.776, "step": 5170 }, { "epoch": 0.4, "learning_rate": 0.0001689609396999075, "loss": 4.778, "step": 5175 }, { "epoch": 0.4, "learning_rate": 0.00016892071282030653, "loss": 3.5281, "step": 5180 }, { "epoch": 0.4, "learning_rate": 0.00016888048594070558, "loss": 2.7328, "step": 5185 }, { "epoch": 0.4, "learning_rate": 0.00016884025906110464, "loss": 2.6921, "step": 5190 }, { "epoch": 0.4, "learning_rate": 0.00016880003218150367, "loss": 1.5729, "step": 5195 }, { "epoch": 0.4, "learning_rate": 0.00016875980530190273, "loss": 1.321, "step": 5200 }, { "epoch": 0.4, "learning_rate": 0.0001687195784223018, "loss": 3.92, "step": 5205 }, { "epoch": 0.4, "learning_rate": 0.00016867935154270085, "loss": 4.4383, "step": 5210 }, { "epoch": 0.4, "learning_rate": 0.0001686391246630999, "loss": 5.2021, "step": 5215 }, { "epoch": 0.4, "learning_rate": 0.00016859889778349894, "loss": 2.773, "step": 5220 }, { "epoch": 0.4, "learning_rate": 0.000168558670903898, "loss": 3.292, "step": 5225 }, { "epoch": 0.4, "learning_rate": 0.00016851844402429703, "loss": 4.218, "step": 5230 }, { "epoch": 0.4, "learning_rate": 0.00016847821714469609, "loss": 3.38, "step": 5235 }, { "epoch": 0.4, "learning_rate": 0.00016843799026509514, "loss": 2.0585, "step": 5240 }, { "epoch": 0.4, "learning_rate": 0.0001683977633854942, "loss": 3.6215, "step": 5245 }, { "epoch": 0.4, "learning_rate": 0.00016835753650589326, "loss": 0.1181, "step": 5250 }, { "epoch": 0.4, "learning_rate": 0.0001683173096262923, "loss": 4.9352, "step": 5255 }, { "epoch": 0.4, "learning_rate": 0.00016827708274669135, "loss": 4.5021, "step": 5260 }, { "epoch": 0.4, "learning_rate": 0.00016823685586709038, "loss": 4.435, "step": 5265 }, { "epoch": 0.4, "learning_rate": 0.00016819662898748944, "loss": 3.6467, "step": 5270 }, { "epoch": 0.4, "learning_rate": 0.0001681564021078885, "loss": 3.5488, "step": 5275 }, { "epoch": 0.4, "learning_rate": 0.00016811617522828756, "loss": 3.414, "step": 5280 }, { "epoch": 0.4, "learning_rate": 0.00016807594834868661, "loss": 3.1631, "step": 5285 }, { "epoch": 0.4, "learning_rate": 0.00016803572146908567, "loss": 2.0229, "step": 5290 }, { "epoch": 0.4, "learning_rate": 0.0001679954945894847, "loss": 1.2201, "step": 5295 }, { "epoch": 0.41, "learning_rate": 0.00016795526770988373, "loss": 3.7302, "step": 5300 }, { "epoch": 0.41, "learning_rate": 0.0001679150408302828, "loss": 4.5115, "step": 5305 }, { "epoch": 0.41, "learning_rate": 0.00016787481395068185, "loss": 4.2893, "step": 5310 }, { "epoch": 0.41, "learning_rate": 0.0001678345870710809, "loss": 3.5971, "step": 5315 }, { "epoch": 0.41, "learning_rate": 0.00016779436019147997, "loss": 4.424, "step": 5320 }, { "epoch": 0.41, "learning_rate": 0.00016775413331187903, "loss": 3.4488, "step": 5325 }, { "epoch": 0.41, "learning_rate": 0.00016771390643227806, "loss": 2.7201, "step": 5330 }, { "epoch": 0.41, "learning_rate": 0.0001676736795526771, "loss": 2.8584, "step": 5335 }, { "epoch": 0.41, "learning_rate": 0.00016763345267307615, "loss": 2.0608, "step": 5340 }, { "epoch": 0.41, "learning_rate": 0.0001675932257934752, "loss": 1.7905, "step": 5345 }, { "epoch": 0.41, "learning_rate": 0.00016755299891387426, "loss": 1.7893, "step": 5350 }, { "epoch": 0.41, "learning_rate": 0.00016751277203427332, "loss": 4.6043, "step": 5355 }, { "epoch": 0.41, "learning_rate": 0.00016747254515467238, "loss": 4.7084, "step": 5360 }, { "epoch": 0.41, "learning_rate": 0.0001674323182750714, "loss": 4.0279, "step": 5365 }, { "epoch": 0.41, "learning_rate": 0.00016739209139547044, "loss": 4.6014, "step": 5370 }, { "epoch": 0.41, "learning_rate": 0.0001673518645158695, "loss": 4.233, "step": 5375 }, { "epoch": 0.41, "learning_rate": 0.00016731163763626856, "loss": 4.4205, "step": 5380 }, { "epoch": 0.41, "learning_rate": 0.00016727141075666762, "loss": 3.7451, "step": 5385 }, { "epoch": 0.41, "learning_rate": 0.00016723118387706667, "loss": 1.9283, "step": 5390 }, { "epoch": 0.41, "learning_rate": 0.00016719095699746573, "loss": 1.5219, "step": 5395 }, { "epoch": 0.41, "learning_rate": 0.00016715073011786476, "loss": 1.0435, "step": 5400 }, { "epoch": 0.41, "learning_rate": 0.00016711050323826382, "loss": 3.7072, "step": 5405 }, { "epoch": 0.41, "learning_rate": 0.00016707027635866285, "loss": 4.7615, "step": 5410 }, { "epoch": 0.41, "learning_rate": 0.0001670300494790619, "loss": 3.8532, "step": 5415 }, { "epoch": 0.41, "learning_rate": 0.00016698982259946097, "loss": 4.3826, "step": 5420 }, { "epoch": 0.41, "learning_rate": 0.00016694959571986003, "loss": 3.766, "step": 5425 }, { "epoch": 0.42, "learning_rate": 0.00016690936884025906, "loss": 2.053, "step": 5430 }, { "epoch": 0.42, "learning_rate": 0.00016686914196065812, "loss": 3.3234, "step": 5435 }, { "epoch": 0.42, "learning_rate": 0.00016682891508105718, "loss": 1.802, "step": 5440 }, { "epoch": 0.42, "learning_rate": 0.0001667886882014562, "loss": 2.0026, "step": 5445 }, { "epoch": 0.42, "learning_rate": 0.00016674846132185527, "loss": 1.087, "step": 5450 }, { "epoch": 0.42, "learning_rate": 0.00016670823444225432, "loss": 4.4006, "step": 5455 }, { "epoch": 0.42, "learning_rate": 0.00016666800756265338, "loss": 4.633, "step": 5460 }, { "epoch": 0.42, "learning_rate": 0.0001666277806830524, "loss": 3.4943, "step": 5465 }, { "epoch": 0.42, "learning_rate": 0.00016658755380345147, "loss": 4.6764, "step": 5470 }, { "epoch": 0.42, "learning_rate": 0.00016654732692385053, "loss": 3.6434, "step": 5475 }, { "epoch": 0.42, "learning_rate": 0.0001665071000442496, "loss": 3.2263, "step": 5480 }, { "epoch": 0.42, "learning_rate": 0.00016646687316464862, "loss": 2.8007, "step": 5485 }, { "epoch": 0.42, "learning_rate": 0.00016642664628504768, "loss": 2.9424, "step": 5490 }, { "epoch": 0.42, "learning_rate": 0.00016638641940544674, "loss": 1.1535, "step": 5495 }, { "epoch": 0.42, "learning_rate": 0.00016634619252584577, "loss": 0.2137, "step": 5500 }, { "epoch": 0.42, "learning_rate": 0.00016630596564624482, "loss": 4.2225, "step": 5505 }, { "epoch": 0.42, "learning_rate": 0.00016626573876664388, "loss": 4.5811, "step": 5510 }, { "epoch": 0.42, "learning_rate": 0.00016622551188704294, "loss": 5.7295, "step": 5515 }, { "epoch": 0.42, "learning_rate": 0.00016618528500744197, "loss": 4.3728, "step": 5520 }, { "epoch": 0.42, "learning_rate": 0.00016614505812784103, "loss": 3.9244, "step": 5525 }, { "epoch": 0.42, "learning_rate": 0.0001661048312482401, "loss": 3.3557, "step": 5530 }, { "epoch": 0.42, "learning_rate": 0.00016606460436863912, "loss": 2.9875, "step": 5535 }, { "epoch": 0.42, "learning_rate": 0.00016602437748903818, "loss": 3.5087, "step": 5540 }, { "epoch": 0.42, "learning_rate": 0.00016598415060943724, "loss": 3.3396, "step": 5545 }, { "epoch": 0.42, "learning_rate": 0.0001659439237298363, "loss": 1.8395, "step": 5550 }, { "epoch": 0.42, "learning_rate": 0.00016590369685023535, "loss": 4.0984, "step": 5555 }, { "epoch": 0.42, "learning_rate": 0.00016586346997063438, "loss": 3.4518, "step": 5560 }, { "epoch": 0.43, "learning_rate": 0.00016582324309103344, "loss": 4.3408, "step": 5565 }, { "epoch": 0.43, "learning_rate": 0.00016578301621143247, "loss": 3.5854, "step": 5570 }, { "epoch": 0.43, "learning_rate": 0.00016574278933183153, "loss": 3.4309, "step": 5575 }, { "epoch": 0.43, "learning_rate": 0.0001657025624522306, "loss": 3.1176, "step": 5580 }, { "epoch": 0.43, "learning_rate": 0.00016566233557262965, "loss": 3.166, "step": 5585 }, { "epoch": 0.43, "learning_rate": 0.0001656221086930287, "loss": 2.8318, "step": 5590 }, { "epoch": 0.43, "learning_rate": 0.00016558188181342774, "loss": 1.8063, "step": 5595 }, { "epoch": 0.43, "learning_rate": 0.0001655416549338268, "loss": 0.0932, "step": 5600 }, { "epoch": 0.43, "learning_rate": 0.00016550142805422583, "loss": 4.1902, "step": 5605 }, { "epoch": 0.43, "learning_rate": 0.00016546120117462489, "loss": 4.9719, "step": 5610 }, { "epoch": 0.43, "learning_rate": 0.00016542097429502394, "loss": 4.8271, "step": 5615 }, { "epoch": 0.43, "learning_rate": 0.000165380747415423, "loss": 4.3954, "step": 5620 }, { "epoch": 0.43, "learning_rate": 0.00016534052053582206, "loss": 3.5303, "step": 5625 }, { "epoch": 0.43, "learning_rate": 0.00016530029365622112, "loss": 2.2174, "step": 5630 }, { "epoch": 0.43, "learning_rate": 0.00016526006677662015, "loss": 2.1981, "step": 5635 }, { "epoch": 0.43, "learning_rate": 0.00016521983989701918, "loss": 2.1, "step": 5640 }, { "epoch": 0.43, "learning_rate": 0.00016517961301741824, "loss": 2.3089, "step": 5645 }, { "epoch": 0.43, "learning_rate": 0.0001651393861378173, "loss": 3.9498, "step": 5650 }, { "epoch": 0.43, "learning_rate": 0.00016509915925821636, "loss": 4.8281, "step": 5655 }, { "epoch": 0.43, "learning_rate": 0.0001650589323786154, "loss": 5.3375, "step": 5660 }, { "epoch": 0.43, "learning_rate": 0.00016501870549901447, "loss": 4.2732, "step": 5665 }, { "epoch": 0.43, "learning_rate": 0.0001649784786194135, "loss": 3.8743, "step": 5670 }, { "epoch": 0.43, "learning_rate": 0.00016493825173981253, "loss": 2.8098, "step": 5675 }, { "epoch": 0.43, "learning_rate": 0.0001648980248602116, "loss": 3.626, "step": 5680 }, { "epoch": 0.43, "learning_rate": 0.00016485779798061065, "loss": 3.1708, "step": 5685 }, { "epoch": 0.43, "learning_rate": 0.0001648175711010097, "loss": 2.9451, "step": 5690 }, { "epoch": 0.44, "learning_rate": 0.00016477734422140877, "loss": 2.7528, "step": 5695 }, { "epoch": 0.44, "learning_rate": 0.00016473711734180783, "loss": 0.7335, "step": 5700 }, { "epoch": 0.44, "learning_rate": 0.00016469689046220686, "loss": 4.0922, "step": 5705 }, { "epoch": 0.44, "learning_rate": 0.0001646566635826059, "loss": 4.3029, "step": 5710 }, { "epoch": 0.44, "learning_rate": 0.00016461643670300495, "loss": 4.0666, "step": 5715 }, { "epoch": 0.44, "learning_rate": 0.000164576209823404, "loss": 4.3164, "step": 5720 }, { "epoch": 0.44, "learning_rate": 0.00016453598294380306, "loss": 3.5441, "step": 5725 }, { "epoch": 0.44, "learning_rate": 0.00016449575606420212, "loss": 2.875, "step": 5730 }, { "epoch": 0.44, "learning_rate": 0.00016445552918460115, "loss": 2.0636, "step": 5735 }, { "epoch": 0.44, "learning_rate": 0.0001644153023050002, "loss": 2.9502, "step": 5740 }, { "epoch": 0.44, "learning_rate": 0.00016437507542539924, "loss": 0.608, "step": 5745 }, { "epoch": 0.44, "learning_rate": 0.0001643348485457983, "loss": 2.9006, "step": 5750 }, { "epoch": 0.44, "learning_rate": 0.00016429462166619736, "loss": 5.0873, "step": 5755 }, { "epoch": 0.44, "learning_rate": 0.00016425439478659642, "loss": 4.1059, "step": 5760 }, { "epoch": 0.44, "learning_rate": 0.00016421416790699547, "loss": 3.5555, "step": 5765 }, { "epoch": 0.44, "learning_rate": 0.0001641739410273945, "loss": 3.1217, "step": 5770 }, { "epoch": 0.44, "learning_rate": 0.00016413371414779356, "loss": 3.9523, "step": 5775 }, { "epoch": 0.44, "learning_rate": 0.00016409348726819262, "loss": 2.5499, "step": 5780 }, { "epoch": 0.44, "learning_rate": 0.00016405326038859165, "loss": 1.9861, "step": 5785 }, { "epoch": 0.44, "learning_rate": 0.0001640130335089907, "loss": 1.938, "step": 5790 }, { "epoch": 0.44, "learning_rate": 0.00016397280662938977, "loss": 1.8224, "step": 5795 }, { "epoch": 0.44, "learning_rate": 0.00016393257974978883, "loss": 3.7677, "step": 5800 }, { "epoch": 0.44, "learning_rate": 0.00016389235287018786, "loss": 4.5553, "step": 5805 }, { "epoch": 0.44, "learning_rate": 0.00016385212599058692, "loss": 4.0107, "step": 5810 }, { "epoch": 0.44, "learning_rate": 0.00016381189911098597, "loss": 4.0619, "step": 5815 }, { "epoch": 0.44, "learning_rate": 0.000163771672231385, "loss": 4.0109, "step": 5820 }, { "epoch": 0.45, "learning_rate": 0.00016373144535178406, "loss": 3.6002, "step": 5825 }, { "epoch": 0.45, "learning_rate": 0.00016369121847218312, "loss": 3.1553, "step": 5830 }, { "epoch": 0.45, "learning_rate": 0.00016365099159258218, "loss": 2.579, "step": 5835 }, { "epoch": 0.45, "learning_rate": 0.0001636107647129812, "loss": 2.6138, "step": 5840 }, { "epoch": 0.45, "learning_rate": 0.00016357053783338027, "loss": 1.2922, "step": 5845 }, { "epoch": 0.45, "learning_rate": 0.00016353031095377933, "loss": 1.3788, "step": 5850 }, { "epoch": 0.45, "learning_rate": 0.0001634900840741784, "loss": 3.92, "step": 5855 }, { "epoch": 0.45, "learning_rate": 0.00016344985719457742, "loss": 4.1422, "step": 5860 }, { "epoch": 0.45, "learning_rate": 0.00016340963031497648, "loss": 4.1127, "step": 5865 }, { "epoch": 0.45, "learning_rate": 0.00016336940343537553, "loss": 3.7213, "step": 5870 }, { "epoch": 0.45, "learning_rate": 0.00016332917655577457, "loss": 3.4097, "step": 5875 }, { "epoch": 0.45, "learning_rate": 0.00016328894967617362, "loss": 2.2418, "step": 5880 }, { "epoch": 0.45, "learning_rate": 0.00016324872279657268, "loss": 2.4848, "step": 5885 }, { "epoch": 0.45, "learning_rate": 0.00016320849591697174, "loss": 2.2139, "step": 5890 }, { "epoch": 0.45, "learning_rate": 0.00016316826903737077, "loss": 2.1187, "step": 5895 }, { "epoch": 0.45, "learning_rate": 0.00016312804215776983, "loss": 0.5452, "step": 5900 }, { "epoch": 0.45, "learning_rate": 0.0001630878152781689, "loss": 5.1633, "step": 5905 }, { "epoch": 0.45, "learning_rate": 0.00016304758839856792, "loss": 5.2154, "step": 5910 }, { "epoch": 0.45, "learning_rate": 0.00016300736151896698, "loss": 4.3537, "step": 5915 }, { "epoch": 0.45, "learning_rate": 0.00016296713463936604, "loss": 4.3426, "step": 5920 }, { "epoch": 0.45, "learning_rate": 0.0001629269077597651, "loss": 3.8029, "step": 5925 }, { "epoch": 0.45, "learning_rate": 0.00016288668088016415, "loss": 3.3158, "step": 5930 }, { "epoch": 0.45, "learning_rate": 0.00016284645400056318, "loss": 2.3386, "step": 5935 }, { "epoch": 0.45, "learning_rate": 0.00016280622712096224, "loss": 2.2401, "step": 5940 }, { "epoch": 0.45, "learning_rate": 0.00016276600024136127, "loss": 3.1957, "step": 5945 }, { "epoch": 0.45, "learning_rate": 0.00016272577336176033, "loss": 3.0096, "step": 5950 }, { "epoch": 0.46, "learning_rate": 0.0001626855464821594, "loss": 4.1182, "step": 5955 }, { "epoch": 0.46, "learning_rate": 0.00016264531960255845, "loss": 4.3215, "step": 5960 }, { "epoch": 0.46, "learning_rate": 0.0001626050927229575, "loss": 4.4809, "step": 5965 }, { "epoch": 0.46, "learning_rate": 0.00016256486584335654, "loss": 4.4412, "step": 5970 }, { "epoch": 0.46, "learning_rate": 0.0001625246389637556, "loss": 5.4105, "step": 5975 }, { "epoch": 0.46, "learning_rate": 0.00016248441208415463, "loss": 4.3814, "step": 5980 }, { "epoch": 0.46, "learning_rate": 0.00016244418520455368, "loss": 2.7278, "step": 5985 }, { "epoch": 0.46, "learning_rate": 0.00016240395832495274, "loss": 3.1769, "step": 5990 }, { "epoch": 0.46, "learning_rate": 0.0001623637314453518, "loss": 0.9617, "step": 5995 }, { "epoch": 0.46, "learning_rate": 0.00016232350456575086, "loss": 1.1335, "step": 6000 }, { "epoch": 0.46, "learning_rate": 0.00016228327768614992, "loss": 4.5203, "step": 6005 }, { "epoch": 0.46, "learning_rate": 0.00016224305080654895, "loss": 4.9381, "step": 6010 }, { "epoch": 0.46, "learning_rate": 0.00016220282392694798, "loss": 4.7555, "step": 6015 }, { "epoch": 0.46, "learning_rate": 0.00016216259704734704, "loss": 4.7895, "step": 6020 }, { "epoch": 0.46, "learning_rate": 0.0001621223701677461, "loss": 3.355, "step": 6025 }, { "epoch": 0.46, "learning_rate": 0.00016208214328814515, "loss": 3.2165, "step": 6030 }, { "epoch": 0.46, "learning_rate": 0.0001620419164085442, "loss": 3.9241, "step": 6035 }, { "epoch": 0.46, "learning_rate": 0.00016200168952894324, "loss": 2.3204, "step": 6040 }, { "epoch": 0.46, "learning_rate": 0.00016196146264934227, "loss": 1.1676, "step": 6045 }, { "epoch": 0.46, "learning_rate": 0.00016192123576974133, "loss": 0.6222, "step": 6050 }, { "epoch": 0.46, "learning_rate": 0.0001618810088901404, "loss": 4.2146, "step": 6055 }, { "epoch": 0.46, "learning_rate": 0.00016184078201053945, "loss": 5.6813, "step": 6060 }, { "epoch": 0.46, "learning_rate": 0.0001618005551309385, "loss": 3.683, "step": 6065 }, { "epoch": 0.46, "learning_rate": 0.00016176032825133757, "loss": 3.5365, "step": 6070 }, { "epoch": 0.46, "learning_rate": 0.0001617201013717366, "loss": 2.232, "step": 6075 }, { "epoch": 0.46, "learning_rate": 0.00016167987449213566, "loss": 3.2946, "step": 6080 }, { "epoch": 0.47, "learning_rate": 0.00016163964761253469, "loss": 2.1471, "step": 6085 }, { "epoch": 0.47, "learning_rate": 0.00016159942073293374, "loss": 3.233, "step": 6090 }, { "epoch": 0.47, "learning_rate": 0.0001615591938533328, "loss": 3.6617, "step": 6095 }, { "epoch": 0.47, "learning_rate": 0.00016151896697373186, "loss": 1.3713, "step": 6100 }, { "epoch": 0.47, "learning_rate": 0.00016147874009413092, "loss": 5.2553, "step": 6105 }, { "epoch": 0.47, "learning_rate": 0.00016143851321452995, "loss": 4.0039, "step": 6110 }, { "epoch": 0.47, "learning_rate": 0.000161398286334929, "loss": 4.5361, "step": 6115 }, { "epoch": 0.47, "learning_rate": 0.00016135805945532804, "loss": 3.3616, "step": 6120 }, { "epoch": 0.47, "learning_rate": 0.0001613178325757271, "loss": 3.3566, "step": 6125 }, { "epoch": 0.47, "learning_rate": 0.00016127760569612616, "loss": 3.7517, "step": 6130 }, { "epoch": 0.47, "learning_rate": 0.00016123737881652521, "loss": 3.2124, "step": 6135 }, { "epoch": 0.47, "learning_rate": 0.00016119715193692427, "loss": 2.3577, "step": 6140 }, { "epoch": 0.47, "learning_rate": 0.0001611569250573233, "loss": 3.5035, "step": 6145 }, { "epoch": 0.47, "learning_rate": 0.00016111669817772236, "loss": 1.3117, "step": 6150 }, { "epoch": 0.47, "learning_rate": 0.00016107647129812142, "loss": 4.2394, "step": 6155 }, { "epoch": 0.47, "learning_rate": 0.00016103624441852045, "loss": 4.5457, "step": 6160 }, { "epoch": 0.47, "learning_rate": 0.0001609960175389195, "loss": 3.7481, "step": 6165 }, { "epoch": 0.47, "learning_rate": 0.00016095579065931857, "loss": 3.9391, "step": 6170 }, { "epoch": 0.47, "learning_rate": 0.00016091556377971763, "loss": 3.5035, "step": 6175 }, { "epoch": 0.47, "learning_rate": 0.00016087533690011666, "loss": 3.3803, "step": 6180 }, { "epoch": 0.47, "learning_rate": 0.00016083511002051572, "loss": 2.449, "step": 6185 }, { "epoch": 0.47, "learning_rate": 0.00016079488314091477, "loss": 1.8994, "step": 6190 }, { "epoch": 0.47, "learning_rate": 0.0001607546562613138, "loss": 2.9759, "step": 6195 }, { "epoch": 0.47, "learning_rate": 0.00016071442938171286, "loss": 2.7731, "step": 6200 }, { "epoch": 0.47, "learning_rate": 0.00016067420250211192, "loss": 4.325, "step": 6205 }, { "epoch": 0.47, "learning_rate": 0.00016063397562251098, "loss": 4.6818, "step": 6210 }, { "epoch": 0.48, "learning_rate": 0.00016059374874291, "loss": 3.3004, "step": 6215 }, { "epoch": 0.48, "learning_rate": 0.00016055352186330907, "loss": 3.7553, "step": 6220 }, { "epoch": 0.48, "learning_rate": 0.00016051329498370813, "loss": 3.8145, "step": 6225 }, { "epoch": 0.48, "learning_rate": 0.00016047306810410719, "loss": 3.1997, "step": 6230 }, { "epoch": 0.48, "learning_rate": 0.00016043284122450622, "loss": 3.2375, "step": 6235 }, { "epoch": 0.48, "learning_rate": 0.00016039261434490528, "loss": 2.0859, "step": 6240 }, { "epoch": 0.48, "learning_rate": 0.00016035238746530433, "loss": 3.2224, "step": 6245 }, { "epoch": 0.48, "learning_rate": 0.00016031216058570336, "loss": 2.5072, "step": 6250 }, { "epoch": 0.48, "learning_rate": 0.00016027193370610242, "loss": 4.9318, "step": 6255 }, { "epoch": 0.48, "learning_rate": 0.00016023170682650148, "loss": 4.1885, "step": 6260 }, { "epoch": 0.48, "learning_rate": 0.00016019147994690054, "loss": 4.3771, "step": 6265 }, { "epoch": 0.48, "learning_rate": 0.00016015125306729957, "loss": 4.267, "step": 6270 }, { "epoch": 0.48, "learning_rate": 0.00016011102618769863, "loss": 4.1869, "step": 6275 }, { "epoch": 0.48, "learning_rate": 0.0001600707993080977, "loss": 2.5321, "step": 6280 }, { "epoch": 0.48, "learning_rate": 0.00016003057242849672, "loss": 2.5823, "step": 6285 }, { "epoch": 0.48, "learning_rate": 0.00015999034554889578, "loss": 1.3439, "step": 6290 }, { "epoch": 0.48, "learning_rate": 0.00015995011866929483, "loss": 2.2149, "step": 6295 }, { "epoch": 0.48, "learning_rate": 0.0001599098917896939, "loss": 1.6957, "step": 6300 }, { "epoch": 0.48, "learning_rate": 0.00015986966491009295, "loss": 4.6896, "step": 6305 }, { "epoch": 0.48, "learning_rate": 0.00015982943803049198, "loss": 4.6721, "step": 6310 }, { "epoch": 0.48, "learning_rate": 0.00015978921115089104, "loss": 3.8354, "step": 6315 }, { "epoch": 0.48, "learning_rate": 0.00015974898427129007, "loss": 3.1878, "step": 6320 }, { "epoch": 0.48, "learning_rate": 0.00015970875739168913, "loss": 3.6049, "step": 6325 }, { "epoch": 0.48, "learning_rate": 0.0001596685305120882, "loss": 3.4877, "step": 6330 }, { "epoch": 0.48, "learning_rate": 0.00015962830363248725, "loss": 3.7444, "step": 6335 }, { "epoch": 0.48, "learning_rate": 0.0001595880767528863, "loss": 2.6608, "step": 6340 }, { "epoch": 0.48, "learning_rate": 0.00015954784987328534, "loss": 1.8235, "step": 6345 }, { "epoch": 0.49, "learning_rate": 0.00015950762299368437, "loss": 1.9221, "step": 6350 }, { "epoch": 0.49, "learning_rate": 0.00015946739611408342, "loss": 4.5215, "step": 6355 }, { "epoch": 0.49, "learning_rate": 0.00015942716923448248, "loss": 5.2854, "step": 6360 }, { "epoch": 0.49, "learning_rate": 0.00015938694235488154, "loss": 4.3033, "step": 6365 }, { "epoch": 0.49, "learning_rate": 0.0001593467154752806, "loss": 5.4211, "step": 6370 }, { "epoch": 0.49, "learning_rate": 0.00015930648859567966, "loss": 2.9237, "step": 6375 }, { "epoch": 0.49, "learning_rate": 0.0001592662617160787, "loss": 3.6648, "step": 6380 }, { "epoch": 0.49, "learning_rate": 0.00015922603483647772, "loss": 2.2996, "step": 6385 }, { "epoch": 0.49, "learning_rate": 0.00015918580795687678, "loss": 2.3559, "step": 6390 }, { "epoch": 0.49, "learning_rate": 0.00015914558107727584, "loss": 1.8411, "step": 6395 }, { "epoch": 0.49, "learning_rate": 0.0001591053541976749, "loss": 1.8784, "step": 6400 }, { "epoch": 0.49, "learning_rate": 0.00015906512731807395, "loss": 3.5871, "step": 6405 }, { "epoch": 0.49, "learning_rate": 0.000159024900438473, "loss": 4.8471, "step": 6410 }, { "epoch": 0.49, "learning_rate": 0.00015898467355887204, "loss": 3.7787, "step": 6415 }, { "epoch": 0.49, "learning_rate": 0.00015894444667927107, "loss": 3.7279, "step": 6420 }, { "epoch": 0.49, "learning_rate": 0.00015890421979967013, "loss": 4.0128, "step": 6425 }, { "epoch": 0.49, "learning_rate": 0.0001588639929200692, "loss": 3.1311, "step": 6430 }, { "epoch": 0.49, "learning_rate": 0.00015882376604046825, "loss": 2.5843, "step": 6435 }, { "epoch": 0.49, "learning_rate": 0.0001587835391608673, "loss": 4.284, "step": 6440 }, { "epoch": 0.49, "learning_rate": 0.00015874331228126636, "loss": 3.2978, "step": 6445 }, { "epoch": 0.49, "learning_rate": 0.0001587030854016654, "loss": 0.0883, "step": 6450 }, { "epoch": 0.49, "learning_rate": 0.00015866285852206445, "loss": 3.7918, "step": 6455 }, { "epoch": 0.49, "learning_rate": 0.00015862263164246349, "loss": 4.1014, "step": 6460 }, { "epoch": 0.49, "learning_rate": 0.00015858240476286254, "loss": 3.642, "step": 6465 }, { "epoch": 0.49, "learning_rate": 0.0001585421778832616, "loss": 4.5049, "step": 6470 }, { "epoch": 0.49, "learning_rate": 0.00015850195100366066, "loss": 3.2287, "step": 6475 }, { "epoch": 0.5, "learning_rate": 0.00015846172412405972, "loss": 2.9815, "step": 6480 }, { "epoch": 0.5, "learning_rate": 0.00015842149724445875, "loss": 3.578, "step": 6485 }, { "epoch": 0.5, "learning_rate": 0.0001583812703648578, "loss": 3.4713, "step": 6490 }, { "epoch": 0.5, "learning_rate": 0.00015834104348525684, "loss": 2.6449, "step": 6495 }, { "epoch": 0.5, "learning_rate": 0.0001583008166056559, "loss": 3.3023, "step": 6500 }, { "epoch": 0.5, "learning_rate": 0.00015826058972605496, "loss": 3.7168, "step": 6505 }, { "epoch": 0.5, "learning_rate": 0.00015822036284645401, "loss": 3.6531, "step": 6510 }, { "epoch": 0.5, "learning_rate": 0.00015818013596685307, "loss": 4.1709, "step": 6515 }, { "epoch": 0.5, "learning_rate": 0.0001581399090872521, "loss": 3.7805, "step": 6520 }, { "epoch": 0.5, "learning_rate": 0.00015809968220765116, "loss": 4.2029, "step": 6525 }, { "epoch": 0.5, "learning_rate": 0.00015805945532805022, "loss": 3.6784, "step": 6530 }, { "epoch": 0.5, "learning_rate": 0.00015801922844844925, "loss": 2.9541, "step": 6535 }, { "epoch": 0.5, "learning_rate": 0.0001579790015688483, "loss": 2.3555, "step": 6540 }, { "epoch": 0.5, "learning_rate": 0.00015793877468924737, "loss": 2.8182, "step": 6545 }, { "epoch": 0.5, "learning_rate": 0.00015789854780964643, "loss": 0.6913, "step": 6550 }, { "epoch": 0.5, "learning_rate": 0.00015785832093004546, "loss": 3.7799, "step": 6555 }, { "epoch": 0.5, "learning_rate": 0.00015781809405044451, "loss": 4.8266, "step": 6560 }, { "epoch": 0.5, "learning_rate": 0.00015777786717084357, "loss": 4.3416, "step": 6565 }, { "epoch": 0.5, "learning_rate": 0.0001577376402912426, "loss": 3.3998, "step": 6570 }, { "epoch": 0.5, "learning_rate": 0.00015769741341164166, "loss": 3.0765, "step": 6575 }, { "epoch": 0.5, "learning_rate": 0.00015765718653204072, "loss": 3.1966, "step": 6580 }, { "epoch": 0.5, "learning_rate": 0.00015761695965243978, "loss": 3.4283, "step": 6585 }, { "epoch": 0.5, "learning_rate": 0.0001575767327728388, "loss": 1.7517, "step": 6590 }, { "epoch": 0.5, "learning_rate": 0.00015753650589323787, "loss": 2.4111, "step": 6595 }, { "epoch": 0.5, "learning_rate": 0.00015749627901363693, "loss": 1.31, "step": 6600 }, { "epoch": 0.5, "learning_rate": 0.00015745605213403598, "loss": 5.1953, "step": 6605 }, { "epoch": 0.51, "learning_rate": 0.00015741582525443502, "loss": 4.7135, "step": 6610 }, { "epoch": 0.51, "learning_rate": 0.00015737559837483407, "loss": 3.4439, "step": 6615 }, { "epoch": 0.51, "learning_rate": 0.00015733537149523313, "loss": 4.5059, "step": 6620 }, { "epoch": 0.51, "learning_rate": 0.00015729514461563216, "loss": 4.826, "step": 6625 }, { "epoch": 0.51, "learning_rate": 0.00015725491773603122, "loss": 4.1916, "step": 6630 }, { "epoch": 0.51, "learning_rate": 0.00015721469085643028, "loss": 3.258, "step": 6635 }, { "epoch": 0.51, "learning_rate": 0.00015717446397682934, "loss": 2.7372, "step": 6640 }, { "epoch": 0.51, "learning_rate": 0.00015713423709722837, "loss": 2.4825, "step": 6645 }, { "epoch": 0.51, "learning_rate": 0.00015709401021762743, "loss": 0.8767, "step": 6650 }, { "epoch": 0.51, "learning_rate": 0.00015705378333802646, "loss": 5.0094, "step": 6655 }, { "epoch": 0.51, "learning_rate": 0.00015701355645842552, "loss": 5.1865, "step": 6660 }, { "epoch": 0.51, "learning_rate": 0.00015697332957882458, "loss": 3.6629, "step": 6665 }, { "epoch": 0.51, "learning_rate": 0.00015693310269922363, "loss": 4.3094, "step": 6670 }, { "epoch": 0.51, "learning_rate": 0.0001568928758196227, "loss": 3.6267, "step": 6675 }, { "epoch": 0.51, "learning_rate": 0.00015685264894002175, "loss": 3.056, "step": 6680 }, { "epoch": 0.51, "learning_rate": 0.00015681242206042078, "loss": 2.7199, "step": 6685 }, { "epoch": 0.51, "learning_rate": 0.0001567721951808198, "loss": 2.7694, "step": 6690 }, { "epoch": 0.51, "learning_rate": 0.00015673196830121887, "loss": 3.5551, "step": 6695 }, { "epoch": 0.51, "learning_rate": 0.00015669174142161793, "loss": 0.0749, "step": 6700 }, { "epoch": 0.51, "learning_rate": 0.000156651514542017, "loss": 4.768, "step": 6705 }, { "epoch": 0.51, "learning_rate": 0.00015661128766241605, "loss": 4.2012, "step": 6710 }, { "epoch": 0.51, "learning_rate": 0.0001565710607828151, "loss": 2.9939, "step": 6715 }, { "epoch": 0.51, "learning_rate": 0.00015653083390321413, "loss": 2.8134, "step": 6720 }, { "epoch": 0.51, "learning_rate": 0.00015649060702361317, "loss": 3.4247, "step": 6725 }, { "epoch": 0.51, "learning_rate": 0.00015645038014401222, "loss": 3.0152, "step": 6730 }, { "epoch": 0.51, "learning_rate": 0.00015641015326441128, "loss": 2.0896, "step": 6735 }, { "epoch": 0.52, "learning_rate": 0.00015636992638481034, "loss": 2.5528, "step": 6740 }, { "epoch": 0.52, "learning_rate": 0.0001563296995052094, "loss": 0.7059, "step": 6745 }, { "epoch": 0.52, "learning_rate": 0.00015628947262560846, "loss": 1.9621, "step": 6750 }, { "epoch": 0.52, "learning_rate": 0.0001562492457460075, "loss": 5.2277, "step": 6755 }, { "epoch": 0.52, "learning_rate": 0.00015620901886640652, "loss": 5.1182, "step": 6760 }, { "epoch": 0.52, "learning_rate": 0.00015616879198680558, "loss": 4.7277, "step": 6765 }, { "epoch": 0.52, "learning_rate": 0.00015612856510720464, "loss": 3.9717, "step": 6770 }, { "epoch": 0.52, "learning_rate": 0.0001560883382276037, "loss": 3.8693, "step": 6775 }, { "epoch": 0.52, "learning_rate": 0.00015604811134800275, "loss": 2.4765, "step": 6780 }, { "epoch": 0.52, "learning_rate": 0.0001560078844684018, "loss": 3.0296, "step": 6785 }, { "epoch": 0.52, "learning_rate": 0.00015596765758880084, "loss": 2.2676, "step": 6790 }, { "epoch": 0.52, "learning_rate": 0.00015592743070919987, "loss": 2.3231, "step": 6795 }, { "epoch": 0.52, "learning_rate": 0.00015588720382959893, "loss": 0.0427, "step": 6800 }, { "epoch": 0.52, "learning_rate": 0.000155846976949998, "loss": 4.8117, "step": 6805 }, { "epoch": 0.52, "learning_rate": 0.00015580675007039705, "loss": 4.2992, "step": 6810 }, { "epoch": 0.52, "learning_rate": 0.0001557665231907961, "loss": 4.5337, "step": 6815 }, { "epoch": 0.52, "learning_rate": 0.00015572629631119516, "loss": 5.334, "step": 6820 }, { "epoch": 0.52, "learning_rate": 0.0001556860694315942, "loss": 3.5176, "step": 6825 }, { "epoch": 0.52, "learning_rate": 0.00015564584255199325, "loss": 1.5627, "step": 6830 }, { "epoch": 0.52, "learning_rate": 0.00015560561567239228, "loss": 2.1864, "step": 6835 }, { "epoch": 0.52, "learning_rate": 0.00015556538879279134, "loss": 0.7217, "step": 6840 }, { "epoch": 0.52, "learning_rate": 0.0001555251619131904, "loss": 3.6855, "step": 6845 }, { "epoch": 0.52, "learning_rate": 0.00015548493503358946, "loss": 1.8674, "step": 6850 }, { "epoch": 0.52, "learning_rate": 0.00015544470815398852, "loss": 4.4283, "step": 6855 }, { "epoch": 0.52, "learning_rate": 0.00015540448127438755, "loss": 4.7135, "step": 6860 }, { "epoch": 0.52, "learning_rate": 0.0001553642543947866, "loss": 3.5176, "step": 6865 }, { "epoch": 0.53, "learning_rate": 0.00015532402751518564, "loss": 5.2475, "step": 6870 }, { "epoch": 0.53, "learning_rate": 0.0001552838006355847, "loss": 3.5299, "step": 6875 }, { "epoch": 0.53, "learning_rate": 0.00015524357375598375, "loss": 3.6828, "step": 6880 }, { "epoch": 0.53, "learning_rate": 0.0001552033468763828, "loss": 5.0184, "step": 6885 }, { "epoch": 0.53, "learning_rate": 0.00015516311999678187, "loss": 3.1216, "step": 6890 }, { "epoch": 0.53, "learning_rate": 0.0001551228931171809, "loss": 3.048, "step": 6895 }, { "epoch": 0.53, "learning_rate": 0.00015508266623757996, "loss": 3.0663, "step": 6900 }, { "epoch": 0.53, "learning_rate": 0.00015504243935797902, "loss": 5.0727, "step": 6905 }, { "epoch": 0.53, "learning_rate": 0.00015500221247837805, "loss": 4.3605, "step": 6910 }, { "epoch": 0.53, "learning_rate": 0.0001549619855987771, "loss": 4.4473, "step": 6915 }, { "epoch": 0.53, "learning_rate": 0.00015492175871917617, "loss": 3.3799, "step": 6920 }, { "epoch": 0.53, "learning_rate": 0.00015488153183957522, "loss": 3.7008, "step": 6925 }, { "epoch": 0.53, "learning_rate": 0.00015484130495997426, "loss": 3.7081, "step": 6930 }, { "epoch": 0.53, "learning_rate": 0.00015480107808037331, "loss": 3.5293, "step": 6935 }, { "epoch": 0.53, "learning_rate": 0.00015476085120077237, "loss": 2.8219, "step": 6940 }, { "epoch": 0.53, "learning_rate": 0.0001547206243211714, "loss": 2.5409, "step": 6945 }, { "epoch": 0.53, "learning_rate": 0.00015468039744157046, "loss": 1.8382, "step": 6950 }, { "epoch": 0.53, "learning_rate": 0.00015464017056196952, "loss": 4.9234, "step": 6955 }, { "epoch": 0.53, "learning_rate": 0.00015459994368236855, "loss": 5.3937, "step": 6960 }, { "epoch": 0.53, "learning_rate": 0.0001545597168027676, "loss": 4.5607, "step": 6965 }, { "epoch": 0.53, "learning_rate": 0.00015451948992316667, "loss": 3.7303, "step": 6970 }, { "epoch": 0.53, "learning_rate": 0.00015447926304356573, "loss": 3.9004, "step": 6975 }, { "epoch": 0.53, "learning_rate": 0.00015443903616396478, "loss": 3.7896, "step": 6980 }, { "epoch": 0.53, "learning_rate": 0.00015439880928436381, "loss": 2.2128, "step": 6985 }, { "epoch": 0.53, "learning_rate": 0.00015435858240476287, "loss": 1.8755, "step": 6990 }, { "epoch": 0.53, "learning_rate": 0.0001543183555251619, "loss": 2.1864, "step": 6995 }, { "epoch": 0.54, "learning_rate": 0.00015427812864556096, "loss": 0.8963, "step": 7000 }, { "epoch": 0.54, "learning_rate": 0.00015423790176596002, "loss": 4.0704, "step": 7005 }, { "epoch": 0.54, "learning_rate": 0.00015419767488635908, "loss": 4.6586, "step": 7010 }, { "epoch": 0.54, "learning_rate": 0.00015415744800675814, "loss": 4.1082, "step": 7015 }, { "epoch": 0.54, "learning_rate": 0.00015411722112715717, "loss": 3.795, "step": 7020 }, { "epoch": 0.54, "learning_rate": 0.00015407699424755623, "loss": 4.5971, "step": 7025 }, { "epoch": 0.54, "learning_rate": 0.00015403676736795526, "loss": 4.1455, "step": 7030 }, { "epoch": 0.54, "learning_rate": 0.00015399654048835432, "loss": 2.7245, "step": 7035 }, { "epoch": 0.54, "learning_rate": 0.00015395631360875337, "loss": 3.3336, "step": 7040 }, { "epoch": 0.54, "learning_rate": 0.00015391608672915243, "loss": 3.3736, "step": 7045 }, { "epoch": 0.54, "learning_rate": 0.0001538758598495515, "loss": 2.5276, "step": 7050 }, { "epoch": 0.54, "learning_rate": 0.00015383563296995055, "loss": 5.3309, "step": 7055 }, { "epoch": 0.54, "learning_rate": 0.00015379540609034958, "loss": 4.858, "step": 7060 }, { "epoch": 0.54, "learning_rate": 0.0001537551792107486, "loss": 4.5324, "step": 7065 }, { "epoch": 0.54, "learning_rate": 0.00015371495233114767, "loss": 4.2461, "step": 7070 }, { "epoch": 0.54, "learning_rate": 0.00015367472545154673, "loss": 3.8908, "step": 7075 }, { "epoch": 0.54, "learning_rate": 0.00015363449857194579, "loss": 3.1788, "step": 7080 }, { "epoch": 0.54, "learning_rate": 0.00015359427169234484, "loss": 3.3179, "step": 7085 }, { "epoch": 0.54, "learning_rate": 0.0001535540448127439, "loss": 2.7894, "step": 7090 }, { "epoch": 0.54, "learning_rate": 0.00015351381793314293, "loss": 2.9542, "step": 7095 }, { "epoch": 0.54, "learning_rate": 0.00015347359105354196, "loss": 0.7846, "step": 7100 }, { "epoch": 0.54, "learning_rate": 0.00015343336417394102, "loss": 5.3049, "step": 7105 }, { "epoch": 0.54, "learning_rate": 0.00015339313729434008, "loss": 4.2867, "step": 7110 }, { "epoch": 0.54, "learning_rate": 0.00015335291041473914, "loss": 4.4562, "step": 7115 }, { "epoch": 0.54, "learning_rate": 0.0001533126835351382, "loss": 3.9077, "step": 7120 }, { "epoch": 0.54, "learning_rate": 0.00015327245665553726, "loss": 3.4854, "step": 7125 }, { "epoch": 0.54, "learning_rate": 0.0001532322297759363, "loss": 2.647, "step": 7130 }, { "epoch": 0.55, "learning_rate": 0.00015319200289633532, "loss": 2.2412, "step": 7135 }, { "epoch": 0.55, "learning_rate": 0.00015315177601673438, "loss": 2.5988, "step": 7140 }, { "epoch": 0.55, "learning_rate": 0.00015311154913713343, "loss": 2.807, "step": 7145 }, { "epoch": 0.55, "learning_rate": 0.0001530713222575325, "loss": 3.1684, "step": 7150 }, { "epoch": 0.55, "learning_rate": 0.00015303109537793155, "loss": 4.0076, "step": 7155 }, { "epoch": 0.55, "learning_rate": 0.0001529908684983306, "loss": 3.8545, "step": 7160 }, { "epoch": 0.55, "learning_rate": 0.00015295064161872964, "loss": 4.7838, "step": 7165 }, { "epoch": 0.55, "learning_rate": 0.00015291041473912867, "loss": 4.7896, "step": 7170 }, { "epoch": 0.55, "learning_rate": 0.00015287018785952773, "loss": 4.5879, "step": 7175 }, { "epoch": 0.55, "learning_rate": 0.0001528299609799268, "loss": 4.1828, "step": 7180 }, { "epoch": 0.55, "learning_rate": 0.00015278973410032585, "loss": 3.6771, "step": 7185 }, { "epoch": 0.55, "learning_rate": 0.0001527495072207249, "loss": 2.4002, "step": 7190 }, { "epoch": 0.55, "learning_rate": 0.00015270928034112396, "loss": 2.9123, "step": 7195 }, { "epoch": 0.55, "learning_rate": 0.000152669053461523, "loss": 1.4825, "step": 7200 }, { "epoch": 0.55, "learning_rate": 0.00015262882658192205, "loss": 4.3521, "step": 7205 }, { "epoch": 0.55, "learning_rate": 0.00015258859970232108, "loss": 4.76, "step": 7210 }, { "epoch": 0.55, "learning_rate": 0.00015254837282272014, "loss": 4.7199, "step": 7215 }, { "epoch": 0.55, "learning_rate": 0.0001525081459431192, "loss": 3.7285, "step": 7220 }, { "epoch": 0.55, "learning_rate": 0.00015246791906351826, "loss": 4.2059, "step": 7225 }, { "epoch": 0.55, "learning_rate": 0.00015242769218391732, "loss": 2.4594, "step": 7230 }, { "epoch": 0.55, "learning_rate": 0.00015238746530431635, "loss": 1.7533, "step": 7235 }, { "epoch": 0.55, "learning_rate": 0.0001523472384247154, "loss": 2.0827, "step": 7240 }, { "epoch": 0.55, "learning_rate": 0.00015230701154511444, "loss": 3.332, "step": 7245 }, { "epoch": 0.55, "learning_rate": 0.0001522667846655135, "loss": 1.1347, "step": 7250 }, { "epoch": 0.55, "learning_rate": 0.00015222655778591255, "loss": 3.9932, "step": 7255 }, { "epoch": 0.55, "learning_rate": 0.0001521863309063116, "loss": 3.5039, "step": 7260 }, { "epoch": 0.56, "learning_rate": 0.00015214610402671064, "loss": 3.6082, "step": 7265 }, { "epoch": 0.56, "learning_rate": 0.0001521058771471097, "loss": 2.8597, "step": 7270 }, { "epoch": 0.56, "learning_rate": 0.00015206565026750876, "loss": 3.6561, "step": 7275 }, { "epoch": 0.56, "learning_rate": 0.00015202542338790782, "loss": 1.5933, "step": 7280 }, { "epoch": 0.56, "learning_rate": 0.00015198519650830685, "loss": 2.386, "step": 7285 }, { "epoch": 0.56, "learning_rate": 0.0001519449696287059, "loss": 2.4073, "step": 7290 }, { "epoch": 0.56, "learning_rate": 0.00015190474274910497, "loss": 2.3714, "step": 7295 }, { "epoch": 0.56, "learning_rate": 0.000151864515869504, "loss": 1.211, "step": 7300 }, { "epoch": 0.56, "learning_rate": 0.00015182428898990305, "loss": 3.9281, "step": 7305 }, { "epoch": 0.56, "learning_rate": 0.0001517840621103021, "loss": 4.6617, "step": 7310 }, { "epoch": 0.56, "learning_rate": 0.00015174383523070117, "loss": 4.624, "step": 7315 }, { "epoch": 0.56, "learning_rate": 0.0001517036083511002, "loss": 4.2254, "step": 7320 }, { "epoch": 0.56, "learning_rate": 0.00015166338147149926, "loss": 3.4564, "step": 7325 }, { "epoch": 0.56, "learning_rate": 0.00015162315459189832, "loss": 3.0671, "step": 7330 }, { "epoch": 0.56, "learning_rate": 0.00015158292771229735, "loss": 2.1117, "step": 7335 }, { "epoch": 0.56, "learning_rate": 0.0001515427008326964, "loss": 2.8584, "step": 7340 }, { "epoch": 0.56, "learning_rate": 0.00015150247395309547, "loss": 2.352, "step": 7345 }, { "epoch": 0.56, "learning_rate": 0.00015146224707349452, "loss": 1.9422, "step": 7350 }, { "epoch": 0.56, "learning_rate": 0.00015142202019389358, "loss": 4.873, "step": 7355 }, { "epoch": 0.56, "learning_rate": 0.00015138179331429261, "loss": 4.7299, "step": 7360 }, { "epoch": 0.56, "learning_rate": 0.00015134156643469167, "loss": 3.7564, "step": 7365 }, { "epoch": 0.56, "learning_rate": 0.0001513013395550907, "loss": 5.5928, "step": 7370 }, { "epoch": 0.56, "learning_rate": 0.00015126111267548976, "loss": 4.1801, "step": 7375 }, { "epoch": 0.56, "learning_rate": 0.00015122088579588882, "loss": 3.195, "step": 7380 }, { "epoch": 0.56, "learning_rate": 0.00015118065891628788, "loss": 2.6585, "step": 7385 }, { "epoch": 0.56, "learning_rate": 0.00015114043203668694, "loss": 2.2449, "step": 7390 }, { "epoch": 0.57, "learning_rate": 0.00015110020515708597, "loss": 3.2964, "step": 7395 }, { "epoch": 0.57, "learning_rate": 0.00015105997827748503, "loss": 1.8592, "step": 7400 }, { "epoch": 0.57, "learning_rate": 0.00015101975139788406, "loss": 5.1701, "step": 7405 }, { "epoch": 0.57, "learning_rate": 0.00015097952451828312, "loss": 3.9732, "step": 7410 }, { "epoch": 0.57, "learning_rate": 0.00015093929763868217, "loss": 3.7973, "step": 7415 }, { "epoch": 0.57, "learning_rate": 0.00015089907075908123, "loss": 4.9865, "step": 7420 }, { "epoch": 0.57, "learning_rate": 0.0001508588438794803, "loss": 3.4799, "step": 7425 }, { "epoch": 0.57, "learning_rate": 0.00015081861699987935, "loss": 2.9448, "step": 7430 }, { "epoch": 0.57, "learning_rate": 0.00015077839012027838, "loss": 2.2613, "step": 7435 }, { "epoch": 0.57, "learning_rate": 0.0001507381632406774, "loss": 3.6979, "step": 7440 }, { "epoch": 0.57, "learning_rate": 0.00015069793636107647, "loss": 1.1705, "step": 7445 }, { "epoch": 0.57, "learning_rate": 0.00015065770948147553, "loss": 1.0137, "step": 7450 }, { "epoch": 0.57, "learning_rate": 0.00015061748260187459, "loss": 4.4939, "step": 7455 }, { "epoch": 0.57, "learning_rate": 0.00015057725572227364, "loss": 4.7318, "step": 7460 }, { "epoch": 0.57, "learning_rate": 0.0001505370288426727, "loss": 4.4439, "step": 7465 }, { "epoch": 0.57, "learning_rate": 0.00015049680196307173, "loss": 3.9766, "step": 7470 }, { "epoch": 0.57, "learning_rate": 0.00015045657508347076, "loss": 4.8434, "step": 7475 }, { "epoch": 0.57, "learning_rate": 0.00015041634820386982, "loss": 3.7546, "step": 7480 }, { "epoch": 0.57, "learning_rate": 0.00015037612132426888, "loss": 4.257, "step": 7485 }, { "epoch": 0.57, "learning_rate": 0.00015033589444466794, "loss": 2.0724, "step": 7490 }, { "epoch": 0.57, "learning_rate": 0.000150295667565067, "loss": 2.3552, "step": 7495 }, { "epoch": 0.57, "learning_rate": 0.00015025544068546606, "loss": 2.8831, "step": 7500 }, { "epoch": 0.57, "learning_rate": 0.00015021521380586509, "loss": 4.2111, "step": 7505 }, { "epoch": 0.57, "learning_rate": 0.00015017498692626412, "loss": 3.5881, "step": 7510 }, { "epoch": 0.57, "learning_rate": 0.00015013476004666318, "loss": 3.7628, "step": 7515 }, { "epoch": 0.57, "learning_rate": 0.00015009453316706223, "loss": 4.3022, "step": 7520 }, { "epoch": 0.58, "learning_rate": 0.0001500543062874613, "loss": 3.9914, "step": 7525 }, { "epoch": 0.58, "learning_rate": 0.00015001407940786035, "loss": 3.0919, "step": 7530 }, { "epoch": 0.58, "learning_rate": 0.0001499738525282594, "loss": 3.287, "step": 7535 }, { "epoch": 0.58, "learning_rate": 0.00014993362564865844, "loss": 3.2431, "step": 7540 }, { "epoch": 0.58, "learning_rate": 0.00014989339876905747, "loss": 1.7876, "step": 7545 }, { "epoch": 0.58, "learning_rate": 0.00014985317188945653, "loss": 3.392, "step": 7550 }, { "epoch": 0.58, "learning_rate": 0.0001498129450098556, "loss": 4.3033, "step": 7555 }, { "epoch": 0.58, "learning_rate": 0.00014977271813025465, "loss": 4.7418, "step": 7560 }, { "epoch": 0.58, "learning_rate": 0.0001497324912506537, "loss": 3.6264, "step": 7565 }, { "epoch": 0.58, "learning_rate": 0.00014969226437105273, "loss": 3.9139, "step": 7570 }, { "epoch": 0.58, "learning_rate": 0.0001496520374914518, "loss": 4.6238, "step": 7575 }, { "epoch": 0.58, "learning_rate": 0.00014961181061185085, "loss": 3.354, "step": 7580 }, { "epoch": 0.58, "learning_rate": 0.00014957158373224988, "loss": 2.212, "step": 7585 }, { "epoch": 0.58, "learning_rate": 0.00014953135685264894, "loss": 2.9092, "step": 7590 }, { "epoch": 0.58, "learning_rate": 0.000149491129973048, "loss": 1.8592, "step": 7595 }, { "epoch": 0.58, "learning_rate": 0.00014945090309344706, "loss": 0.1596, "step": 7600 }, { "epoch": 0.58, "learning_rate": 0.0001494106762138461, "loss": 3.8275, "step": 7605 }, { "epoch": 0.58, "learning_rate": 0.00014937044933424515, "loss": 3.526, "step": 7610 }, { "epoch": 0.58, "learning_rate": 0.0001493302224546442, "loss": 4.7551, "step": 7615 }, { "epoch": 0.58, "learning_rate": 0.00014928999557504324, "loss": 2.7891, "step": 7620 }, { "epoch": 0.58, "learning_rate": 0.0001492497686954423, "loss": 3.836, "step": 7625 }, { "epoch": 0.58, "learning_rate": 0.00014920954181584135, "loss": 4.1405, "step": 7630 }, { "epoch": 0.58, "learning_rate": 0.0001491693149362404, "loss": 3.3979, "step": 7635 }, { "epoch": 0.58, "learning_rate": 0.00014912908805663944, "loss": 2.051, "step": 7640 }, { "epoch": 0.58, "learning_rate": 0.0001490888611770385, "loss": 2.0498, "step": 7645 }, { "epoch": 0.58, "learning_rate": 0.00014904863429743756, "loss": 1.2102, "step": 7650 }, { "epoch": 0.59, "learning_rate": 0.00014900840741783662, "loss": 4.4957, "step": 7655 }, { "epoch": 0.59, "learning_rate": 0.00014896818053823565, "loss": 4.0664, "step": 7660 }, { "epoch": 0.59, "learning_rate": 0.0001489279536586347, "loss": 4.7891, "step": 7665 }, { "epoch": 0.59, "learning_rate": 0.00014888772677903376, "loss": 2.6803, "step": 7670 }, { "epoch": 0.59, "learning_rate": 0.0001488474998994328, "loss": 3.4743, "step": 7675 }, { "epoch": 0.59, "learning_rate": 0.00014880727301983185, "loss": 3.2174, "step": 7680 }, { "epoch": 0.59, "learning_rate": 0.0001487670461402309, "loss": 3.109, "step": 7685 }, { "epoch": 0.59, "learning_rate": 0.00014872681926062997, "loss": 1.0869, "step": 7690 }, { "epoch": 0.59, "learning_rate": 0.00014868659238102903, "loss": 1.1583, "step": 7695 }, { "epoch": 0.59, "learning_rate": 0.00014864636550142806, "loss": 3.8975, "step": 7700 }, { "epoch": 0.59, "learning_rate": 0.00014860613862182712, "loss": 5.1785, "step": 7705 }, { "epoch": 0.59, "learning_rate": 0.00014856591174222615, "loss": 3.8462, "step": 7710 }, { "epoch": 0.59, "learning_rate": 0.0001485256848626252, "loss": 3.6188, "step": 7715 }, { "epoch": 0.59, "learning_rate": 0.00014848545798302427, "loss": 3.4301, "step": 7720 }, { "epoch": 0.59, "learning_rate": 0.00014844523110342332, "loss": 4.3648, "step": 7725 }, { "epoch": 0.59, "learning_rate": 0.00014840500422382238, "loss": 1.6885, "step": 7730 }, { "epoch": 0.59, "learning_rate": 0.0001483647773442214, "loss": 2.2321, "step": 7735 }, { "epoch": 0.59, "learning_rate": 0.00014832455046462047, "loss": 2.4661, "step": 7740 }, { "epoch": 0.59, "learning_rate": 0.0001482843235850195, "loss": 3.3984, "step": 7745 }, { "epoch": 0.59, "learning_rate": 0.00014824409670541856, "loss": 1.2363, "step": 7750 }, { "epoch": 0.59, "learning_rate": 0.00014820386982581762, "loss": 5.4285, "step": 7755 }, { "epoch": 0.59, "learning_rate": 0.00014816364294621668, "loss": 3.917, "step": 7760 }, { "epoch": 0.59, "learning_rate": 0.00014812341606661574, "loss": 4.5963, "step": 7765 }, { "epoch": 0.59, "learning_rate": 0.0001480831891870148, "loss": 3.875, "step": 7770 }, { "epoch": 0.59, "learning_rate": 0.00014804296230741382, "loss": 3.8596, "step": 7775 }, { "epoch": 0.59, "learning_rate": 0.00014800273542781286, "loss": 3.2287, "step": 7780 }, { "epoch": 0.6, "learning_rate": 0.00014796250854821191, "loss": 3.3803, "step": 7785 }, { "epoch": 0.6, "learning_rate": 0.00014792228166861097, "loss": 2.5698, "step": 7790 }, { "epoch": 0.6, "learning_rate": 0.00014788205478901003, "loss": 1.9961, "step": 7795 }, { "epoch": 0.6, "learning_rate": 0.0001478418279094091, "loss": 1.7948, "step": 7800 }, { "epoch": 0.6, "learning_rate": 0.00014780160102980815, "loss": 4.4584, "step": 7805 }, { "epoch": 0.6, "learning_rate": 0.00014776137415020718, "loss": 4.4805, "step": 7810 }, { "epoch": 0.6, "learning_rate": 0.0001477211472706062, "loss": 4.1227, "step": 7815 }, { "epoch": 0.6, "learning_rate": 0.00014768092039100527, "loss": 4.8541, "step": 7820 }, { "epoch": 0.6, "learning_rate": 0.00014764069351140433, "loss": 3.4299, "step": 7825 }, { "epoch": 0.6, "learning_rate": 0.00014760046663180338, "loss": 4.4625, "step": 7830 }, { "epoch": 0.6, "learning_rate": 0.00014756023975220244, "loss": 2.7753, "step": 7835 }, { "epoch": 0.6, "learning_rate": 0.0001475200128726015, "loss": 2.1818, "step": 7840 }, { "epoch": 0.6, "learning_rate": 0.00014747978599300053, "loss": 0.9003, "step": 7845 }, { "epoch": 0.6, "learning_rate": 0.00014743955911339956, "loss": 2.6693, "step": 7850 }, { "epoch": 0.6, "learning_rate": 0.00014739933223379862, "loss": 4.8398, "step": 7855 }, { "epoch": 0.6, "learning_rate": 0.00014735910535419768, "loss": 4.6248, "step": 7860 }, { "epoch": 0.6, "learning_rate": 0.00014731887847459674, "loss": 4.7467, "step": 7865 }, { "epoch": 0.6, "learning_rate": 0.0001472786515949958, "loss": 4.7848, "step": 7870 }, { "epoch": 0.6, "learning_rate": 0.00014723842471539483, "loss": 5.0921, "step": 7875 }, { "epoch": 0.6, "learning_rate": 0.00014719819783579389, "loss": 3.9907, "step": 7880 }, { "epoch": 0.6, "learning_rate": 0.00014715797095619292, "loss": 4.1201, "step": 7885 }, { "epoch": 0.6, "learning_rate": 0.00014711774407659197, "loss": 1.9702, "step": 7890 }, { "epoch": 0.6, "learning_rate": 0.00014707751719699103, "loss": 2.9985, "step": 7895 }, { "epoch": 0.6, "learning_rate": 0.0001470372903173901, "loss": 1.4111, "step": 7900 }, { "epoch": 0.6, "learning_rate": 0.00014699706343778915, "loss": 4.8258, "step": 7905 }, { "epoch": 0.6, "learning_rate": 0.00014695683655818818, "loss": 4.6049, "step": 7910 }, { "epoch": 0.6, "learning_rate": 0.00014691660967858724, "loss": 4.159, "step": 7915 }, { "epoch": 0.61, "learning_rate": 0.0001468763827989863, "loss": 3.5428, "step": 7920 }, { "epoch": 0.61, "learning_rate": 0.00014683615591938533, "loss": 3.757, "step": 7925 }, { "epoch": 0.61, "learning_rate": 0.00014679592903978439, "loss": 3.7508, "step": 7930 }, { "epoch": 0.61, "learning_rate": 0.00014675570216018344, "loss": 3.3074, "step": 7935 }, { "epoch": 0.61, "learning_rate": 0.0001467154752805825, "loss": 3.104, "step": 7940 }, { "epoch": 0.61, "learning_rate": 0.00014667524840098153, "loss": 2.4619, "step": 7945 }, { "epoch": 0.61, "learning_rate": 0.0001466350215213806, "loss": 2.2569, "step": 7950 }, { "epoch": 0.61, "learning_rate": 0.00014659479464177965, "loss": 3.9715, "step": 7955 }, { "epoch": 0.61, "learning_rate": 0.00014655456776217868, "loss": 3.9365, "step": 7960 }, { "epoch": 0.61, "learning_rate": 0.00014651434088257774, "loss": 4.6123, "step": 7965 }, { "epoch": 0.61, "learning_rate": 0.0001464741140029768, "loss": 4.1201, "step": 7970 }, { "epoch": 0.61, "learning_rate": 0.00014643388712337586, "loss": 2.651, "step": 7975 }, { "epoch": 0.61, "learning_rate": 0.0001463936602437749, "loss": 3.2354, "step": 7980 }, { "epoch": 0.61, "learning_rate": 0.00014635343336417395, "loss": 2.7716, "step": 7985 }, { "epoch": 0.61, "learning_rate": 0.000146313206484573, "loss": 1.5226, "step": 7990 }, { "epoch": 0.61, "learning_rate": 0.00014627297960497206, "loss": 1.8045, "step": 7995 }, { "epoch": 0.61, "learning_rate": 0.0001462327527253711, "loss": 2.6607, "step": 8000 }, { "epoch": 0.61, "learning_rate": 0.00014619252584577015, "loss": 5.0256, "step": 8005 }, { "epoch": 0.61, "learning_rate": 0.0001461522989661692, "loss": 4.3643, "step": 8010 }, { "epoch": 0.61, "learning_rate": 0.00014611207208656824, "loss": 4.3105, "step": 8015 }, { "epoch": 0.61, "learning_rate": 0.0001460718452069673, "loss": 4.2754, "step": 8020 }, { "epoch": 0.61, "learning_rate": 0.00014603161832736636, "loss": 4.1685, "step": 8025 }, { "epoch": 0.61, "learning_rate": 0.00014599139144776542, "loss": 4.0061, "step": 8030 }, { "epoch": 0.61, "learning_rate": 0.00014595116456816445, "loss": 2.8045, "step": 8035 }, { "epoch": 0.61, "learning_rate": 0.0001459109376885635, "loss": 3.6893, "step": 8040 }, { "epoch": 0.61, "learning_rate": 0.00014587071080896256, "loss": 3.1893, "step": 8045 }, { "epoch": 0.62, "learning_rate": 0.0001458304839293616, "loss": 2.4559, "step": 8050 }, { "epoch": 0.62, "learning_rate": 0.00014579025704976065, "loss": 4.7047, "step": 8055 }, { "epoch": 0.62, "learning_rate": 0.0001457500301701597, "loss": 5.4594, "step": 8060 }, { "epoch": 0.62, "learning_rate": 0.00014570980329055877, "loss": 3.6199, "step": 8065 }, { "epoch": 0.62, "learning_rate": 0.00014566957641095783, "loss": 3.1002, "step": 8070 }, { "epoch": 0.62, "learning_rate": 0.00014562934953135686, "loss": 3.0232, "step": 8075 }, { "epoch": 0.62, "learning_rate": 0.00014558912265175592, "loss": 4.4474, "step": 8080 }, { "epoch": 0.62, "learning_rate": 0.00014554889577215495, "loss": 2.7882, "step": 8085 }, { "epoch": 0.62, "learning_rate": 0.000145508668892554, "loss": 2.3826, "step": 8090 }, { "epoch": 0.62, "learning_rate": 0.00014546844201295306, "loss": 1.9198, "step": 8095 }, { "epoch": 0.62, "learning_rate": 0.00014542821513335212, "loss": 1.3647, "step": 8100 }, { "epoch": 0.62, "learning_rate": 0.00014538798825375118, "loss": 5.1926, "step": 8105 }, { "epoch": 0.62, "learning_rate": 0.0001453477613741502, "loss": 3.6435, "step": 8110 }, { "epoch": 0.62, "learning_rate": 0.00014530753449454927, "loss": 4.091, "step": 8115 }, { "epoch": 0.62, "learning_rate": 0.0001452673076149483, "loss": 4.3703, "step": 8120 }, { "epoch": 0.62, "learning_rate": 0.00014522708073534736, "loss": 4.308, "step": 8125 }, { "epoch": 0.62, "learning_rate": 0.00014518685385574642, "loss": 2.2132, "step": 8130 }, { "epoch": 0.62, "learning_rate": 0.00014514662697614548, "loss": 2.4709, "step": 8135 }, { "epoch": 0.62, "learning_rate": 0.00014510640009654453, "loss": 2.6757, "step": 8140 }, { "epoch": 0.62, "learning_rate": 0.0001450661732169436, "loss": 2.0093, "step": 8145 }, { "epoch": 0.62, "learning_rate": 0.00014502594633734262, "loss": 1.4906, "step": 8150 }, { "epoch": 0.62, "learning_rate": 0.00014498571945774166, "loss": 4.9254, "step": 8155 }, { "epoch": 0.62, "learning_rate": 0.0001449454925781407, "loss": 4.8832, "step": 8160 }, { "epoch": 0.62, "learning_rate": 0.00014490526569853977, "loss": 4.3543, "step": 8165 }, { "epoch": 0.62, "learning_rate": 0.00014486503881893883, "loss": 4.5822, "step": 8170 }, { "epoch": 0.62, "learning_rate": 0.0001448248119393379, "loss": 3.6982, "step": 8175 }, { "epoch": 0.63, "learning_rate": 0.00014478458505973692, "loss": 2.7762, "step": 8180 }, { "epoch": 0.63, "learning_rate": 0.00014474435818013598, "loss": 3.1775, "step": 8185 }, { "epoch": 0.63, "learning_rate": 0.000144704131300535, "loss": 1.7506, "step": 8190 }, { "epoch": 0.63, "learning_rate": 0.00014466390442093407, "loss": 2.6996, "step": 8195 }, { "epoch": 0.63, "learning_rate": 0.00014462367754133312, "loss": 1.8007, "step": 8200 }, { "epoch": 0.63, "learning_rate": 0.00014458345066173218, "loss": 5.0187, "step": 8205 }, { "epoch": 0.63, "learning_rate": 0.00014454322378213124, "loss": 3.435, "step": 8210 }, { "epoch": 0.63, "learning_rate": 0.00014450299690253027, "loss": 3.5404, "step": 8215 }, { "epoch": 0.63, "learning_rate": 0.00014446277002292933, "loss": 5.1195, "step": 8220 }, { "epoch": 0.63, "learning_rate": 0.00014442254314332836, "loss": 3.7687, "step": 8225 }, { "epoch": 0.63, "learning_rate": 0.00014438231626372742, "loss": 4.91, "step": 8230 }, { "epoch": 0.63, "learning_rate": 0.00014434208938412648, "loss": 2.3242, "step": 8235 }, { "epoch": 0.63, "learning_rate": 0.00014430186250452554, "loss": 1.712, "step": 8240 }, { "epoch": 0.63, "learning_rate": 0.0001442616356249246, "loss": 1.66, "step": 8245 }, { "epoch": 0.63, "learning_rate": 0.00014422140874532363, "loss": 1.6371, "step": 8250 }, { "epoch": 0.63, "learning_rate": 0.00014418118186572268, "loss": 4.1268, "step": 8255 }, { "epoch": 0.63, "learning_rate": 0.00014414095498612172, "loss": 4.1861, "step": 8260 }, { "epoch": 0.63, "learning_rate": 0.00014410072810652077, "loss": 3.8643, "step": 8265 }, { "epoch": 0.63, "learning_rate": 0.00014406050122691983, "loss": 3.8318, "step": 8270 }, { "epoch": 0.63, "learning_rate": 0.0001440202743473189, "loss": 3.3503, "step": 8275 }, { "epoch": 0.63, "learning_rate": 0.00014398004746771795, "loss": 3.3456, "step": 8280 }, { "epoch": 0.63, "learning_rate": 0.00014393982058811698, "loss": 4.1158, "step": 8285 }, { "epoch": 0.63, "learning_rate": 0.00014389959370851604, "loss": 3.0195, "step": 8290 }, { "epoch": 0.63, "learning_rate": 0.0001438593668289151, "loss": 0.8465, "step": 8295 }, { "epoch": 0.63, "learning_rate": 0.00014381913994931413, "loss": 1.5685, "step": 8300 }, { "epoch": 0.63, "learning_rate": 0.00014377891306971319, "loss": 4.1578, "step": 8305 }, { "epoch": 0.64, "learning_rate": 0.00014373868619011224, "loss": 4.6133, "step": 8310 }, { "epoch": 0.64, "learning_rate": 0.0001436984593105113, "loss": 5.6063, "step": 8315 }, { "epoch": 0.64, "learning_rate": 0.00014365823243091033, "loss": 4.8566, "step": 8320 }, { "epoch": 0.64, "learning_rate": 0.0001436180055513094, "loss": 3.8412, "step": 8325 }, { "epoch": 0.64, "learning_rate": 0.00014357777867170845, "loss": 2.9457, "step": 8330 }, { "epoch": 0.64, "learning_rate": 0.00014353755179210748, "loss": 3.2986, "step": 8335 }, { "epoch": 0.64, "learning_rate": 0.00014349732491250654, "loss": 3.7619, "step": 8340 }, { "epoch": 0.64, "learning_rate": 0.0001434570980329056, "loss": 2.0469, "step": 8345 }, { "epoch": 0.64, "learning_rate": 0.00014341687115330466, "loss": 1.2048, "step": 8350 }, { "epoch": 0.64, "learning_rate": 0.0001433766442737037, "loss": 4.6137, "step": 8355 }, { "epoch": 0.64, "learning_rate": 0.00014333641739410274, "loss": 4.2262, "step": 8360 }, { "epoch": 0.64, "learning_rate": 0.0001432961905145018, "loss": 3.5321, "step": 8365 }, { "epoch": 0.64, "learning_rate": 0.00014325596363490086, "loss": 3.1123, "step": 8370 }, { "epoch": 0.64, "learning_rate": 0.0001432157367552999, "loss": 4.0621, "step": 8375 }, { "epoch": 0.64, "learning_rate": 0.00014317550987569895, "loss": 3.2757, "step": 8380 }, { "epoch": 0.64, "learning_rate": 0.000143135282996098, "loss": 3.4299, "step": 8385 }, { "epoch": 0.64, "learning_rate": 0.00014309505611649704, "loss": 2.9666, "step": 8390 }, { "epoch": 0.64, "learning_rate": 0.0001430548292368961, "loss": 3.7125, "step": 8395 }, { "epoch": 0.64, "learning_rate": 0.00014301460235729516, "loss": 3.3482, "step": 8400 }, { "epoch": 0.64, "learning_rate": 0.00014297437547769421, "loss": 3.2589, "step": 8405 }, { "epoch": 0.64, "learning_rate": 0.00014293414859809325, "loss": 4.1152, "step": 8410 }, { "epoch": 0.64, "learning_rate": 0.0001428939217184923, "loss": 4.1553, "step": 8415 }, { "epoch": 0.64, "learning_rate": 0.00014285369483889136, "loss": 2.563, "step": 8420 }, { "epoch": 0.64, "learning_rate": 0.0001428134679592904, "loss": 3.1831, "step": 8425 }, { "epoch": 0.64, "learning_rate": 0.00014277324107968945, "loss": 3.0423, "step": 8430 }, { "epoch": 0.64, "learning_rate": 0.0001427330142000885, "loss": 2.415, "step": 8435 }, { "epoch": 0.65, "learning_rate": 0.00014269278732048757, "loss": 3.3074, "step": 8440 }, { "epoch": 0.65, "learning_rate": 0.00014265256044088663, "loss": 2.9256, "step": 8445 }, { "epoch": 0.65, "learning_rate": 0.00014261233356128566, "loss": 0.7464, "step": 8450 }, { "epoch": 0.65, "learning_rate": 0.00014257210668168472, "loss": 3.7371, "step": 8455 }, { "epoch": 0.65, "learning_rate": 0.00014253187980208375, "loss": 3.3926, "step": 8460 }, { "epoch": 0.65, "learning_rate": 0.0001424916529224828, "loss": 3.8283, "step": 8465 }, { "epoch": 0.65, "learning_rate": 0.00014245142604288186, "loss": 2.7588, "step": 8470 }, { "epoch": 0.65, "learning_rate": 0.00014241119916328092, "loss": 3.9244, "step": 8475 }, { "epoch": 0.65, "learning_rate": 0.00014237097228367998, "loss": 3.9608, "step": 8480 }, { "epoch": 0.65, "learning_rate": 0.000142330745404079, "loss": 3.4815, "step": 8485 }, { "epoch": 0.65, "learning_rate": 0.00014229051852447804, "loss": 2.2937, "step": 8490 }, { "epoch": 0.65, "learning_rate": 0.0001422502916448771, "loss": 1.4673, "step": 8495 }, { "epoch": 0.65, "learning_rate": 0.00014221811014119635, "loss": 2.0945, "step": 8500 }, { "epoch": 0.65, "learning_rate": 0.0001421778832615954, "loss": 5.1734, "step": 8505 }, { "epoch": 0.65, "learning_rate": 0.00014213765638199446, "loss": 4.7025, "step": 8510 }, { "epoch": 0.65, "learning_rate": 0.00014209742950239352, "loss": 3.0898, "step": 8515 }, { "epoch": 0.65, "learning_rate": 0.00014205720262279255, "loss": 3.6896, "step": 8520 }, { "epoch": 0.65, "learning_rate": 0.0001420169757431916, "loss": 3.1475, "step": 8525 }, { "epoch": 0.65, "learning_rate": 0.00014197674886359064, "loss": 1.9613, "step": 8530 }, { "epoch": 0.65, "learning_rate": 0.0001419365219839897, "loss": 2.6826, "step": 8535 }, { "epoch": 0.65, "learning_rate": 0.00014189629510438876, "loss": 1.0485, "step": 8540 }, { "epoch": 0.65, "learning_rate": 0.00014185606822478782, "loss": 2.8809, "step": 8545 }, { "epoch": 0.65, "learning_rate": 0.00014181584134518685, "loss": 0.0693, "step": 8550 }, { "epoch": 0.65, "learning_rate": 0.0001417756144655859, "loss": 4.4512, "step": 8555 }, { "epoch": 0.65, "learning_rate": 0.00014173538758598496, "loss": 4.483, "step": 8560 }, { "epoch": 0.65, "learning_rate": 0.00014169516070638402, "loss": 4.1109, "step": 8565 }, { "epoch": 0.65, "learning_rate": 0.00014165493382678305, "loss": 3.7912, "step": 8570 }, { "epoch": 0.66, "learning_rate": 0.0001416147069471821, "loss": 4.1405, "step": 8575 }, { "epoch": 0.66, "learning_rate": 0.00014157448006758117, "loss": 2.772, "step": 8580 }, { "epoch": 0.66, "learning_rate": 0.0001415342531879802, "loss": 2.3251, "step": 8585 }, { "epoch": 0.66, "learning_rate": 0.00014149402630837926, "loss": 3.733, "step": 8590 }, { "epoch": 0.66, "learning_rate": 0.00014145379942877832, "loss": 1.8271, "step": 8595 }, { "epoch": 0.66, "learning_rate": 0.00014141357254917738, "loss": 1.507, "step": 8600 }, { "epoch": 0.66, "learning_rate": 0.0001413733456695764, "loss": 5.0523, "step": 8605 }, { "epoch": 0.66, "learning_rate": 0.00014133311878997547, "loss": 4.0752, "step": 8610 }, { "epoch": 0.66, "learning_rate": 0.00014129289191037452, "loss": 4.1002, "step": 8615 }, { "epoch": 0.66, "learning_rate": 0.00014125266503077356, "loss": 3.0717, "step": 8620 }, { "epoch": 0.66, "learning_rate": 0.0001412124381511726, "loss": 2.2346, "step": 8625 }, { "epoch": 0.66, "learning_rate": 0.00014117221127157167, "loss": 2.3687, "step": 8630 }, { "epoch": 0.66, "learning_rate": 0.00014113198439197073, "loss": 2.5631, "step": 8635 }, { "epoch": 0.66, "learning_rate": 0.0001410917575123698, "loss": 1.2662, "step": 8640 }, { "epoch": 0.66, "learning_rate": 0.00014105153063276882, "loss": 3.2849, "step": 8645 }, { "epoch": 0.66, "learning_rate": 0.00014101130375316788, "loss": 1.8528, "step": 8650 }, { "epoch": 0.66, "learning_rate": 0.0001409710768735669, "loss": 4.3492, "step": 8655 }, { "epoch": 0.66, "learning_rate": 0.00014093084999396597, "loss": 4.6064, "step": 8660 }, { "epoch": 0.66, "learning_rate": 0.00014089062311436503, "loss": 4.0658, "step": 8665 }, { "epoch": 0.66, "learning_rate": 0.00014085039623476408, "loss": 3.9941, "step": 8670 }, { "epoch": 0.66, "learning_rate": 0.00014081016935516314, "loss": 3.4873, "step": 8675 }, { "epoch": 0.66, "learning_rate": 0.00014076994247556217, "loss": 1.615, "step": 8680 }, { "epoch": 0.66, "learning_rate": 0.00014072971559596123, "loss": 1.9473, "step": 8685 }, { "epoch": 0.66, "learning_rate": 0.00014068948871636026, "loss": 3.5422, "step": 8690 }, { "epoch": 0.66, "learning_rate": 0.00014064926183675932, "loss": 1.6255, "step": 8695 }, { "epoch": 0.66, "learning_rate": 0.00014060903495715838, "loss": 0.6512, "step": 8700 }, { "epoch": 0.67, "learning_rate": 0.00014056880807755744, "loss": 4.1771, "step": 8705 }, { "epoch": 0.67, "learning_rate": 0.0001405285811979565, "loss": 4.2812, "step": 8710 }, { "epoch": 0.67, "learning_rate": 0.00014048835431835555, "loss": 3.3185, "step": 8715 }, { "epoch": 0.67, "learning_rate": 0.00014044812743875458, "loss": 3.8672, "step": 8720 }, { "epoch": 0.67, "learning_rate": 0.00014040790055915362, "loss": 4.4287, "step": 8725 }, { "epoch": 0.67, "learning_rate": 0.00014036767367955267, "loss": 3.4561, "step": 8730 }, { "epoch": 0.67, "learning_rate": 0.00014032744679995173, "loss": 3.5, "step": 8735 }, { "epoch": 0.67, "learning_rate": 0.0001402872199203508, "loss": 2.2681, "step": 8740 }, { "epoch": 0.67, "learning_rate": 0.00014024699304074985, "loss": 1.6788, "step": 8745 }, { "epoch": 0.67, "learning_rate": 0.0001402067661611489, "loss": 4.0663, "step": 8750 }, { "epoch": 0.67, "learning_rate": 0.00014016653928154794, "loss": 4.5193, "step": 8755 }, { "epoch": 0.67, "learning_rate": 0.00014012631240194697, "loss": 4.5533, "step": 8760 }, { "epoch": 0.67, "learning_rate": 0.00014008608552234603, "loss": 3.6523, "step": 8765 }, { "epoch": 0.67, "learning_rate": 0.00014004585864274509, "loss": 4.5703, "step": 8770 }, { "epoch": 0.67, "learning_rate": 0.00014000563176314414, "loss": 4.8623, "step": 8775 }, { "epoch": 0.67, "learning_rate": 0.0001399654048835432, "loss": 3.0412, "step": 8780 }, { "epoch": 0.67, "learning_rate": 0.00013992517800394226, "loss": 2.1579, "step": 8785 }, { "epoch": 0.67, "learning_rate": 0.0001398849511243413, "loss": 3.1091, "step": 8790 }, { "epoch": 0.67, "learning_rate": 0.00013984472424474032, "loss": 2.0995, "step": 8795 }, { "epoch": 0.67, "learning_rate": 0.00013980449736513938, "loss": 2.2463, "step": 8800 }, { "epoch": 0.67, "learning_rate": 0.00013976427048553844, "loss": 5.2713, "step": 8805 }, { "epoch": 0.67, "learning_rate": 0.0001397240436059375, "loss": 3.9492, "step": 8810 }, { "epoch": 0.67, "learning_rate": 0.00013968381672633656, "loss": 3.9627, "step": 8815 }, { "epoch": 0.67, "learning_rate": 0.00013964358984673561, "loss": 4.1762, "step": 8820 }, { "epoch": 0.67, "learning_rate": 0.00013960336296713465, "loss": 3.9076, "step": 8825 }, { "epoch": 0.67, "learning_rate": 0.00013956313608753368, "loss": 2.9175, "step": 8830 }, { "epoch": 0.68, "learning_rate": 0.00013952290920793273, "loss": 1.738, "step": 8835 }, { "epoch": 0.68, "learning_rate": 0.0001394826823283318, "loss": 1.9772, "step": 8840 }, { "epoch": 0.68, "learning_rate": 0.00013944245544873085, "loss": 1.0877, "step": 8845 }, { "epoch": 0.68, "learning_rate": 0.0001394022285691299, "loss": 1.9826, "step": 8850 }, { "epoch": 0.68, "learning_rate": 0.00013936200168952894, "loss": 5.7988, "step": 8855 }, { "epoch": 0.68, "learning_rate": 0.000139321774809928, "loss": 5.0049, "step": 8860 }, { "epoch": 0.68, "learning_rate": 0.00013928154793032706, "loss": 4.1568, "step": 8865 }, { "epoch": 0.68, "learning_rate": 0.0001392413210507261, "loss": 3.3445, "step": 8870 }, { "epoch": 0.68, "learning_rate": 0.00013920109417112515, "loss": 4.5633, "step": 8875 }, { "epoch": 0.68, "learning_rate": 0.0001391608672915242, "loss": 2.7033, "step": 8880 }, { "epoch": 0.68, "learning_rate": 0.00013912064041192326, "loss": 3.8346, "step": 8885 }, { "epoch": 0.68, "learning_rate": 0.0001390804135323223, "loss": 2.3505, "step": 8890 }, { "epoch": 0.68, "learning_rate": 0.00013904018665272135, "loss": 1.7089, "step": 8895 }, { "epoch": 0.68, "learning_rate": 0.0001389999597731204, "loss": 1.9751, "step": 8900 }, { "epoch": 0.68, "learning_rate": 0.00013895973289351944, "loss": 3.9482, "step": 8905 }, { "epoch": 0.68, "learning_rate": 0.0001389195060139185, "loss": 5.2367, "step": 8910 }, { "epoch": 0.68, "learning_rate": 0.00013887927913431756, "loss": 4.2158, "step": 8915 }, { "epoch": 0.68, "learning_rate": 0.00013883905225471662, "loss": 4.0254, "step": 8920 }, { "epoch": 0.68, "learning_rate": 0.00013879882537511565, "loss": 4.4863, "step": 8925 }, { "epoch": 0.68, "learning_rate": 0.0001387585984955147, "loss": 3.8158, "step": 8930 }, { "epoch": 0.68, "learning_rate": 0.00013871837161591376, "loss": 2.6618, "step": 8935 }, { "epoch": 0.68, "learning_rate": 0.00013867814473631282, "loss": 2.6897, "step": 8940 }, { "epoch": 0.68, "learning_rate": 0.00013863791785671185, "loss": 1.2268, "step": 8945 }, { "epoch": 0.68, "learning_rate": 0.0001385976909771109, "loss": 2.3218, "step": 8950 }, { "epoch": 0.68, "learning_rate": 0.00013855746409750997, "loss": 3.4869, "step": 8955 }, { "epoch": 0.68, "learning_rate": 0.000138517237217909, "loss": 4.4197, "step": 8960 }, { "epoch": 0.69, "learning_rate": 0.00013847701033830806, "loss": 3.4383, "step": 8965 }, { "epoch": 0.69, "learning_rate": 0.00013843678345870712, "loss": 3.1334, "step": 8970 }, { "epoch": 0.69, "learning_rate": 0.00013839655657910618, "loss": 3.8105, "step": 8975 }, { "epoch": 0.69, "learning_rate": 0.0001383563296995052, "loss": 2.6163, "step": 8980 }, { "epoch": 0.69, "learning_rate": 0.00013831610281990426, "loss": 2.1495, "step": 8985 }, { "epoch": 0.69, "learning_rate": 0.00013827587594030332, "loss": 3.512, "step": 8990 }, { "epoch": 0.69, "learning_rate": 0.00013823564906070235, "loss": 4.8705, "step": 8995 }, { "epoch": 0.69, "learning_rate": 0.0001381954221811014, "loss": 1.2528, "step": 9000 }, { "epoch": 0.69, "learning_rate": 0.00013815519530150047, "loss": 4.6674, "step": 9005 }, { "epoch": 0.69, "learning_rate": 0.00013811496842189953, "loss": 3.8508, "step": 9010 }, { "epoch": 0.69, "learning_rate": 0.0001380747415422986, "loss": 3.9963, "step": 9015 }, { "epoch": 0.69, "learning_rate": 0.00013803451466269762, "loss": 4.5911, "step": 9020 }, { "epoch": 0.69, "learning_rate": 0.00013799428778309668, "loss": 4.2135, "step": 9025 }, { "epoch": 0.69, "learning_rate": 0.0001379540609034957, "loss": 3.3, "step": 9030 }, { "epoch": 0.69, "learning_rate": 0.00013791383402389477, "loss": 3.1516, "step": 9035 }, { "epoch": 0.69, "learning_rate": 0.00013787360714429382, "loss": 2.2726, "step": 9040 }, { "epoch": 0.69, "learning_rate": 0.00013783338026469288, "loss": 1.3781, "step": 9045 }, { "epoch": 0.69, "learning_rate": 0.00013779315338509194, "loss": 1.5878, "step": 9050 }, { "epoch": 0.69, "learning_rate": 0.00013775292650549097, "loss": 3.8682, "step": 9055 }, { "epoch": 0.69, "learning_rate": 0.00013771269962589003, "loss": 5.8621, "step": 9060 }, { "epoch": 0.69, "learning_rate": 0.00013767247274628906, "loss": 4.1668, "step": 9065 }, { "epoch": 0.69, "learning_rate": 0.00013763224586668812, "loss": 3.3687, "step": 9070 }, { "epoch": 0.69, "learning_rate": 0.00013759201898708718, "loss": 3.5014, "step": 9075 }, { "epoch": 0.69, "learning_rate": 0.00013755179210748624, "loss": 2.693, "step": 9080 }, { "epoch": 0.69, "learning_rate": 0.0001375115652278853, "loss": 3.4979, "step": 9085 }, { "epoch": 0.69, "learning_rate": 0.00013747133834828435, "loss": 2.3423, "step": 9090 }, { "epoch": 0.7, "learning_rate": 0.00013743111146868338, "loss": 3.0961, "step": 9095 }, { "epoch": 0.7, "learning_rate": 0.00013739088458908241, "loss": 2.2662, "step": 9100 }, { "epoch": 0.7, "learning_rate": 0.00013735065770948147, "loss": 4.8318, "step": 9105 }, { "epoch": 0.7, "learning_rate": 0.00013731043082988053, "loss": 4.9508, "step": 9110 }, { "epoch": 0.7, "learning_rate": 0.0001372702039502796, "loss": 3.5961, "step": 9115 }, { "epoch": 0.7, "learning_rate": 0.00013722997707067865, "loss": 4.1072, "step": 9120 }, { "epoch": 0.7, "learning_rate": 0.0001371897501910777, "loss": 3.7145, "step": 9125 }, { "epoch": 0.7, "learning_rate": 0.00013714952331147674, "loss": 3.7985, "step": 9130 }, { "epoch": 0.7, "learning_rate": 0.00013710929643187577, "loss": 3.2468, "step": 9135 }, { "epoch": 0.7, "learning_rate": 0.00013706906955227483, "loss": 2.0802, "step": 9140 }, { "epoch": 0.7, "learning_rate": 0.00013702884267267388, "loss": 1.4174, "step": 9145 }, { "epoch": 0.7, "learning_rate": 0.00013698861579307294, "loss": 0.3039, "step": 9150 }, { "epoch": 0.7, "learning_rate": 0.000136948388913472, "loss": 4.1092, "step": 9155 }, { "epoch": 0.7, "learning_rate": 0.00013690816203387103, "loss": 4.8393, "step": 9160 }, { "epoch": 0.7, "learning_rate": 0.0001368679351542701, "loss": 4.3215, "step": 9165 }, { "epoch": 0.7, "learning_rate": 0.00013682770827466912, "loss": 4.8441, "step": 9170 }, { "epoch": 0.7, "learning_rate": 0.00013678748139506818, "loss": 3.8361, "step": 9175 }, { "epoch": 0.7, "learning_rate": 0.00013674725451546724, "loss": 3.663, "step": 9180 }, { "epoch": 0.7, "learning_rate": 0.0001367070276358663, "loss": 3.2722, "step": 9185 }, { "epoch": 0.7, "learning_rate": 0.00013666680075626535, "loss": 3.3886, "step": 9190 }, { "epoch": 0.7, "learning_rate": 0.00013662657387666439, "loss": 3.5016, "step": 9195 }, { "epoch": 0.7, "learning_rate": 0.00013658634699706344, "loss": 2.5376, "step": 9200 }, { "epoch": 0.7, "learning_rate": 0.0001365461201174625, "loss": 4.8418, "step": 9205 }, { "epoch": 0.7, "learning_rate": 0.00013650589323786153, "loss": 4.2836, "step": 9210 }, { "epoch": 0.7, "learning_rate": 0.0001364656663582606, "loss": 4.0514, "step": 9215 }, { "epoch": 0.7, "learning_rate": 0.00013642543947865965, "loss": 3.6088, "step": 9220 }, { "epoch": 0.71, "learning_rate": 0.0001363852125990587, "loss": 3.5259, "step": 9225 }, { "epoch": 0.71, "learning_rate": 0.00013634498571945774, "loss": 3.9555, "step": 9230 }, { "epoch": 0.71, "learning_rate": 0.0001363047588398568, "loss": 2.5013, "step": 9235 }, { "epoch": 0.71, "learning_rate": 0.00013626453196025586, "loss": 1.8686, "step": 9240 }, { "epoch": 0.71, "learning_rate": 0.0001362243050806549, "loss": 2.7858, "step": 9245 }, { "epoch": 0.71, "learning_rate": 0.00013618407820105395, "loss": 2.5493, "step": 9250 }, { "epoch": 0.71, "learning_rate": 0.000136143851321453, "loss": 4.5645, "step": 9255 }, { "epoch": 0.71, "learning_rate": 0.00013610362444185206, "loss": 4.4248, "step": 9260 }, { "epoch": 0.71, "learning_rate": 0.0001360633975622511, "loss": 4.1404, "step": 9265 }, { "epoch": 0.71, "learning_rate": 0.00013602317068265015, "loss": 4.19, "step": 9270 }, { "epoch": 0.71, "learning_rate": 0.0001359829438030492, "loss": 3.1199, "step": 9275 }, { "epoch": 0.71, "learning_rate": 0.00013594271692344827, "loss": 3.3325, "step": 9280 }, { "epoch": 0.71, "learning_rate": 0.0001359024900438473, "loss": 1.8506, "step": 9285 }, { "epoch": 0.71, "learning_rate": 0.00013586226316424636, "loss": 2.0855, "step": 9290 }, { "epoch": 0.71, "learning_rate": 0.00013582203628464542, "loss": 2.7468, "step": 9295 }, { "epoch": 0.71, "learning_rate": 0.00013578180940504445, "loss": 3.2028, "step": 9300 }, { "epoch": 0.71, "learning_rate": 0.0001357415825254435, "loss": 4.6888, "step": 9305 }, { "epoch": 0.71, "learning_rate": 0.00013570135564584256, "loss": 4.9221, "step": 9310 }, { "epoch": 0.71, "learning_rate": 0.00013566112876624162, "loss": 4.0654, "step": 9315 }, { "epoch": 0.71, "learning_rate": 0.00013562090188664065, "loss": 3.5768, "step": 9320 }, { "epoch": 0.71, "learning_rate": 0.0001355806750070397, "loss": 4.2205, "step": 9325 }, { "epoch": 0.71, "learning_rate": 0.00013554044812743877, "loss": 2.9903, "step": 9330 }, { "epoch": 0.71, "learning_rate": 0.0001355002212478378, "loss": 3.491, "step": 9335 }, { "epoch": 0.71, "learning_rate": 0.00013545999436823686, "loss": 2.4792, "step": 9340 }, { "epoch": 0.71, "learning_rate": 0.00013541976748863592, "loss": 0.8765, "step": 9345 }, { "epoch": 0.71, "learning_rate": 0.00013537954060903497, "loss": 1.7416, "step": 9350 }, { "epoch": 0.71, "learning_rate": 0.00013533931372943403, "loss": 4.7154, "step": 9355 }, { "epoch": 0.72, "learning_rate": 0.00013529908684983306, "loss": 3.5176, "step": 9360 }, { "epoch": 0.72, "learning_rate": 0.00013525885997023212, "loss": 3.8848, "step": 9365 }, { "epoch": 0.72, "learning_rate": 0.00013521863309063115, "loss": 2.9452, "step": 9370 }, { "epoch": 0.72, "learning_rate": 0.0001351784062110302, "loss": 3.007, "step": 9375 }, { "epoch": 0.72, "learning_rate": 0.00013513817933142927, "loss": 3.152, "step": 9380 }, { "epoch": 0.72, "learning_rate": 0.00013509795245182833, "loss": 3.2887, "step": 9385 }, { "epoch": 0.72, "learning_rate": 0.00013505772557222739, "loss": 2.8927, "step": 9390 }, { "epoch": 0.72, "learning_rate": 0.00013501749869262642, "loss": 2.6005, "step": 9395 }, { "epoch": 0.72, "learning_rate": 0.00013497727181302548, "loss": 3.1346, "step": 9400 }, { "epoch": 0.72, "learning_rate": 0.0001349370449334245, "loss": 4.4902, "step": 9405 }, { "epoch": 0.72, "learning_rate": 0.00013489681805382357, "loss": 4.3199, "step": 9410 }, { "epoch": 0.72, "learning_rate": 0.00013485659117422262, "loss": 4.7805, "step": 9415 }, { "epoch": 0.72, "learning_rate": 0.00013481636429462168, "loss": 3.8885, "step": 9420 }, { "epoch": 0.72, "learning_rate": 0.00013477613741502074, "loss": 2.4693, "step": 9425 }, { "epoch": 0.72, "learning_rate": 0.0001347359105354198, "loss": 2.7459, "step": 9430 }, { "epoch": 0.72, "learning_rate": 0.00013469568365581883, "loss": 3.4011, "step": 9435 }, { "epoch": 0.72, "learning_rate": 0.00013465545677621786, "loss": 3.0815, "step": 9440 }, { "epoch": 0.72, "learning_rate": 0.00013461522989661692, "loss": 1.5173, "step": 9445 }, { "epoch": 0.72, "learning_rate": 0.00013457500301701598, "loss": 2.4051, "step": 9450 }, { "epoch": 0.72, "learning_rate": 0.00013453477613741504, "loss": 4.7314, "step": 9455 }, { "epoch": 0.72, "learning_rate": 0.0001344945492578141, "loss": 4.0549, "step": 9460 }, { "epoch": 0.72, "learning_rate": 0.00013445432237821312, "loss": 3.774, "step": 9465 }, { "epoch": 0.72, "learning_rate": 0.00013441409549861218, "loss": 3.5994, "step": 9470 }, { "epoch": 0.72, "learning_rate": 0.00013437386861901121, "loss": 3.5275, "step": 9475 }, { "epoch": 0.72, "learning_rate": 0.00013433364173941027, "loss": 3.8396, "step": 9480 }, { "epoch": 0.72, "learning_rate": 0.00013429341485980933, "loss": 2.5812, "step": 9485 }, { "epoch": 0.73, "learning_rate": 0.0001342531879802084, "loss": 3.3454, "step": 9490 }, { "epoch": 0.73, "learning_rate": 0.00013421296110060745, "loss": 1.5726, "step": 9495 }, { "epoch": 0.73, "learning_rate": 0.00013417273422100648, "loss": 0.6411, "step": 9500 }, { "epoch": 0.73, "learning_rate": 0.00013413250734140554, "loss": 4.6568, "step": 9505 }, { "epoch": 0.73, "learning_rate": 0.00013409228046180457, "loss": 4.666, "step": 9510 }, { "epoch": 0.73, "learning_rate": 0.00013405205358220363, "loss": 3.4013, "step": 9515 }, { "epoch": 0.73, "learning_rate": 0.00013401182670260268, "loss": 3.9559, "step": 9520 }, { "epoch": 0.73, "learning_rate": 0.00013397159982300174, "loss": 3.7091, "step": 9525 }, { "epoch": 0.73, "learning_rate": 0.0001339313729434008, "loss": 3.2853, "step": 9530 }, { "epoch": 0.73, "learning_rate": 0.00013389114606379983, "loss": 2.6608, "step": 9535 }, { "epoch": 0.73, "learning_rate": 0.0001338509191841989, "loss": 1.7764, "step": 9540 }, { "epoch": 0.73, "learning_rate": 0.00013381069230459792, "loss": 2.1373, "step": 9545 }, { "epoch": 0.73, "learning_rate": 0.00013377046542499698, "loss": 2.9499, "step": 9550 }, { "epoch": 0.73, "learning_rate": 0.00013373023854539604, "loss": 4.3037, "step": 9555 }, { "epoch": 0.73, "learning_rate": 0.0001336900116657951, "loss": 3.468, "step": 9560 }, { "epoch": 0.73, "learning_rate": 0.00013364978478619415, "loss": 4.5646, "step": 9565 }, { "epoch": 0.73, "learning_rate": 0.00013360955790659318, "loss": 3.7449, "step": 9570 }, { "epoch": 0.73, "learning_rate": 0.00013356933102699224, "loss": 3.6267, "step": 9575 }, { "epoch": 0.73, "learning_rate": 0.0001335291041473913, "loss": 3.0011, "step": 9580 }, { "epoch": 0.73, "learning_rate": 0.00013348887726779033, "loss": 3.9645, "step": 9585 }, { "epoch": 0.73, "learning_rate": 0.0001334486503881894, "loss": 2.6243, "step": 9590 }, { "epoch": 0.73, "learning_rate": 0.00013340842350858845, "loss": 3.0731, "step": 9595 }, { "epoch": 0.73, "learning_rate": 0.0001333681966289875, "loss": 1.45, "step": 9600 }, { "epoch": 0.73, "learning_rate": 0.00013332796974938654, "loss": 3.6967, "step": 9605 }, { "epoch": 0.73, "learning_rate": 0.0001332877428697856, "loss": 4.2301, "step": 9610 }, { "epoch": 0.73, "learning_rate": 0.00013324751599018465, "loss": 3.9514, "step": 9615 }, { "epoch": 0.74, "learning_rate": 0.00013320728911058369, "loss": 3.9246, "step": 9620 }, { "epoch": 0.74, "learning_rate": 0.00013316706223098274, "loss": 3.4285, "step": 9625 }, { "epoch": 0.74, "learning_rate": 0.0001331268353513818, "loss": 3.0667, "step": 9630 }, { "epoch": 0.74, "learning_rate": 0.00013308660847178086, "loss": 2.9836, "step": 9635 }, { "epoch": 0.74, "learning_rate": 0.0001330463815921799, "loss": 2.4699, "step": 9640 }, { "epoch": 0.74, "learning_rate": 0.00013300615471257895, "loss": 2.3858, "step": 9645 }, { "epoch": 0.74, "learning_rate": 0.000132965927832978, "loss": 1.9694, "step": 9650 }, { "epoch": 0.74, "learning_rate": 0.00013292570095337707, "loss": 4.6979, "step": 9655 }, { "epoch": 0.74, "learning_rate": 0.0001328854740737761, "loss": 4.1387, "step": 9660 }, { "epoch": 0.74, "learning_rate": 0.00013284524719417516, "loss": 4.4168, "step": 9665 }, { "epoch": 0.74, "learning_rate": 0.00013280502031457421, "loss": 4.8387, "step": 9670 }, { "epoch": 0.74, "learning_rate": 0.00013276479343497325, "loss": 3.6943, "step": 9675 }, { "epoch": 0.74, "learning_rate": 0.0001327245665553723, "loss": 3.0823, "step": 9680 }, { "epoch": 0.74, "learning_rate": 0.00013268433967577136, "loss": 2.7571, "step": 9685 }, { "epoch": 0.74, "learning_rate": 0.00013264411279617042, "loss": 3.0168, "step": 9690 }, { "epoch": 0.74, "learning_rate": 0.00013260388591656945, "loss": 2.7996, "step": 9695 }, { "epoch": 0.74, "learning_rate": 0.0001325636590369685, "loss": 2.6553, "step": 9700 }, { "epoch": 0.74, "learning_rate": 0.00013252343215736757, "loss": 4.643, "step": 9705 }, { "epoch": 0.74, "learning_rate": 0.0001324832052777666, "loss": 4.3184, "step": 9710 }, { "epoch": 0.74, "learning_rate": 0.00013244297839816566, "loss": 3.1918, "step": 9715 }, { "epoch": 0.74, "learning_rate": 0.00013240275151856472, "loss": 2.6649, "step": 9720 }, { "epoch": 0.74, "learning_rate": 0.00013236252463896377, "loss": 3.9285, "step": 9725 }, { "epoch": 0.74, "learning_rate": 0.00013232229775936283, "loss": 2.9876, "step": 9730 }, { "epoch": 0.74, "learning_rate": 0.00013228207087976186, "loss": 2.318, "step": 9735 }, { "epoch": 0.74, "learning_rate": 0.00013224184400016092, "loss": 2.6796, "step": 9740 }, { "epoch": 0.74, "learning_rate": 0.00013220161712055995, "loss": 1.675, "step": 9745 }, { "epoch": 0.75, "learning_rate": 0.000132161390240959, "loss": 1.1842, "step": 9750 }, { "epoch": 0.75, "learning_rate": 0.00013212116336135807, "loss": 3.9574, "step": 9755 }, { "epoch": 0.75, "learning_rate": 0.00013208093648175713, "loss": 5.1955, "step": 9760 }, { "epoch": 0.75, "learning_rate": 0.00013204070960215619, "loss": 3.4871, "step": 9765 }, { "epoch": 0.75, "learning_rate": 0.00013200048272255522, "loss": 4.4609, "step": 9770 }, { "epoch": 0.75, "learning_rate": 0.00013196025584295427, "loss": 5.2233, "step": 9775 }, { "epoch": 0.75, "learning_rate": 0.0001319200289633533, "loss": 4.0359, "step": 9780 }, { "epoch": 0.75, "learning_rate": 0.00013187980208375236, "loss": 2.6144, "step": 9785 }, { "epoch": 0.75, "learning_rate": 0.00013183957520415142, "loss": 1.9424, "step": 9790 }, { "epoch": 0.75, "learning_rate": 0.00013179934832455048, "loss": 3.2209, "step": 9795 }, { "epoch": 0.75, "learning_rate": 0.00013175912144494954, "loss": 2.7711, "step": 9800 }, { "epoch": 0.75, "learning_rate": 0.00013171889456534857, "loss": 3.8485, "step": 9805 }, { "epoch": 0.75, "learning_rate": 0.0001316786676857476, "loss": 3.1569, "step": 9810 }, { "epoch": 0.75, "learning_rate": 0.00013163844080614666, "loss": 4.6219, "step": 9815 }, { "epoch": 0.75, "learning_rate": 0.00013159821392654572, "loss": 3.0187, "step": 9820 }, { "epoch": 0.75, "learning_rate": 0.00013155798704694478, "loss": 3.7212, "step": 9825 }, { "epoch": 0.75, "learning_rate": 0.00013151776016734383, "loss": 2.5, "step": 9830 }, { "epoch": 0.75, "learning_rate": 0.0001314775332877429, "loss": 2.1953, "step": 9835 }, { "epoch": 0.75, "learning_rate": 0.00013143730640814192, "loss": 1.6314, "step": 9840 }, { "epoch": 0.75, "learning_rate": 0.00013139707952854095, "loss": 2.1152, "step": 9845 }, { "epoch": 0.75, "learning_rate": 0.00013135685264894, "loss": 3.3051, "step": 9850 }, { "epoch": 0.75, "learning_rate": 0.00013131662576933907, "loss": 4.8615, "step": 9855 }, { "epoch": 0.75, "learning_rate": 0.00013127639888973813, "loss": 4.4182, "step": 9860 }, { "epoch": 0.75, "learning_rate": 0.0001312361720101372, "loss": 4.64, "step": 9865 }, { "epoch": 0.75, "learning_rate": 0.00013119594513053625, "loss": 3.8684, "step": 9870 }, { "epoch": 0.75, "learning_rate": 0.00013115571825093528, "loss": 3.9086, "step": 9875 }, { "epoch": 0.76, "learning_rate": 0.00013111549137133434, "loss": 2.4209, "step": 9880 }, { "epoch": 0.76, "learning_rate": 0.00013107526449173337, "loss": 1.358, "step": 9885 }, { "epoch": 0.76, "learning_rate": 0.00013103503761213242, "loss": 1.3452, "step": 9890 }, { "epoch": 0.76, "learning_rate": 0.00013099481073253148, "loss": 2.1419, "step": 9895 }, { "epoch": 0.76, "learning_rate": 0.00013095458385293054, "loss": 2.2326, "step": 9900 }, { "epoch": 0.76, "learning_rate": 0.0001309143569733296, "loss": 4.982, "step": 9905 }, { "epoch": 0.76, "learning_rate": 0.00013087413009372863, "loss": 4.2432, "step": 9910 }, { "epoch": 0.76, "learning_rate": 0.0001308339032141277, "loss": 4.2863, "step": 9915 }, { "epoch": 0.76, "learning_rate": 0.00013079367633452672, "loss": 3.5053, "step": 9920 }, { "epoch": 0.76, "learning_rate": 0.00013075344945492578, "loss": 4.0033, "step": 9925 }, { "epoch": 0.76, "learning_rate": 0.00013071322257532484, "loss": 3.812, "step": 9930 }, { "epoch": 0.76, "learning_rate": 0.0001306729956957239, "loss": 3.1587, "step": 9935 }, { "epoch": 0.76, "learning_rate": 0.00013063276881612295, "loss": 3.0073, "step": 9940 }, { "epoch": 0.76, "learning_rate": 0.00013059254193652198, "loss": 3.4743, "step": 9945 }, { "epoch": 0.76, "learning_rate": 0.00013055231505692104, "loss": 1.0452, "step": 9950 }, { "epoch": 0.76, "learning_rate": 0.0001305120881773201, "loss": 3.9975, "step": 9955 }, { "epoch": 0.76, "learning_rate": 0.00013047186129771913, "loss": 3.7115, "step": 9960 }, { "epoch": 0.76, "learning_rate": 0.0001304316344181182, "loss": 3.8148, "step": 9965 }, { "epoch": 0.76, "learning_rate": 0.00013039140753851725, "loss": 3.2805, "step": 9970 }, { "epoch": 0.76, "learning_rate": 0.0001303511806589163, "loss": 3.5577, "step": 9975 }, { "epoch": 0.76, "learning_rate": 0.00013031095377931534, "loss": 3.2957, "step": 9980 }, { "epoch": 0.76, "learning_rate": 0.0001302707268997144, "loss": 1.8262, "step": 9985 }, { "epoch": 0.76, "learning_rate": 0.00013023050002011345, "loss": 3.8027, "step": 9990 }, { "epoch": 0.76, "learning_rate": 0.00013019027314051249, "loss": 1.8096, "step": 9995 }, { "epoch": 0.76, "learning_rate": 0.00013015004626091154, "loss": 1.4512, "step": 10000 }, { "epoch": 0.76, "learning_rate": 0.0001301098193813106, "loss": 3.9459, "step": 10005 }, { "epoch": 0.77, "learning_rate": 0.00013006959250170966, "loss": 4.8336, "step": 10010 }, { "epoch": 0.77, "learning_rate": 0.0001300293656221087, "loss": 4.3008, "step": 10015 }, { "epoch": 0.77, "learning_rate": 0.00012998913874250775, "loss": 3.5926, "step": 10020 }, { "epoch": 0.77, "learning_rate": 0.0001299489118629068, "loss": 3.3028, "step": 10025 }, { "epoch": 0.77, "learning_rate": 0.00012990868498330587, "loss": 3.0276, "step": 10030 }, { "epoch": 0.77, "learning_rate": 0.0001298684581037049, "loss": 1.6533, "step": 10035 }, { "epoch": 0.77, "learning_rate": 0.00012982823122410396, "loss": 2.652, "step": 10040 }, { "epoch": 0.77, "learning_rate": 0.000129788004344503, "loss": 1.1642, "step": 10045 }, { "epoch": 0.77, "learning_rate": 0.00012974777746490204, "loss": 2.6869, "step": 10050 }, { "epoch": 0.77, "learning_rate": 0.0001297075505853011, "loss": 4.8092, "step": 10055 }, { "epoch": 0.77, "learning_rate": 0.00012966732370570016, "loss": 5.8297, "step": 10060 }, { "epoch": 0.77, "learning_rate": 0.00012962709682609922, "loss": 3.7469, "step": 10065 }, { "epoch": 0.77, "learning_rate": 0.00012958686994649825, "loss": 4.5357, "step": 10070 }, { "epoch": 0.77, "learning_rate": 0.0001295466430668973, "loss": 3.599, "step": 10075 }, { "epoch": 0.77, "learning_rate": 0.00012950641618729634, "loss": 2.9362, "step": 10080 }, { "epoch": 0.77, "learning_rate": 0.0001294661893076954, "loss": 2.9291, "step": 10085 }, { "epoch": 0.77, "learning_rate": 0.00012942596242809446, "loss": 0.7292, "step": 10090 }, { "epoch": 0.77, "learning_rate": 0.00012938573554849351, "loss": 2.6529, "step": 10095 }, { "epoch": 0.77, "learning_rate": 0.00012934550866889257, "loss": 1.4879, "step": 10100 }, { "epoch": 0.77, "learning_rate": 0.00012930528178929163, "loss": 4.5059, "step": 10105 }, { "epoch": 0.77, "learning_rate": 0.00012926505490969066, "loss": 4.3889, "step": 10110 }, { "epoch": 0.77, "learning_rate": 0.0001292248280300897, "loss": 5.0908, "step": 10115 }, { "epoch": 0.77, "learning_rate": 0.00012918460115048875, "loss": 4.3072, "step": 10120 }, { "epoch": 0.77, "learning_rate": 0.0001291443742708878, "loss": 3.3316, "step": 10125 }, { "epoch": 0.77, "learning_rate": 0.00012910414739128687, "loss": 3.8155, "step": 10130 }, { "epoch": 0.77, "learning_rate": 0.00012906392051168593, "loss": 2.2591, "step": 10135 }, { "epoch": 0.77, "learning_rate": 0.00012902369363208498, "loss": 3.473, "step": 10140 }, { "epoch": 0.78, "learning_rate": 0.00012898346675248402, "loss": 1.5188, "step": 10145 }, { "epoch": 0.78, "learning_rate": 0.00012894323987288305, "loss": 2.933, "step": 10150 }, { "epoch": 0.78, "learning_rate": 0.0001289030129932821, "loss": 4.2186, "step": 10155 }, { "epoch": 0.78, "learning_rate": 0.00012886278611368116, "loss": 4.9096, "step": 10160 }, { "epoch": 0.78, "learning_rate": 0.00012882255923408022, "loss": 4.5613, "step": 10165 }, { "epoch": 0.78, "learning_rate": 0.00012878233235447928, "loss": 3.0895, "step": 10170 }, { "epoch": 0.78, "learning_rate": 0.00012874210547487834, "loss": 3.5822, "step": 10175 }, { "epoch": 0.78, "learning_rate": 0.00012870187859527737, "loss": 2.599, "step": 10180 }, { "epoch": 0.78, "learning_rate": 0.0001286616517156764, "loss": 3.6651, "step": 10185 }, { "epoch": 0.78, "learning_rate": 0.00012862142483607546, "loss": 2.747, "step": 10190 }, { "epoch": 0.78, "learning_rate": 0.00012858119795647452, "loss": 2.2399, "step": 10195 }, { "epoch": 0.78, "learning_rate": 0.00012854097107687357, "loss": 0.2028, "step": 10200 }, { "epoch": 0.78, "learning_rate": 0.00012850074419727263, "loss": 3.8363, "step": 10205 }, { "epoch": 0.78, "learning_rate": 0.0001284605173176717, "loss": 3.6502, "step": 10210 }, { "epoch": 0.78, "learning_rate": 0.00012842029043807072, "loss": 4.1717, "step": 10215 }, { "epoch": 0.78, "learning_rate": 0.00012838006355846975, "loss": 3.6252, "step": 10220 }, { "epoch": 0.78, "learning_rate": 0.0001283398366788688, "loss": 3.8029, "step": 10225 }, { "epoch": 0.78, "learning_rate": 0.00012829960979926787, "loss": 2.7107, "step": 10230 }, { "epoch": 0.78, "learning_rate": 0.00012825938291966693, "loss": 3.614, "step": 10235 }, { "epoch": 0.78, "learning_rate": 0.000128219156040066, "loss": 2.4932, "step": 10240 }, { "epoch": 0.78, "learning_rate": 0.00012817892916046504, "loss": 3.0111, "step": 10245 }, { "epoch": 0.78, "learning_rate": 0.00012813870228086408, "loss": 2.2005, "step": 10250 }, { "epoch": 0.78, "learning_rate": 0.00012809847540126313, "loss": 4.8871, "step": 10255 }, { "epoch": 0.78, "learning_rate": 0.00012805824852166217, "loss": 4.4473, "step": 10260 }, { "epoch": 0.78, "learning_rate": 0.00012801802164206122, "loss": 4.5402, "step": 10265 }, { "epoch": 0.78, "learning_rate": 0.00012797779476246028, "loss": 3.0773, "step": 10270 }, { "epoch": 0.79, "learning_rate": 0.00012793756788285934, "loss": 2.9697, "step": 10275 }, { "epoch": 0.79, "learning_rate": 0.0001278973410032584, "loss": 1.9987, "step": 10280 }, { "epoch": 0.79, "learning_rate": 0.00012785711412365743, "loss": 3.0007, "step": 10285 }, { "epoch": 0.79, "learning_rate": 0.0001278168872440565, "loss": 2.0372, "step": 10290 }, { "epoch": 0.79, "learning_rate": 0.00012777666036445552, "loss": 2.4596, "step": 10295 }, { "epoch": 0.79, "learning_rate": 0.00012773643348485458, "loss": 1.395, "step": 10300 }, { "epoch": 0.79, "learning_rate": 0.00012769620660525364, "loss": 4.6918, "step": 10305 }, { "epoch": 0.79, "learning_rate": 0.0001276559797256527, "loss": 3.618, "step": 10310 }, { "epoch": 0.79, "learning_rate": 0.00012761575284605175, "loss": 3.8582, "step": 10315 }, { "epoch": 0.79, "learning_rate": 0.00012757552596645078, "loss": 4.6336, "step": 10320 }, { "epoch": 0.79, "learning_rate": 0.00012753529908684984, "loss": 2.6338, "step": 10325 }, { "epoch": 0.79, "learning_rate": 0.0001274950722072489, "loss": 3.15, "step": 10330 }, { "epoch": 0.79, "learning_rate": 0.00012745484532764793, "loss": 1.2446, "step": 10335 }, { "epoch": 0.79, "learning_rate": 0.000127414618448047, "loss": 2.2504, "step": 10340 }, { "epoch": 0.79, "learning_rate": 0.00012737439156844605, "loss": 3.426, "step": 10345 }, { "epoch": 0.79, "learning_rate": 0.0001273341646888451, "loss": 0.0367, "step": 10350 }, { "epoch": 0.79, "learning_rate": 0.00012729393780924414, "loss": 4.5221, "step": 10355 }, { "epoch": 0.79, "learning_rate": 0.0001272537109296432, "loss": 4.7127, "step": 10360 }, { "epoch": 0.79, "learning_rate": 0.00012721348405004225, "loss": 3.3664, "step": 10365 }, { "epoch": 0.79, "learning_rate": 0.00012717325717044128, "loss": 3.905, "step": 10370 }, { "epoch": 0.79, "learning_rate": 0.00012713303029084034, "loss": 3.6577, "step": 10375 }, { "epoch": 0.79, "learning_rate": 0.0001270928034112394, "loss": 2.1809, "step": 10380 }, { "epoch": 0.79, "learning_rate": 0.00012705257653163843, "loss": 2.0813, "step": 10385 }, { "epoch": 0.79, "learning_rate": 0.0001270123496520375, "loss": 3.2018, "step": 10390 }, { "epoch": 0.79, "learning_rate": 0.00012697212277243655, "loss": 2.3257, "step": 10395 }, { "epoch": 0.79, "learning_rate": 0.0001269318958928356, "loss": 1.8804, "step": 10400 }, { "epoch": 0.8, "learning_rate": 0.00012689166901323466, "loss": 4.5512, "step": 10405 }, { "epoch": 0.8, "learning_rate": 0.0001268514421336337, "loss": 3.9561, "step": 10410 }, { "epoch": 0.8, "learning_rate": 0.00012681121525403275, "loss": 3.5211, "step": 10415 }, { "epoch": 0.8, "learning_rate": 0.00012677098837443179, "loss": 3.8664, "step": 10420 }, { "epoch": 0.8, "learning_rate": 0.00012673076149483084, "loss": 3.3927, "step": 10425 }, { "epoch": 0.8, "learning_rate": 0.0001266905346152299, "loss": 3.9993, "step": 10430 }, { "epoch": 0.8, "learning_rate": 0.00012665030773562896, "loss": 3.6185, "step": 10435 }, { "epoch": 0.8, "learning_rate": 0.00012661008085602802, "loss": 2.7498, "step": 10440 }, { "epoch": 0.8, "learning_rate": 0.00012656985397642705, "loss": 1.6683, "step": 10445 }, { "epoch": 0.8, "learning_rate": 0.0001265296270968261, "loss": 0.0475, "step": 10450 }, { "epoch": 0.8, "learning_rate": 0.00012648940021722514, "loss": 4.1752, "step": 10455 }, { "epoch": 0.8, "learning_rate": 0.0001264491733376242, "loss": 4.8688, "step": 10460 }, { "epoch": 0.8, "learning_rate": 0.00012640894645802326, "loss": 4.3963, "step": 10465 }, { "epoch": 0.8, "learning_rate": 0.0001263687195784223, "loss": 4.7291, "step": 10470 }, { "epoch": 0.8, "learning_rate": 0.00012632849269882137, "loss": 3.6086, "step": 10475 }, { "epoch": 0.8, "learning_rate": 0.00012628826581922043, "loss": 2.9757, "step": 10480 }, { "epoch": 0.8, "learning_rate": 0.00012624803893961946, "loss": 3.5504, "step": 10485 }, { "epoch": 0.8, "learning_rate": 0.0001262078120600185, "loss": 3.5773, "step": 10490 }, { "epoch": 0.8, "learning_rate": 0.00012616758518041755, "loss": 1.6866, "step": 10495 }, { "epoch": 0.8, "learning_rate": 0.0001261273583008166, "loss": 2.5525, "step": 10500 }, { "epoch": 0.8, "learning_rate": 0.00012608713142121567, "loss": 4.3145, "step": 10505 }, { "epoch": 0.8, "learning_rate": 0.00012604690454161473, "loss": 4.4189, "step": 10510 }, { "epoch": 0.8, "learning_rate": 0.00012600667766201378, "loss": 4.108, "step": 10515 }, { "epoch": 0.8, "learning_rate": 0.00012596645078241281, "loss": 4.3092, "step": 10520 }, { "epoch": 0.8, "learning_rate": 0.00012592622390281185, "loss": 4.0993, "step": 10525 }, { "epoch": 0.8, "learning_rate": 0.0001258859970232109, "loss": 3.2504, "step": 10530 }, { "epoch": 0.81, "learning_rate": 0.00012584577014360996, "loss": 3.6479, "step": 10535 }, { "epoch": 0.81, "learning_rate": 0.00012580554326400902, "loss": 4.1616, "step": 10540 }, { "epoch": 0.81, "learning_rate": 0.00012577336176032827, "loss": 3.0753, "step": 10545 }, { "epoch": 0.81, "learning_rate": 0.0001257331348807273, "loss": 1.1912, "step": 10550 }, { "epoch": 0.81, "learning_rate": 0.00012569290800112636, "loss": 4.1457, "step": 10555 }, { "epoch": 0.81, "learning_rate": 0.00012565268112152541, "loss": 4.3012, "step": 10560 }, { "epoch": 0.81, "learning_rate": 0.00012561245424192447, "loss": 3.3361, "step": 10565 }, { "epoch": 0.81, "learning_rate": 0.0001255722273623235, "loss": 3.3066, "step": 10570 }, { "epoch": 0.81, "learning_rate": 0.00012553200048272256, "loss": 3.2586, "step": 10575 }, { "epoch": 0.81, "learning_rate": 0.00012549177360312162, "loss": 3.1351, "step": 10580 }, { "epoch": 0.81, "learning_rate": 0.00012545154672352065, "loss": 3.3842, "step": 10585 }, { "epoch": 0.81, "learning_rate": 0.0001254113198439197, "loss": 2.4289, "step": 10590 }, { "epoch": 0.81, "learning_rate": 0.00012537109296431877, "loss": 2.9674, "step": 10595 }, { "epoch": 0.81, "learning_rate": 0.00012533086608471783, "loss": 1.956, "step": 10600 }, { "epoch": 0.81, "learning_rate": 0.00012529063920511686, "loss": 3.8779, "step": 10605 }, { "epoch": 0.81, "learning_rate": 0.00012525041232551592, "loss": 3.518, "step": 10610 }, { "epoch": 0.81, "learning_rate": 0.00012521018544591497, "loss": 4.851, "step": 10615 }, { "epoch": 0.81, "learning_rate": 0.000125169958566314, "loss": 3.1552, "step": 10620 }, { "epoch": 0.81, "learning_rate": 0.00012512973168671306, "loss": 2.9098, "step": 10625 }, { "epoch": 0.81, "learning_rate": 0.00012508950480711212, "loss": 2.8324, "step": 10630 }, { "epoch": 0.81, "learning_rate": 0.00012504927792751118, "loss": 2.5265, "step": 10635 }, { "epoch": 0.81, "learning_rate": 0.00012500905104791024, "loss": 2.6673, "step": 10640 }, { "epoch": 0.81, "learning_rate": 0.00012496882416830927, "loss": 2.8797, "step": 10645 }, { "epoch": 0.81, "learning_rate": 0.00012492859728870833, "loss": 2.8138, "step": 10650 }, { "epoch": 0.81, "learning_rate": 0.00012488837040910736, "loss": 4.6326, "step": 10655 }, { "epoch": 0.81, "learning_rate": 0.00012484814352950642, "loss": 3.6939, "step": 10660 }, { "epoch": 0.82, "learning_rate": 0.00012480791664990548, "loss": 3.8156, "step": 10665 }, { "epoch": 0.82, "learning_rate": 0.00012476768977030453, "loss": 3.8127, "step": 10670 }, { "epoch": 0.82, "learning_rate": 0.0001247274628907036, "loss": 3.8121, "step": 10675 }, { "epoch": 0.82, "learning_rate": 0.00012468723601110262, "loss": 4.3749, "step": 10680 }, { "epoch": 0.82, "learning_rate": 0.00012464700913150168, "loss": 2.2904, "step": 10685 }, { "epoch": 0.82, "learning_rate": 0.0001246067822519007, "loss": 1.6336, "step": 10690 }, { "epoch": 0.82, "learning_rate": 0.00012456655537229977, "loss": 1.492, "step": 10695 }, { "epoch": 0.82, "learning_rate": 0.00012452632849269883, "loss": 1.4875, "step": 10700 }, { "epoch": 0.82, "learning_rate": 0.0001244861016130979, "loss": 4.3963, "step": 10705 }, { "epoch": 0.82, "learning_rate": 0.00012444587473349695, "loss": 3.8414, "step": 10710 }, { "epoch": 0.82, "learning_rate": 0.000124405647853896, "loss": 4.0334, "step": 10715 }, { "epoch": 0.82, "learning_rate": 0.00012436542097429503, "loss": 4.182, "step": 10720 }, { "epoch": 0.82, "learning_rate": 0.00012432519409469407, "loss": 3.2245, "step": 10725 }, { "epoch": 0.82, "learning_rate": 0.00012428496721509312, "loss": 2.7608, "step": 10730 }, { "epoch": 0.82, "learning_rate": 0.00012424474033549218, "loss": 2.8074, "step": 10735 }, { "epoch": 0.82, "learning_rate": 0.00012420451345589124, "loss": 3.0531, "step": 10740 }, { "epoch": 0.82, "learning_rate": 0.0001241642865762903, "loss": 1.4641, "step": 10745 }, { "epoch": 0.82, "learning_rate": 0.00012412405969668933, "loss": 2.0748, "step": 10750 }, { "epoch": 0.82, "learning_rate": 0.0001240838328170884, "loss": 4.573, "step": 10755 }, { "epoch": 0.82, "learning_rate": 0.00012404360593748742, "loss": 3.651, "step": 10760 }, { "epoch": 0.82, "learning_rate": 0.00012400337905788648, "loss": 3.5417, "step": 10765 }, { "epoch": 0.82, "learning_rate": 0.00012396315217828554, "loss": 4.6764, "step": 10770 }, { "epoch": 0.82, "learning_rate": 0.0001239229252986846, "loss": 4.1459, "step": 10775 }, { "epoch": 0.82, "learning_rate": 0.00012388269841908365, "loss": 3.444, "step": 10780 }, { "epoch": 0.82, "learning_rate": 0.00012384247153948268, "loss": 3.3812, "step": 10785 }, { "epoch": 0.82, "learning_rate": 0.00012380224465988174, "loss": 1.5143, "step": 10790 }, { "epoch": 0.83, "learning_rate": 0.00012376201778028077, "loss": 2.626, "step": 10795 }, { "epoch": 0.83, "learning_rate": 0.00012372179090067983, "loss": 1.1325, "step": 10800 }, { "epoch": 0.83, "learning_rate": 0.0001236815640210789, "loss": 5.0693, "step": 10805 }, { "epoch": 0.83, "learning_rate": 0.00012364133714147795, "loss": 4.8133, "step": 10810 }, { "epoch": 0.83, "learning_rate": 0.000123601110261877, "loss": 3.3014, "step": 10815 }, { "epoch": 0.83, "learning_rate": 0.00012356088338227604, "loss": 4.2463, "step": 10820 }, { "epoch": 0.83, "learning_rate": 0.0001235206565026751, "loss": 4.0414, "step": 10825 }, { "epoch": 0.83, "learning_rate": 0.00012348042962307413, "loss": 4.8287, "step": 10830 }, { "epoch": 0.83, "learning_rate": 0.00012344020274347318, "loss": 1.6694, "step": 10835 }, { "epoch": 0.83, "learning_rate": 0.00012339997586387224, "loss": 1.8969, "step": 10840 }, { "epoch": 0.83, "learning_rate": 0.0001233597489842713, "loss": 2.2986, "step": 10845 }, { "epoch": 0.83, "learning_rate": 0.00012331952210467036, "loss": 1.7887, "step": 10850 }, { "epoch": 0.83, "learning_rate": 0.0001232792952250694, "loss": 4.7313, "step": 10855 }, { "epoch": 0.83, "learning_rate": 0.00012323906834546845, "loss": 3.7457, "step": 10860 }, { "epoch": 0.83, "learning_rate": 0.0001231988414658675, "loss": 5.1592, "step": 10865 }, { "epoch": 0.83, "learning_rate": 0.00012315861458626654, "loss": 3.3604, "step": 10870 }, { "epoch": 0.83, "learning_rate": 0.0001231183877066656, "loss": 3.1216, "step": 10875 }, { "epoch": 0.83, "learning_rate": 0.00012307816082706465, "loss": 4.282, "step": 10880 }, { "epoch": 0.83, "learning_rate": 0.0001230379339474637, "loss": 2.4792, "step": 10885 }, { "epoch": 0.83, "learning_rate": 0.00012299770706786274, "loss": 1.8746, "step": 10890 }, { "epoch": 0.83, "learning_rate": 0.0001229574801882618, "loss": 1.6147, "step": 10895 }, { "epoch": 0.83, "learning_rate": 0.00012291725330866086, "loss": 2.9426, "step": 10900 }, { "epoch": 0.83, "learning_rate": 0.0001228770264290599, "loss": 4.616, "step": 10905 }, { "epoch": 0.83, "learning_rate": 0.00012283679954945895, "loss": 3.8428, "step": 10910 }, { "epoch": 0.83, "learning_rate": 0.000122796572669858, "loss": 4.851, "step": 10915 }, { "epoch": 0.83, "learning_rate": 0.00012275634579025707, "loss": 3.6221, "step": 10920 }, { "epoch": 0.83, "learning_rate": 0.0001227161189106561, "loss": 4.0699, "step": 10925 }, { "epoch": 0.84, "learning_rate": 0.00012267589203105516, "loss": 4.5023, "step": 10930 }, { "epoch": 0.84, "learning_rate": 0.00012263566515145421, "loss": 3.5705, "step": 10935 }, { "epoch": 0.84, "learning_rate": 0.00012259543827185327, "loss": 2.7856, "step": 10940 }, { "epoch": 0.84, "learning_rate": 0.0001225552113922523, "loss": 1.8233, "step": 10945 }, { "epoch": 0.84, "learning_rate": 0.00012251498451265136, "loss": 2.3532, "step": 10950 }, { "epoch": 0.84, "learning_rate": 0.00012247475763305042, "loss": 3.4496, "step": 10955 }, { "epoch": 0.84, "learning_rate": 0.00012243453075344945, "loss": 3.6041, "step": 10960 }, { "epoch": 0.84, "learning_rate": 0.0001223943038738485, "loss": 4.3311, "step": 10965 }, { "epoch": 0.84, "learning_rate": 0.00012235407699424757, "loss": 4.1073, "step": 10970 }, { "epoch": 0.84, "learning_rate": 0.00012231385011464663, "loss": 4.1563, "step": 10975 }, { "epoch": 0.84, "learning_rate": 0.00012227362323504566, "loss": 2.2184, "step": 10980 }, { "epoch": 0.84, "learning_rate": 0.00012223339635544471, "loss": 2.3406, "step": 10985 }, { "epoch": 0.84, "learning_rate": 0.00012219316947584377, "loss": 1.6087, "step": 10990 }, { "epoch": 0.84, "learning_rate": 0.0001221529425962428, "loss": 1.7286, "step": 10995 }, { "epoch": 0.84, "learning_rate": 0.00012211271571664186, "loss": 0.5842, "step": 11000 }, { "epoch": 0.84, "learning_rate": 0.00012207248883704092, "loss": 4.0867, "step": 11005 }, { "epoch": 0.84, "learning_rate": 0.00012203226195743998, "loss": 3.6726, "step": 11010 }, { "epoch": 0.84, "learning_rate": 0.00012199203507783902, "loss": 4.5297, "step": 11015 }, { "epoch": 0.84, "learning_rate": 0.00012195180819823805, "loss": 3.8204, "step": 11020 }, { "epoch": 0.84, "learning_rate": 0.00012191158131863711, "loss": 3.8697, "step": 11025 }, { "epoch": 0.84, "learning_rate": 0.00012187135443903617, "loss": 3.5434, "step": 11030 }, { "epoch": 0.84, "learning_rate": 0.00012183112755943522, "loss": 2.4551, "step": 11035 }, { "epoch": 0.84, "learning_rate": 0.00012179090067983427, "loss": 2.9122, "step": 11040 }, { "epoch": 0.84, "learning_rate": 0.00012175067380023333, "loss": 2.9427, "step": 11045 }, { "epoch": 0.84, "learning_rate": 0.00012171044692063238, "loss": 0.0149, "step": 11050 }, { "epoch": 0.84, "learning_rate": 0.00012167022004103141, "loss": 3.976, "step": 11055 }, { "epoch": 0.85, "learning_rate": 0.00012162999316143047, "loss": 4.292, "step": 11060 }, { "epoch": 0.85, "learning_rate": 0.00012158976628182952, "loss": 3.5534, "step": 11065 }, { "epoch": 0.85, "learning_rate": 0.00012154953940222857, "loss": 3.6272, "step": 11070 }, { "epoch": 0.85, "learning_rate": 0.00012150931252262763, "loss": 3.5693, "step": 11075 }, { "epoch": 0.85, "learning_rate": 0.00012146908564302669, "loss": 3.2917, "step": 11080 }, { "epoch": 0.85, "learning_rate": 0.00012142885876342573, "loss": 3.7476, "step": 11085 }, { "epoch": 0.85, "learning_rate": 0.00012138863188382479, "loss": 1.9143, "step": 11090 }, { "epoch": 0.85, "learning_rate": 0.00012134840500422382, "loss": 1.1044, "step": 11095 }, { "epoch": 0.85, "learning_rate": 0.00012130817812462288, "loss": 2.3956, "step": 11100 }, { "epoch": 0.85, "learning_rate": 0.00012126795124502192, "loss": 4.0947, "step": 11105 }, { "epoch": 0.85, "learning_rate": 0.00012122772436542098, "loss": 3.8414, "step": 11110 }, { "epoch": 0.85, "learning_rate": 0.00012118749748582004, "loss": 3.4203, "step": 11115 }, { "epoch": 0.85, "learning_rate": 0.00012114727060621908, "loss": 4.1299, "step": 11120 }, { "epoch": 0.85, "learning_rate": 0.00012110704372661814, "loss": 1.8336, "step": 11125 }, { "epoch": 0.85, "learning_rate": 0.00012106681684701717, "loss": 3.1511, "step": 11130 }, { "epoch": 0.85, "learning_rate": 0.00012102658996741623, "loss": 1.5535, "step": 11135 }, { "epoch": 0.85, "learning_rate": 0.00012098636308781528, "loss": 2.8977, "step": 11140 }, { "epoch": 0.85, "learning_rate": 0.00012094613620821433, "loss": 1.0519, "step": 11145 }, { "epoch": 0.85, "learning_rate": 0.00012090590932861339, "loss": 2.3733, "step": 11150 }, { "epoch": 0.85, "learning_rate": 0.00012086568244901244, "loss": 6.4816, "step": 11155 }, { "epoch": 0.85, "learning_rate": 0.0001208254555694115, "loss": 4.9588, "step": 11160 }, { "epoch": 0.85, "learning_rate": 0.00012078522868981055, "loss": 3.2568, "step": 11165 }, { "epoch": 0.85, "learning_rate": 0.00012074500181020959, "loss": 3.6896, "step": 11170 }, { "epoch": 0.85, "learning_rate": 0.00012070477493060863, "loss": 4.2188, "step": 11175 }, { "epoch": 0.85, "learning_rate": 0.00012066454805100769, "loss": 3.8143, "step": 11180 }, { "epoch": 0.85, "learning_rate": 0.00012062432117140675, "loss": 2.433, "step": 11185 }, { "epoch": 0.86, "learning_rate": 0.00012058409429180579, "loss": 2.9424, "step": 11190 }, { "epoch": 0.86, "learning_rate": 0.00012054386741220485, "loss": 1.7503, "step": 11195 }, { "epoch": 0.86, "learning_rate": 0.0001205036405326039, "loss": 1.471, "step": 11200 }, { "epoch": 0.86, "learning_rate": 0.00012046341365300294, "loss": 4.4582, "step": 11205 }, { "epoch": 0.86, "learning_rate": 0.00012042318677340198, "loss": 4.9609, "step": 11210 }, { "epoch": 0.86, "learning_rate": 0.00012038295989380104, "loss": 4.7346, "step": 11215 }, { "epoch": 0.86, "learning_rate": 0.00012034273301420009, "loss": 3.2718, "step": 11220 }, { "epoch": 0.86, "learning_rate": 0.00012030250613459914, "loss": 3.5242, "step": 11225 }, { "epoch": 0.86, "learning_rate": 0.0001202622792549982, "loss": 3.4757, "step": 11230 }, { "epoch": 0.86, "learning_rate": 0.00012022205237539725, "loss": 1.8217, "step": 11235 }, { "epoch": 0.86, "learning_rate": 0.0001201818254957963, "loss": 2.7196, "step": 11240 }, { "epoch": 0.86, "learning_rate": 0.00012014159861619534, "loss": 1.9612, "step": 11245 }, { "epoch": 0.86, "learning_rate": 0.0001201013717365944, "loss": 1.22, "step": 11250 }, { "epoch": 0.86, "learning_rate": 0.00012006114485699344, "loss": 3.6163, "step": 11255 }, { "epoch": 0.86, "learning_rate": 0.0001200209179773925, "loss": 3.748, "step": 11260 }, { "epoch": 0.86, "learning_rate": 0.00011998069109779156, "loss": 3.8621, "step": 11265 }, { "epoch": 0.86, "learning_rate": 0.0001199404642181906, "loss": 3.7291, "step": 11270 }, { "epoch": 0.86, "learning_rate": 0.00011990023733858966, "loss": 3.5688, "step": 11275 }, { "epoch": 0.86, "learning_rate": 0.00011986001045898869, "loss": 3.9327, "step": 11280 }, { "epoch": 0.86, "learning_rate": 0.00011981978357938775, "loss": 2.0496, "step": 11285 }, { "epoch": 0.86, "learning_rate": 0.0001197795566997868, "loss": 3.2875, "step": 11290 }, { "epoch": 0.86, "learning_rate": 0.00011973932982018585, "loss": 3.1575, "step": 11295 }, { "epoch": 0.86, "learning_rate": 0.00011969910294058491, "loss": 1.5065, "step": 11300 }, { "epoch": 0.86, "learning_rate": 0.00011965887606098395, "loss": 5.9072, "step": 11305 }, { "epoch": 0.86, "learning_rate": 0.00011961864918138301, "loss": 4.5477, "step": 11310 }, { "epoch": 0.86, "learning_rate": 0.00011957842230178207, "loss": 4.2107, "step": 11315 }, { "epoch": 0.87, "learning_rate": 0.0001195381954221811, "loss": 4.1152, "step": 11320 }, { "epoch": 0.87, "learning_rate": 0.00011949796854258015, "loss": 3.9164, "step": 11325 }, { "epoch": 0.87, "learning_rate": 0.0001194577416629792, "loss": 3.1883, "step": 11330 }, { "epoch": 0.87, "learning_rate": 0.00011941751478337826, "loss": 3.7471, "step": 11335 }, { "epoch": 0.87, "learning_rate": 0.00011937728790377731, "loss": 2.6846, "step": 11340 }, { "epoch": 0.87, "learning_rate": 0.00011933706102417637, "loss": 2.8391, "step": 11345 }, { "epoch": 0.87, "learning_rate": 0.0001193048795204956, "loss": 2.3922, "step": 11350 }, { "epoch": 0.87, "learning_rate": 0.00011926465264089466, "loss": 4.3721, "step": 11355 }, { "epoch": 0.87, "learning_rate": 0.00011922442576129372, "loss": 4.3652, "step": 11360 }, { "epoch": 0.87, "learning_rate": 0.00011918419888169275, "loss": 3.515, "step": 11365 }, { "epoch": 0.87, "learning_rate": 0.00011914397200209179, "loss": 4.5871, "step": 11370 }, { "epoch": 0.87, "learning_rate": 0.00011910374512249085, "loss": 3.9205, "step": 11375 }, { "epoch": 0.87, "learning_rate": 0.00011906351824288991, "loss": 3.4631, "step": 11380 }, { "epoch": 0.87, "learning_rate": 0.00011902329136328895, "loss": 3.6807, "step": 11385 }, { "epoch": 0.87, "learning_rate": 0.00011898306448368801, "loss": 2.6828, "step": 11390 }, { "epoch": 0.87, "learning_rate": 0.00011894283760408707, "loss": 2.0819, "step": 11395 }, { "epoch": 0.87, "learning_rate": 0.0001189026107244861, "loss": 2.5945, "step": 11400 }, { "epoch": 0.87, "learning_rate": 0.00011886238384488515, "loss": 3.8611, "step": 11405 }, { "epoch": 0.87, "learning_rate": 0.0001188221569652842, "loss": 4.0271, "step": 11410 }, { "epoch": 0.87, "learning_rate": 0.00011878193008568326, "loss": 5.5275, "step": 11415 }, { "epoch": 0.87, "learning_rate": 0.0001187417032060823, "loss": 4.565, "step": 11420 }, { "epoch": 0.87, "learning_rate": 0.00011870147632648136, "loss": 3.5147, "step": 11425 }, { "epoch": 0.87, "learning_rate": 0.00011866124944688042, "loss": 3.1143, "step": 11430 }, { "epoch": 0.87, "learning_rate": 0.00011862102256727947, "loss": 1.3822, "step": 11435 }, { "epoch": 0.87, "learning_rate": 0.0001185807956876785, "loss": 2.3071, "step": 11440 }, { "epoch": 0.87, "learning_rate": 0.00011854056880807756, "loss": 1.8316, "step": 11445 }, { "epoch": 0.88, "learning_rate": 0.00011850034192847662, "loss": 1.9333, "step": 11450 }, { "epoch": 0.88, "learning_rate": 0.00011846011504887566, "loss": 4.7771, "step": 11455 }, { "epoch": 0.88, "learning_rate": 0.00011841988816927472, "loss": 4.8422, "step": 11460 }, { "epoch": 0.88, "learning_rate": 0.00011837966128967378, "loss": 4.3121, "step": 11465 }, { "epoch": 0.88, "learning_rate": 0.00011833943441007282, "loss": 3.5387, "step": 11470 }, { "epoch": 0.88, "learning_rate": 0.00011829920753047185, "loss": 2.7474, "step": 11475 }, { "epoch": 0.88, "learning_rate": 0.00011825898065087091, "loss": 3.3034, "step": 11480 }, { "epoch": 0.88, "learning_rate": 0.00011821875377126997, "loss": 3.3162, "step": 11485 }, { "epoch": 0.88, "learning_rate": 0.00011817852689166901, "loss": 1.4119, "step": 11490 }, { "epoch": 0.88, "learning_rate": 0.00011813830001206807, "loss": 2.9623, "step": 11495 }, { "epoch": 0.88, "learning_rate": 0.00011809807313246713, "loss": 1.924, "step": 11500 }, { "epoch": 0.88, "learning_rate": 0.00011805784625286617, "loss": 4.39, "step": 11505 }, { "epoch": 0.88, "learning_rate": 0.00011801761937326523, "loss": 4.1092, "step": 11510 }, { "epoch": 0.88, "learning_rate": 0.00011797739249366426, "loss": 4.3584, "step": 11515 }, { "epoch": 0.88, "learning_rate": 0.00011793716561406332, "loss": 4.9307, "step": 11520 }, { "epoch": 0.88, "learning_rate": 0.00011789693873446237, "loss": 3.7303, "step": 11525 }, { "epoch": 0.88, "learning_rate": 0.00011785671185486143, "loss": 2.9764, "step": 11530 }, { "epoch": 0.88, "learning_rate": 0.00011781648497526048, "loss": 2.5169, "step": 11535 }, { "epoch": 0.88, "learning_rate": 0.00011777625809565953, "loss": 2.986, "step": 11540 }, { "epoch": 0.88, "learning_rate": 0.00011773603121605859, "loss": 2.9573, "step": 11545 }, { "epoch": 0.88, "learning_rate": 0.00011769580433645762, "loss": 4.3488, "step": 11550 }, { "epoch": 0.88, "learning_rate": 0.00011765557745685668, "loss": 3.9223, "step": 11555 }, { "epoch": 0.88, "learning_rate": 0.00011761535057725572, "loss": 4.3891, "step": 11560 }, { "epoch": 0.88, "learning_rate": 0.00011757512369765478, "loss": 4.6504, "step": 11565 }, { "epoch": 0.88, "learning_rate": 0.00011753489681805384, "loss": 2.1885, "step": 11570 }, { "epoch": 0.88, "learning_rate": 0.00011749466993845288, "loss": 4.1299, "step": 11575 }, { "epoch": 0.89, "learning_rate": 0.00011745444305885194, "loss": 2.6887, "step": 11580 }, { "epoch": 0.89, "learning_rate": 0.00011741421617925098, "loss": 4.266, "step": 11585 }, { "epoch": 0.89, "learning_rate": 0.00011737398929965002, "loss": 2.6325, "step": 11590 }, { "epoch": 0.89, "learning_rate": 0.00011733376242004907, "loss": 2.8226, "step": 11595 }, { "epoch": 0.89, "learning_rate": 0.00011729353554044813, "loss": 0.0985, "step": 11600 }, { "epoch": 0.89, "learning_rate": 0.00011725330866084718, "loss": 4.3924, "step": 11605 }, { "epoch": 0.89, "learning_rate": 0.00011721308178124624, "loss": 4.9402, "step": 11610 }, { "epoch": 0.89, "learning_rate": 0.0001171728549016453, "loss": 4.0807, "step": 11615 }, { "epoch": 0.89, "learning_rate": 0.00011713262802204434, "loss": 3.7963, "step": 11620 }, { "epoch": 0.89, "learning_rate": 0.00011709240114244337, "loss": 3.3341, "step": 11625 }, { "epoch": 0.89, "learning_rate": 0.00011705217426284243, "loss": 2.103, "step": 11630 }, { "epoch": 0.89, "learning_rate": 0.00011701194738324149, "loss": 2.1604, "step": 11635 }, { "epoch": 0.89, "learning_rate": 0.00011697172050364053, "loss": 3.5096, "step": 11640 }, { "epoch": 0.89, "learning_rate": 0.00011693149362403959, "loss": 4.5993, "step": 11645 }, { "epoch": 0.89, "learning_rate": 0.00011689126674443865, "loss": 3.1042, "step": 11650 }, { "epoch": 0.89, "learning_rate": 0.00011685103986483769, "loss": 4.4627, "step": 11655 }, { "epoch": 0.89, "learning_rate": 0.00011681081298523675, "loss": 4.0707, "step": 11660 }, { "epoch": 0.89, "learning_rate": 0.00011677058610563578, "loss": 4.1977, "step": 11665 }, { "epoch": 0.89, "learning_rate": 0.00011673035922603484, "loss": 3.0867, "step": 11670 }, { "epoch": 0.89, "learning_rate": 0.00011669013234643388, "loss": 3.8916, "step": 11675 }, { "epoch": 0.89, "learning_rate": 0.00011664990546683294, "loss": 2.4693, "step": 11680 }, { "epoch": 0.89, "learning_rate": 0.000116609678587232, "loss": 3.8117, "step": 11685 }, { "epoch": 0.89, "learning_rate": 0.00011656945170763104, "loss": 2.3956, "step": 11690 }, { "epoch": 0.89, "learning_rate": 0.0001165292248280301, "loss": 2.6879, "step": 11695 }, { "epoch": 0.89, "learning_rate": 0.00011648899794842913, "loss": 1.1392, "step": 11700 }, { "epoch": 0.89, "learning_rate": 0.00011644877106882819, "loss": 4.1566, "step": 11705 }, { "epoch": 0.89, "learning_rate": 0.00011640854418922724, "loss": 4.0279, "step": 11710 }, { "epoch": 0.9, "learning_rate": 0.0001163683173096263, "loss": 3.9164, "step": 11715 }, { "epoch": 0.9, "learning_rate": 0.00011632809043002535, "loss": 2.289, "step": 11720 }, { "epoch": 0.9, "learning_rate": 0.0001162878635504244, "loss": 4.4148, "step": 11725 }, { "epoch": 0.9, "learning_rate": 0.00011624763667082346, "loss": 2.4125, "step": 11730 }, { "epoch": 0.9, "learning_rate": 0.00011620740979122251, "loss": 1.2408, "step": 11735 }, { "epoch": 0.9, "learning_rate": 0.00011616718291162155, "loss": 1.2736, "step": 11740 }, { "epoch": 0.9, "learning_rate": 0.00011612695603202059, "loss": 1.0584, "step": 11745 }, { "epoch": 0.9, "learning_rate": 0.00011608672915241965, "loss": 2.8411, "step": 11750 }, { "epoch": 0.9, "learning_rate": 0.00011604650227281871, "loss": 3.5629, "step": 11755 }, { "epoch": 0.9, "learning_rate": 0.00011600627539321775, "loss": 4.7592, "step": 11760 }, { "epoch": 0.9, "learning_rate": 0.00011596604851361681, "loss": 4.0055, "step": 11765 }, { "epoch": 0.9, "learning_rate": 0.00011592582163401587, "loss": 4.5269, "step": 11770 }, { "epoch": 0.9, "learning_rate": 0.0001158855947544149, "loss": 4.535, "step": 11775 }, { "epoch": 0.9, "learning_rate": 0.00011584536787481394, "loss": 1.9481, "step": 11780 }, { "epoch": 0.9, "learning_rate": 0.000115805140995213, "loss": 2.4671, "step": 11785 }, { "epoch": 0.9, "learning_rate": 0.00011576491411561206, "loss": 3.4428, "step": 11790 }, { "epoch": 0.9, "learning_rate": 0.0001157246872360111, "loss": 2.0589, "step": 11795 }, { "epoch": 0.9, "learning_rate": 0.00011568446035641016, "loss": 3.3335, "step": 11800 }, { "epoch": 0.9, "learning_rate": 0.00011564423347680922, "loss": 4.425, "step": 11805 }, { "epoch": 0.9, "learning_rate": 0.00011560400659720827, "loss": 4.8104, "step": 11810 }, { "epoch": 0.9, "learning_rate": 0.0001155637797176073, "loss": 4.1982, "step": 11815 }, { "epoch": 0.9, "learning_rate": 0.00011552355283800636, "loss": 4.6156, "step": 11820 }, { "epoch": 0.9, "learning_rate": 0.00011548332595840541, "loss": 3.4463, "step": 11825 }, { "epoch": 0.9, "learning_rate": 0.00011544309907880446, "loss": 1.7727, "step": 11830 }, { "epoch": 0.9, "learning_rate": 0.00011540287219920352, "loss": 2.8218, "step": 11835 }, { "epoch": 0.9, "learning_rate": 0.00011536264531960258, "loss": 2.5171, "step": 11840 }, { "epoch": 0.91, "learning_rate": 0.00011532241844000162, "loss": 3.8074, "step": 11845 }, { "epoch": 0.91, "learning_rate": 0.00011528219156040065, "loss": 1.3458, "step": 11850 }, { "epoch": 0.91, "learning_rate": 0.00011524196468079971, "loss": 4.7215, "step": 11855 }, { "epoch": 0.91, "learning_rate": 0.00011520173780119877, "loss": 4.6002, "step": 11860 }, { "epoch": 0.91, "learning_rate": 0.00011516151092159781, "loss": 4.7979, "step": 11865 }, { "epoch": 0.91, "learning_rate": 0.00011512128404199687, "loss": 4.3117, "step": 11870 }, { "epoch": 0.91, "learning_rate": 0.00011508105716239593, "loss": 2.6911, "step": 11875 }, { "epoch": 0.91, "learning_rate": 0.00011504083028279497, "loss": 3.0803, "step": 11880 }, { "epoch": 0.91, "learning_rate": 0.00011500060340319403, "loss": 2.7019, "step": 11885 }, { "epoch": 0.91, "learning_rate": 0.00011496037652359306, "loss": 2.1551, "step": 11890 }, { "epoch": 0.91, "learning_rate": 0.00011492014964399211, "loss": 2.693, "step": 11895 }, { "epoch": 0.91, "learning_rate": 0.00011487992276439117, "loss": 0.6029, "step": 11900 }, { "epoch": 0.91, "learning_rate": 0.00011483969588479022, "loss": 4.9437, "step": 11905 }, { "epoch": 0.91, "learning_rate": 0.00011479946900518927, "loss": 4.216, "step": 11910 }, { "epoch": 0.91, "learning_rate": 0.00011475924212558833, "loss": 3.3949, "step": 11915 }, { "epoch": 0.91, "learning_rate": 0.00011471901524598739, "loss": 3.6592, "step": 11920 }, { "epoch": 0.91, "learning_rate": 0.00011467878836638643, "loss": 2.7439, "step": 11925 }, { "epoch": 0.91, "learning_rate": 0.00011463856148678546, "loss": 2.8549, "step": 11930 }, { "epoch": 0.91, "learning_rate": 0.00011459833460718452, "loss": 2.6736, "step": 11935 }, { "epoch": 0.91, "learning_rate": 0.00011455810772758358, "loss": 3.3242, "step": 11940 }, { "epoch": 0.91, "learning_rate": 0.00011451788084798262, "loss": 1.7755, "step": 11945 }, { "epoch": 0.91, "learning_rate": 0.00011447765396838168, "loss": 2.2492, "step": 11950 }, { "epoch": 0.91, "learning_rate": 0.00011443742708878074, "loss": 4.5637, "step": 11955 }, { "epoch": 0.91, "learning_rate": 0.00011439720020917978, "loss": 4.16, "step": 11960 }, { "epoch": 0.91, "learning_rate": 0.00011435697332957881, "loss": 4.0373, "step": 11965 }, { "epoch": 0.91, "learning_rate": 0.00011431674644997787, "loss": 3.9465, "step": 11970 }, { "epoch": 0.92, "learning_rate": 0.00011427651957037693, "loss": 4.5678, "step": 11975 }, { "epoch": 0.92, "learning_rate": 0.00011423629269077598, "loss": 4.6479, "step": 11980 }, { "epoch": 0.92, "learning_rate": 0.00011419606581117503, "loss": 3.3561, "step": 11985 }, { "epoch": 0.92, "learning_rate": 0.00011415583893157409, "loss": 2.7581, "step": 11990 }, { "epoch": 0.92, "learning_rate": 0.00011411561205197314, "loss": 1.4079, "step": 11995 }, { "epoch": 0.92, "learning_rate": 0.0001140753851723722, "loss": 3.3122, "step": 12000 }, { "epoch": 0.92, "learning_rate": 0.00011403515829277123, "loss": 5.2654, "step": 12005 }, { "epoch": 0.92, "learning_rate": 0.00011399493141317028, "loss": 4.2666, "step": 12010 }, { "epoch": 0.92, "learning_rate": 0.00011395470453356933, "loss": 3.2514, "step": 12015 }, { "epoch": 0.92, "learning_rate": 0.00011391447765396839, "loss": 3.2791, "step": 12020 }, { "epoch": 0.92, "learning_rate": 0.00011387425077436745, "loss": 2.7815, "step": 12025 }, { "epoch": 0.92, "learning_rate": 0.00011383402389476649, "loss": 3.698, "step": 12030 }, { "epoch": 0.92, "learning_rate": 0.00011379379701516555, "loss": 3.4422, "step": 12035 }, { "epoch": 0.92, "learning_rate": 0.00011375357013556458, "loss": 0.9724, "step": 12040 }, { "epoch": 0.92, "learning_rate": 0.00011371334325596364, "loss": 1.8559, "step": 12045 }, { "epoch": 0.92, "learning_rate": 0.00011367311637636268, "loss": 1.9751, "step": 12050 }, { "epoch": 0.92, "learning_rate": 0.00011363288949676174, "loss": 3.3685, "step": 12055 }, { "epoch": 0.92, "learning_rate": 0.0001135926626171608, "loss": 5.3922, "step": 12060 }, { "epoch": 0.92, "learning_rate": 0.00011355243573755984, "loss": 3.7494, "step": 12065 }, { "epoch": 0.92, "learning_rate": 0.0001135122088579589, "loss": 5.3334, "step": 12070 }, { "epoch": 0.92, "learning_rate": 0.00011347198197835796, "loss": 3.8536, "step": 12075 }, { "epoch": 0.92, "learning_rate": 0.00011343175509875699, "loss": 3.947, "step": 12080 }, { "epoch": 0.92, "learning_rate": 0.00011339152821915604, "loss": 2.8934, "step": 12085 }, { "epoch": 0.92, "learning_rate": 0.0001133513013395551, "loss": 2.3866, "step": 12090 }, { "epoch": 0.92, "learning_rate": 0.00011331107445995415, "loss": 1.5607, "step": 12095 }, { "epoch": 0.92, "learning_rate": 0.0001132708475803532, "loss": 1.2598, "step": 12100 }, { "epoch": 0.93, "learning_rate": 0.00011323062070075226, "loss": 3.7342, "step": 12105 }, { "epoch": 0.93, "learning_rate": 0.00011319039382115131, "loss": 4.8529, "step": 12110 }, { "epoch": 0.93, "learning_rate": 0.00011315016694155035, "loss": 4.0742, "step": 12115 }, { "epoch": 0.93, "learning_rate": 0.00011310994006194939, "loss": 5.4617, "step": 12120 }, { "epoch": 0.93, "learning_rate": 0.00011306971318234845, "loss": 3.8857, "step": 12125 }, { "epoch": 0.93, "learning_rate": 0.0001130294863027475, "loss": 2.9613, "step": 12130 }, { "epoch": 0.93, "learning_rate": 0.00011298925942314655, "loss": 1.5732, "step": 12135 }, { "epoch": 0.93, "learning_rate": 0.00011294903254354561, "loss": 1.9693, "step": 12140 }, { "epoch": 0.93, "learning_rate": 0.00011290880566394467, "loss": 1.1998, "step": 12145 }, { "epoch": 0.93, "learning_rate": 0.00011286857878434371, "loss": 3.2726, "step": 12150 }, { "epoch": 0.93, "learning_rate": 0.00011282835190474274, "loss": 4.6418, "step": 12155 }, { "epoch": 0.93, "learning_rate": 0.0001127881250251418, "loss": 4.0848, "step": 12160 }, { "epoch": 0.93, "learning_rate": 0.00011274789814554086, "loss": 4.4574, "step": 12165 }, { "epoch": 0.93, "learning_rate": 0.0001127076712659399, "loss": 4.4372, "step": 12170 }, { "epoch": 0.93, "learning_rate": 0.00011266744438633896, "loss": 4.7805, "step": 12175 }, { "epoch": 0.93, "learning_rate": 0.00011262721750673802, "loss": 2.6393, "step": 12180 }, { "epoch": 0.93, "learning_rate": 0.00011258699062713707, "loss": 3.8619, "step": 12185 }, { "epoch": 0.93, "learning_rate": 0.0001125467637475361, "loss": 3.5495, "step": 12190 }, { "epoch": 0.93, "learning_rate": 0.00011250653686793516, "loss": 3.6609, "step": 12195 }, { "epoch": 0.93, "learning_rate": 0.0001124663099883342, "loss": 2.3156, "step": 12200 }, { "epoch": 0.93, "learning_rate": 0.00011242608310873326, "loss": 4.6609, "step": 12205 }, { "epoch": 0.93, "learning_rate": 0.00011238585622913232, "loss": 4.3674, "step": 12210 }, { "epoch": 0.93, "learning_rate": 0.00011234562934953136, "loss": 3.684, "step": 12215 }, { "epoch": 0.93, "learning_rate": 0.00011230540246993042, "loss": 4.9451, "step": 12220 }, { "epoch": 0.93, "learning_rate": 0.00011226517559032948, "loss": 2.7038, "step": 12225 }, { "epoch": 0.93, "learning_rate": 0.00011222494871072851, "loss": 3.1783, "step": 12230 }, { "epoch": 0.94, "learning_rate": 0.00011218472183112755, "loss": 2.1691, "step": 12235 }, { "epoch": 0.94, "learning_rate": 0.00011214449495152661, "loss": 2.7589, "step": 12240 }, { "epoch": 0.94, "learning_rate": 0.00011210426807192567, "loss": 1.9916, "step": 12245 }, { "epoch": 0.94, "learning_rate": 0.00011206404119232471, "loss": 2.93, "step": 12250 }, { "epoch": 0.94, "learning_rate": 0.00011202381431272377, "loss": 3.9955, "step": 12255 }, { "epoch": 0.94, "learning_rate": 0.00011198358743312283, "loss": 4.1354, "step": 12260 }, { "epoch": 0.94, "learning_rate": 0.00011194336055352186, "loss": 3.9764, "step": 12265 }, { "epoch": 0.94, "learning_rate": 0.0001119031336739209, "loss": 3.9035, "step": 12270 }, { "epoch": 0.94, "learning_rate": 0.00011186290679431996, "loss": 3.8275, "step": 12275 }, { "epoch": 0.94, "learning_rate": 0.00011182267991471902, "loss": 3.5661, "step": 12280 }, { "epoch": 0.94, "learning_rate": 0.00011178245303511807, "loss": 3.2651, "step": 12285 }, { "epoch": 0.94, "learning_rate": 0.00011174222615551713, "loss": 1.1449, "step": 12290 }, { "epoch": 0.94, "learning_rate": 0.00011170199927591618, "loss": 1.265, "step": 12295 }, { "epoch": 0.94, "learning_rate": 0.00011166177239631523, "loss": 0.3177, "step": 12300 }, { "epoch": 0.94, "learning_rate": 0.00011162154551671426, "loss": 5.1195, "step": 12305 }, { "epoch": 0.94, "learning_rate": 0.00011158131863711332, "loss": 4.2492, "step": 12310 }, { "epoch": 0.94, "learning_rate": 0.00011154109175751238, "loss": 4.85, "step": 12315 }, { "epoch": 0.94, "learning_rate": 0.00011150086487791142, "loss": 3.5096, "step": 12320 }, { "epoch": 0.94, "learning_rate": 0.00011146063799831048, "loss": 3.4133, "step": 12325 }, { "epoch": 0.94, "learning_rate": 0.00011142041111870954, "loss": 2.644, "step": 12330 }, { "epoch": 0.94, "learning_rate": 0.00011138018423910858, "loss": 1.4073, "step": 12335 }, { "epoch": 0.94, "learning_rate": 0.00011133995735950761, "loss": 2.6904, "step": 12340 }, { "epoch": 0.94, "learning_rate": 0.00011129973047990667, "loss": 4.1987, "step": 12345 }, { "epoch": 0.94, "learning_rate": 0.00011125950360030573, "loss": 4.0315, "step": 12350 }, { "epoch": 0.94, "learning_rate": 0.00011121927672070477, "loss": 4.21, "step": 12355 }, { "epoch": 0.94, "learning_rate": 0.00011117904984110383, "loss": 4.2699, "step": 12360 }, { "epoch": 0.95, "learning_rate": 0.00011113882296150289, "loss": 5.3631, "step": 12365 }, { "epoch": 0.95, "learning_rate": 0.00011109859608190194, "loss": 4.8795, "step": 12370 }, { "epoch": 0.95, "learning_rate": 0.000111058369202301, "loss": 4.0154, "step": 12375 }, { "epoch": 0.95, "learning_rate": 0.00011101814232270003, "loss": 4.0755, "step": 12380 }, { "epoch": 0.95, "learning_rate": 0.00011097791544309908, "loss": 4.2021, "step": 12385 }, { "epoch": 0.95, "learning_rate": 0.00011093768856349813, "loss": 2.2228, "step": 12390 }, { "epoch": 0.95, "learning_rate": 0.00011089746168389719, "loss": 2.6391, "step": 12395 }, { "epoch": 0.95, "learning_rate": 0.00011085723480429624, "loss": 2.3025, "step": 12400 }, { "epoch": 0.95, "learning_rate": 0.00011081700792469529, "loss": 5.06, "step": 12405 }, { "epoch": 0.95, "learning_rate": 0.00011077678104509435, "loss": 4.7277, "step": 12410 }, { "epoch": 0.95, "learning_rate": 0.00011073655416549338, "loss": 4.8125, "step": 12415 }, { "epoch": 0.95, "learning_rate": 0.00011069632728589244, "loss": 3.7311, "step": 12420 }, { "epoch": 0.95, "learning_rate": 0.00011065610040629148, "loss": 3.3934, "step": 12425 }, { "epoch": 0.95, "learning_rate": 0.00011061587352669054, "loss": 4.0301, "step": 12430 }, { "epoch": 0.95, "learning_rate": 0.0001105756466470896, "loss": 3.5967, "step": 12435 }, { "epoch": 0.95, "learning_rate": 0.00011053541976748864, "loss": 3.6605, "step": 12440 }, { "epoch": 0.95, "learning_rate": 0.0001104951928878877, "loss": 2.1137, "step": 12445 }, { "epoch": 0.95, "learning_rate": 0.00011045496600828676, "loss": 1.5456, "step": 12450 }, { "epoch": 0.95, "learning_rate": 0.00011041473912868579, "loss": 4.5984, "step": 12455 }, { "epoch": 0.95, "learning_rate": 0.00011037451224908484, "loss": 4.9756, "step": 12460 }, { "epoch": 0.95, "learning_rate": 0.0001103342853694839, "loss": 4.725, "step": 12465 }, { "epoch": 0.95, "learning_rate": 0.00011029405848988295, "loss": 5.0465, "step": 12470 }, { "epoch": 0.95, "learning_rate": 0.000110253831610282, "loss": 3.6682, "step": 12475 }, { "epoch": 0.95, "learning_rate": 0.00011021360473068105, "loss": 4.3359, "step": 12480 }, { "epoch": 0.95, "learning_rate": 0.0001101733778510801, "loss": 2.6403, "step": 12485 }, { "epoch": 0.95, "learning_rate": 0.00011013315097147914, "loss": 3.0468, "step": 12490 }, { "epoch": 0.95, "learning_rate": 0.00011009292409187819, "loss": 3.0222, "step": 12495 }, { "epoch": 0.96, "learning_rate": 0.00011005269721227725, "loss": 0.6001, "step": 12500 }, { "epoch": 0.96, "learning_rate": 0.00011001247033267629, "loss": 4.099, "step": 12505 }, { "epoch": 0.96, "learning_rate": 0.00010997224345307535, "loss": 3.7836, "step": 12510 }, { "epoch": 0.96, "learning_rate": 0.00010993201657347441, "loss": 4.1847, "step": 12515 }, { "epoch": 0.96, "learning_rate": 0.00010989178969387345, "loss": 4.2145, "step": 12520 }, { "epoch": 0.96, "learning_rate": 0.00010985156281427251, "loss": 3.6762, "step": 12525 }, { "epoch": 0.96, "learning_rate": 0.00010981133593467154, "loss": 3.2025, "step": 12530 }, { "epoch": 0.96, "learning_rate": 0.0001097711090550706, "loss": 2.674, "step": 12535 }, { "epoch": 0.96, "learning_rate": 0.00010973088217546965, "loss": 2.7232, "step": 12540 }, { "epoch": 0.96, "learning_rate": 0.0001096906552958687, "loss": 0.4914, "step": 12545 }, { "epoch": 0.96, "learning_rate": 0.00010965042841626776, "loss": 2.267, "step": 12550 }, { "epoch": 0.96, "learning_rate": 0.0001096102015366668, "loss": 3.6879, "step": 12555 }, { "epoch": 0.96, "learning_rate": 0.00010956997465706586, "loss": 4.2727, "step": 12560 }, { "epoch": 0.96, "learning_rate": 0.0001095297477774649, "loss": 3.6961, "step": 12565 }, { "epoch": 0.96, "learning_rate": 0.00010948952089786395, "loss": 2.9307, "step": 12570 }, { "epoch": 0.96, "learning_rate": 0.000109449294018263, "loss": 2.4069, "step": 12575 }, { "epoch": 0.96, "learning_rate": 0.00010940906713866206, "loss": 2.6994, "step": 12580 }, { "epoch": 0.96, "learning_rate": 0.00010936884025906112, "loss": 1.9843, "step": 12585 }, { "epoch": 0.96, "learning_rate": 0.00010932861337946016, "loss": 2.8084, "step": 12590 }, { "epoch": 0.96, "learning_rate": 0.00010928838649985922, "loss": 2.6188, "step": 12595 }, { "epoch": 0.96, "learning_rate": 0.00010924815962025828, "loss": 0.5502, "step": 12600 }, { "epoch": 0.96, "learning_rate": 0.00010920793274065731, "loss": 5.0705, "step": 12605 }, { "epoch": 0.96, "learning_rate": 0.00010916770586105635, "loss": 3.4141, "step": 12610 }, { "epoch": 0.96, "learning_rate": 0.00010912747898145541, "loss": 3.7998, "step": 12615 }, { "epoch": 0.96, "learning_rate": 0.00010908725210185447, "loss": 3.3628, "step": 12620 }, { "epoch": 0.96, "learning_rate": 0.00010904702522225351, "loss": 3.8773, "step": 12625 }, { "epoch": 0.97, "learning_rate": 0.00010900679834265257, "loss": 3.2954, "step": 12630 }, { "epoch": 0.97, "learning_rate": 0.00010896657146305163, "loss": 3.2307, "step": 12635 }, { "epoch": 0.97, "learning_rate": 0.00010892634458345066, "loss": 2.1269, "step": 12640 }, { "epoch": 0.97, "learning_rate": 0.0001088861177038497, "loss": 2.2615, "step": 12645 }, { "epoch": 0.97, "learning_rate": 0.00010884589082424876, "loss": 4.0116, "step": 12650 }, { "epoch": 0.97, "learning_rate": 0.00010880566394464782, "loss": 4.5145, "step": 12655 }, { "epoch": 0.97, "learning_rate": 0.00010876543706504687, "loss": 4.0908, "step": 12660 }, { "epoch": 0.97, "learning_rate": 0.00010872521018544593, "loss": 3.6911, "step": 12665 }, { "epoch": 0.97, "learning_rate": 0.00010868498330584498, "loss": 3.7748, "step": 12670 }, { "epoch": 0.97, "learning_rate": 0.00010864475642624403, "loss": 3.6313, "step": 12675 }, { "epoch": 0.97, "learning_rate": 0.00010860452954664306, "loss": 1.8294, "step": 12680 }, { "epoch": 0.97, "learning_rate": 0.00010856430266704212, "loss": 3.2862, "step": 12685 }, { "epoch": 0.97, "learning_rate": 0.00010852407578744118, "loss": 2.1133, "step": 12690 }, { "epoch": 0.97, "learning_rate": 0.00010848384890784022, "loss": 2.6551, "step": 12695 }, { "epoch": 0.97, "learning_rate": 0.00010844362202823928, "loss": 1.0988, "step": 12700 }, { "epoch": 0.97, "learning_rate": 0.00010840339514863834, "loss": 4.7625, "step": 12705 }, { "epoch": 0.97, "learning_rate": 0.00010836316826903738, "loss": 3.9336, "step": 12710 }, { "epoch": 0.97, "learning_rate": 0.00010832294138943641, "loss": 4.7963, "step": 12715 }, { "epoch": 0.97, "learning_rate": 0.00010828271450983547, "loss": 3.2798, "step": 12720 }, { "epoch": 0.97, "learning_rate": 0.00010824248763023453, "loss": 2.7068, "step": 12725 }, { "epoch": 0.97, "learning_rate": 0.00010820226075063357, "loss": 3.1443, "step": 12730 }, { "epoch": 0.97, "learning_rate": 0.00010816203387103263, "loss": 3.0755, "step": 12735 }, { "epoch": 0.97, "learning_rate": 0.00010812180699143169, "loss": 1.3621, "step": 12740 }, { "epoch": 0.97, "learning_rate": 0.00010808158011183074, "loss": 2.0039, "step": 12745 }, { "epoch": 0.97, "learning_rate": 0.0001080413532322298, "loss": 1.3085, "step": 12750 }, { "epoch": 0.97, "learning_rate": 0.00010800112635262882, "loss": 5.2609, "step": 12755 }, { "epoch": 0.98, "learning_rate": 0.00010796089947302788, "loss": 4.3895, "step": 12760 }, { "epoch": 0.98, "learning_rate": 0.00010792067259342693, "loss": 3.3505, "step": 12765 }, { "epoch": 0.98, "learning_rate": 0.00010788044571382599, "loss": 4.2628, "step": 12770 }, { "epoch": 0.98, "learning_rate": 0.00010784021883422504, "loss": 5.0826, "step": 12775 }, { "epoch": 0.98, "learning_rate": 0.00010779999195462409, "loss": 2.538, "step": 12780 }, { "epoch": 0.98, "learning_rate": 0.00010775976507502315, "loss": 3.901, "step": 12785 }, { "epoch": 0.98, "learning_rate": 0.00010771953819542218, "loss": 1.9004, "step": 12790 }, { "epoch": 0.98, "learning_rate": 0.00010767931131582124, "loss": 0.5085, "step": 12795 }, { "epoch": 0.98, "learning_rate": 0.00010763908443622028, "loss": 3.4105, "step": 12800 }, { "epoch": 0.98, "learning_rate": 0.00010759885755661934, "loss": 3.8672, "step": 12805 }, { "epoch": 0.98, "learning_rate": 0.00010755863067701838, "loss": 4.4018, "step": 12810 }, { "epoch": 0.98, "learning_rate": 0.00010751840379741744, "loss": 3.9307, "step": 12815 }, { "epoch": 0.98, "learning_rate": 0.0001074781769178165, "loss": 4.3627, "step": 12820 }, { "epoch": 0.98, "learning_rate": 0.00010743795003821555, "loss": 3.4223, "step": 12825 }, { "epoch": 0.98, "learning_rate": 0.00010739772315861458, "loss": 2.4649, "step": 12830 }, { "epoch": 0.98, "learning_rate": 0.00010735749627901363, "loss": 1.3693, "step": 12835 }, { "epoch": 0.98, "learning_rate": 0.00010731726939941269, "loss": 1.899, "step": 12840 }, { "epoch": 0.98, "learning_rate": 0.00010727704251981174, "loss": 0.5113, "step": 12845 }, { "epoch": 0.98, "learning_rate": 0.0001072368156402108, "loss": 2.3541, "step": 12850 }, { "epoch": 0.98, "learning_rate": 0.00010719658876060985, "loss": 5.3012, "step": 12855 }, { "epoch": 0.98, "learning_rate": 0.0001071563618810089, "loss": 3.8258, "step": 12860 }, { "epoch": 0.98, "learning_rate": 0.00010711613500140793, "loss": 4.4615, "step": 12865 }, { "epoch": 0.98, "learning_rate": 0.00010707590812180699, "loss": 4.375, "step": 12870 }, { "epoch": 0.98, "learning_rate": 0.00010703568124220605, "loss": 3.2504, "step": 12875 }, { "epoch": 0.98, "learning_rate": 0.00010699545436260509, "loss": 2.7794, "step": 12880 }, { "epoch": 0.98, "learning_rate": 0.00010695522748300415, "loss": 2.613, "step": 12885 }, { "epoch": 0.99, "learning_rate": 0.00010691500060340321, "loss": 2.0133, "step": 12890 }, { "epoch": 0.99, "learning_rate": 0.00010687477372380225, "loss": 3.0668, "step": 12895 }, { "epoch": 0.99, "learning_rate": 0.00010683454684420131, "loss": 2.141, "step": 12900 }, { "epoch": 0.99, "learning_rate": 0.00010679431996460034, "loss": 3.9652, "step": 12905 }, { "epoch": 0.99, "learning_rate": 0.0001067540930849994, "loss": 5.0807, "step": 12910 }, { "epoch": 0.99, "learning_rate": 0.00010671386620539844, "loss": 4.0959, "step": 12915 }, { "epoch": 0.99, "learning_rate": 0.0001066736393257975, "loss": 2.3016, "step": 12920 }, { "epoch": 0.99, "learning_rate": 0.00010663341244619656, "loss": 3.6994, "step": 12925 }, { "epoch": 0.99, "learning_rate": 0.0001065931855665956, "loss": 3.4188, "step": 12930 }, { "epoch": 0.99, "learning_rate": 0.00010655295868699466, "loss": 2.1822, "step": 12935 }, { "epoch": 0.99, "learning_rate": 0.0001065127318073937, "loss": 2.6569, "step": 12940 }, { "epoch": 0.99, "learning_rate": 0.00010647250492779275, "loss": 2.9767, "step": 12945 }, { "epoch": 0.99, "learning_rate": 0.0001064322780481918, "loss": 0.6595, "step": 12950 }, { "epoch": 0.99, "learning_rate": 0.00010639205116859086, "loss": 4.4453, "step": 12955 }, { "epoch": 0.99, "learning_rate": 0.00010635182428898991, "loss": 4.6098, "step": 12960 }, { "epoch": 0.99, "learning_rate": 0.00010631159740938896, "loss": 4.4469, "step": 12965 }, { "epoch": 0.99, "learning_rate": 0.00010627137052978802, "loss": 4.1836, "step": 12970 }, { "epoch": 0.99, "learning_rate": 0.00010623114365018708, "loss": 3.3398, "step": 12975 }, { "epoch": 0.99, "learning_rate": 0.0001061909167705861, "loss": 3.8299, "step": 12980 }, { "epoch": 0.99, "learning_rate": 0.00010615068989098515, "loss": 2.2403, "step": 12985 }, { "epoch": 0.99, "learning_rate": 0.00010611046301138421, "loss": 1.9197, "step": 12990 }, { "epoch": 0.99, "learning_rate": 0.00010607023613178327, "loss": 1.7764, "step": 12995 }, { "epoch": 0.99, "learning_rate": 0.00010603000925218231, "loss": 3.4165, "step": 13000 }, { "epoch": 0.99, "learning_rate": 0.00010598978237258137, "loss": 5.0324, "step": 13005 }, { "epoch": 0.99, "learning_rate": 0.00010594955549298043, "loss": 4.4018, "step": 13010 }, { "epoch": 0.99, "learning_rate": 0.00010590932861337946, "loss": 4.1213, "step": 13015 }, { "epoch": 1.0, "learning_rate": 0.0001058691017337785, "loss": 4.3871, "step": 13020 }, { "epoch": 1.0, "learning_rate": 0.00010582887485417756, "loss": 2.7148, "step": 13025 }, { "epoch": 1.0, "learning_rate": 0.00010578864797457662, "loss": 2.8373, "step": 13030 }, { "epoch": 1.0, "learning_rate": 0.00010574842109497567, "loss": 2.7298, "step": 13035 }, { "epoch": 1.0, "learning_rate": 0.00010570819421537472, "loss": 0.7858, "step": 13040 }, { "epoch": 1.0, "learning_rate": 0.00010566796733577378, "loss": 1.4459, "step": 13045 }, { "epoch": 1.0, "learning_rate": 0.00010562774045617283, "loss": 2.2835, "step": 13050 }, { "epoch": 1.0, "learning_rate": 0.00010558751357657186, "loss": 4.6979, "step": 13055 }, { "epoch": 1.0, "learning_rate": 0.00010554728669697092, "loss": 3.1915, "step": 13060 }, { "epoch": 1.0, "learning_rate": 0.00010550705981736997, "loss": 2.701, "step": 13065 }, { "epoch": 1.0, "learning_rate": 0.00010546683293776902, "loss": 3.3332, "step": 13070 }, { "epoch": 1.0, "learning_rate": 0.00010542660605816808, "loss": 3.1688, "step": 13075 }, { "epoch": 1.0, "learning_rate": 0.00010538637917856714, "loss": 2.2415, "step": 13080 }, { "epoch": 1.0, "eval_exact_match": 19.315068493150687, "eval_f1": 23.273953174088238, "eval_loss": 3.1703224182128906, "eval_runtime": 132.4755, "eval_samples_per_second": 11.021, "eval_steps_per_second": 11.021, "step": 13084 }, { "epoch": 1.0, "learning_rate": 0.00010534615229896618, "loss": 5.7792, "step": 13085 }, { "epoch": 1.0, "learning_rate": 0.00010530592541936521, "loss": 3.9447, "step": 13090 }, { "epoch": 1.0, "learning_rate": 0.00010526569853976427, "loss": 3.901, "step": 13095 }, { "epoch": 1.0, "learning_rate": 0.00010522547166016333, "loss": 4.2629, "step": 13100 }, { "epoch": 1.0, "learning_rate": 0.00010518524478056237, "loss": 3.0552, "step": 13105 }, { "epoch": 1.0, "learning_rate": 0.00010514501790096143, "loss": 2.5984, "step": 13110 }, { "epoch": 1.0, "learning_rate": 0.00010510479102136048, "loss": 2.634, "step": 13115 }, { "epoch": 1.0, "learning_rate": 0.00010506456414175953, "loss": 2.7914, "step": 13120 }, { "epoch": 1.0, "learning_rate": 0.00010502433726215859, "loss": 2.0628, "step": 13125 }, { "epoch": 1.0, "learning_rate": 0.00010498411038255762, "loss": 3.1447, "step": 13130 }, { "epoch": 1.0, "learning_rate": 0.00010494388350295667, "loss": 2.7328, "step": 13135 }, { "epoch": 1.0, "learning_rate": 0.00010490365662335573, "loss": 3.76, "step": 13140 }, { "epoch": 1.0, "learning_rate": 0.00010486342974375478, "loss": 4.6025, "step": 13145 }, { "epoch": 1.01, "learning_rate": 0.00010482320286415383, "loss": 3.5472, "step": 13150 }, { "epoch": 1.01, "learning_rate": 0.00010478297598455289, "loss": 3.857, "step": 13155 }, { "epoch": 1.01, "learning_rate": 0.00010474274910495195, "loss": 2.5723, "step": 13160 }, { "epoch": 1.01, "learning_rate": 0.00010470252222535098, "loss": 2.8614, "step": 13165 }, { "epoch": 1.01, "learning_rate": 0.00010466229534575002, "loss": 2.782, "step": 13170 }, { "epoch": 1.01, "learning_rate": 0.00010462206846614908, "loss": 2.5527, "step": 13175 }, { "epoch": 1.01, "learning_rate": 0.00010458184158654814, "loss": 2.8946, "step": 13180 }, { "epoch": 1.01, "learning_rate": 0.00010454161470694718, "loss": 2.8931, "step": 13185 }, { "epoch": 1.01, "learning_rate": 0.00010450138782734624, "loss": 4.4236, "step": 13190 }, { "epoch": 1.01, "learning_rate": 0.0001044611609477453, "loss": 4.7928, "step": 13195 }, { "epoch": 1.01, "learning_rate": 0.00010442093406814434, "loss": 3.9273, "step": 13200 }, { "epoch": 1.01, "learning_rate": 0.00010438070718854338, "loss": 3.9992, "step": 13205 }, { "epoch": 1.01, "learning_rate": 0.00010434048030894243, "loss": 4.2393, "step": 13210 }, { "epoch": 1.01, "learning_rate": 0.00010430025342934149, "loss": 3.4548, "step": 13215 }, { "epoch": 1.01, "learning_rate": 0.00010426002654974054, "loss": 3.0828, "step": 13220 }, { "epoch": 1.01, "learning_rate": 0.0001042197996701396, "loss": 2.7957, "step": 13225 }, { "epoch": 1.01, "learning_rate": 0.00010417957279053865, "loss": 2.4393, "step": 13230 }, { "epoch": 1.01, "learning_rate": 0.0001041393459109377, "loss": 3.4124, "step": 13235 }, { "epoch": 1.01, "learning_rate": 0.00010409911903133673, "loss": 4.693, "step": 13240 }, { "epoch": 1.01, "learning_rate": 0.00010405889215173579, "loss": 3.4697, "step": 13245 }, { "epoch": 1.01, "learning_rate": 0.00010401866527213485, "loss": 2.767, "step": 13250 }, { "epoch": 1.01, "learning_rate": 0.00010397843839253389, "loss": 3.817, "step": 13255 }, { "epoch": 1.01, "learning_rate": 0.00010393821151293295, "loss": 1.5805, "step": 13260 }, { "epoch": 1.01, "learning_rate": 0.000103897984633332, "loss": 2.659, "step": 13265 }, { "epoch": 1.01, "learning_rate": 0.00010385775775373105, "loss": 1.4231, "step": 13270 }, { "epoch": 1.01, "learning_rate": 0.00010381753087413011, "loss": 1.9977, "step": 13275 }, { "epoch": 1.01, "learning_rate": 0.00010377730399452914, "loss": 1.0283, "step": 13280 }, { "epoch": 1.02, "learning_rate": 0.0001037370771149282, "loss": 4.241, "step": 13285 }, { "epoch": 1.02, "learning_rate": 0.00010369685023532724, "loss": 3.7126, "step": 13290 }, { "epoch": 1.02, "learning_rate": 0.0001036566233557263, "loss": 3.5291, "step": 13295 }, { "epoch": 1.02, "learning_rate": 0.00010361639647612536, "loss": 4.2404, "step": 13300 }, { "epoch": 1.02, "learning_rate": 0.0001035761695965244, "loss": 3.9266, "step": 13305 }, { "epoch": 1.02, "learning_rate": 0.00010353594271692346, "loss": 3.1577, "step": 13310 }, { "epoch": 1.02, "learning_rate": 0.0001034957158373225, "loss": 4.3291, "step": 13315 }, { "epoch": 1.02, "learning_rate": 0.00010345548895772155, "loss": 2.2583, "step": 13320 }, { "epoch": 1.02, "learning_rate": 0.0001034152620781206, "loss": 2.4731, "step": 13325 }, { "epoch": 1.02, "learning_rate": 0.00010337503519851966, "loss": 2.9766, "step": 13330 }, { "epoch": 1.02, "learning_rate": 0.00010333480831891871, "loss": 3.4053, "step": 13335 }, { "epoch": 1.02, "learning_rate": 0.00010329458143931776, "loss": 5.1129, "step": 13340 }, { "epoch": 1.02, "learning_rate": 0.00010325435455971682, "loss": 4.6836, "step": 13345 }, { "epoch": 1.02, "learning_rate": 0.00010321412768011587, "loss": 3.6654, "step": 13350 }, { "epoch": 1.02, "learning_rate": 0.0001031739008005149, "loss": 4.1396, "step": 13355 }, { "epoch": 1.02, "learning_rate": 0.00010313367392091395, "loss": 3.5745, "step": 13360 }, { "epoch": 1.02, "learning_rate": 0.00010309344704131301, "loss": 2.6486, "step": 13365 }, { "epoch": 1.02, "learning_rate": 0.00010305322016171207, "loss": 2.9922, "step": 13370 }, { "epoch": 1.02, "learning_rate": 0.00010301299328211111, "loss": 2.7838, "step": 13375 }, { "epoch": 1.02, "learning_rate": 0.00010297276640251017, "loss": 1.5658, "step": 13380 }, { "epoch": 1.02, "learning_rate": 0.00010293253952290923, "loss": 1.1819, "step": 13385 }, { "epoch": 1.02, "learning_rate": 0.00010289231264330826, "loss": 3.4799, "step": 13390 }, { "epoch": 1.02, "learning_rate": 0.0001028520857637073, "loss": 4.5635, "step": 13395 }, { "epoch": 1.02, "learning_rate": 0.00010281185888410636, "loss": 3.1668, "step": 13400 }, { "epoch": 1.02, "learning_rate": 0.00010277163200450542, "loss": 3.4604, "step": 13405 }, { "epoch": 1.02, "learning_rate": 0.00010273140512490447, "loss": 2.7437, "step": 13410 }, { "epoch": 1.03, "learning_rate": 0.00010269117824530352, "loss": 1.6988, "step": 13415 }, { "epoch": 1.03, "learning_rate": 0.00010265095136570257, "loss": 3.8053, "step": 13420 }, { "epoch": 1.03, "learning_rate": 0.00010261072448610163, "loss": 1.8165, "step": 13425 }, { "epoch": 1.03, "learning_rate": 0.00010257049760650066, "loss": 1.5206, "step": 13430 }, { "epoch": 1.03, "learning_rate": 0.00010253027072689972, "loss": 1.837, "step": 13435 }, { "epoch": 1.03, "learning_rate": 0.00010249004384729876, "loss": 3.6209, "step": 13440 }, { "epoch": 1.03, "learning_rate": 0.00010244981696769782, "loss": 5.1863, "step": 13445 }, { "epoch": 1.03, "learning_rate": 0.00010240959008809688, "loss": 3.5703, "step": 13450 }, { "epoch": 1.03, "learning_rate": 0.00010236936320849592, "loss": 4.4045, "step": 13455 }, { "epoch": 1.03, "learning_rate": 0.00010232913632889498, "loss": 2.1611, "step": 13460 }, { "epoch": 1.03, "learning_rate": 0.00010228890944929401, "loss": 1.9873, "step": 13465 }, { "epoch": 1.03, "learning_rate": 0.00010224868256969307, "loss": 2.744, "step": 13470 }, { "epoch": 1.03, "learning_rate": 0.00010220845569009211, "loss": 3.7924, "step": 13475 }, { "epoch": 1.03, "learning_rate": 0.00010216822881049117, "loss": 1.564, "step": 13480 }, { "epoch": 1.03, "learning_rate": 0.00010212800193089023, "loss": 1.5397, "step": 13485 }, { "epoch": 1.03, "learning_rate": 0.00010208777505128927, "loss": 4.3014, "step": 13490 }, { "epoch": 1.03, "learning_rate": 0.00010204754817168833, "loss": 4.2459, "step": 13495 }, { "epoch": 1.03, "learning_rate": 0.00010200732129208739, "loss": 3.9824, "step": 13500 }, { "epoch": 1.03, "learning_rate": 0.00010196709441248642, "loss": 3.8135, "step": 13505 }, { "epoch": 1.03, "learning_rate": 0.00010192686753288547, "loss": 2.5058, "step": 13510 }, { "epoch": 1.03, "learning_rate": 0.00010188664065328453, "loss": 3.7534, "step": 13515 }, { "epoch": 1.03, "learning_rate": 0.00010184641377368358, "loss": 3.8482, "step": 13520 }, { "epoch": 1.03, "learning_rate": 0.00010180618689408263, "loss": 2.4387, "step": 13525 }, { "epoch": 1.03, "learning_rate": 0.00010176596001448169, "loss": 1.9385, "step": 13530 }, { "epoch": 1.03, "learning_rate": 0.00010172573313488074, "loss": 2.6273, "step": 13535 }, { "epoch": 1.03, "learning_rate": 0.00010168550625527978, "loss": 4.2504, "step": 13540 }, { "epoch": 1.04, "learning_rate": 0.00010164527937567882, "loss": 4.3891, "step": 13545 }, { "epoch": 1.04, "learning_rate": 0.00010160505249607788, "loss": 4.7129, "step": 13550 }, { "epoch": 1.04, "learning_rate": 0.00010156482561647694, "loss": 4.8672, "step": 13555 }, { "epoch": 1.04, "learning_rate": 0.00010152459873687598, "loss": 4.241, "step": 13560 }, { "epoch": 1.04, "learning_rate": 0.00010148437185727504, "loss": 3.9742, "step": 13565 }, { "epoch": 1.04, "learning_rate": 0.0001014441449776741, "loss": 2.0136, "step": 13570 }, { "epoch": 1.04, "learning_rate": 0.00010140391809807314, "loss": 1.6322, "step": 13575 }, { "epoch": 1.04, "learning_rate": 0.00010136369121847217, "loss": 1.4756, "step": 13580 }, { "epoch": 1.04, "learning_rate": 0.00010132346433887123, "loss": 4.3438, "step": 13585 }, { "epoch": 1.04, "learning_rate": 0.00010128323745927029, "loss": 4.408, "step": 13590 }, { "epoch": 1.04, "learning_rate": 0.00010124301057966934, "loss": 3.9469, "step": 13595 }, { "epoch": 1.04, "learning_rate": 0.0001012027837000684, "loss": 3.242, "step": 13600 }, { "epoch": 1.04, "learning_rate": 0.00010116255682046745, "loss": 3.3846, "step": 13605 }, { "epoch": 1.04, "learning_rate": 0.0001011223299408665, "loss": 3.5865, "step": 13610 }, { "epoch": 1.04, "learning_rate": 0.00010108210306126553, "loss": 0.9825, "step": 13615 }, { "epoch": 1.04, "learning_rate": 0.00010104187618166459, "loss": 1.6402, "step": 13620 }, { "epoch": 1.04, "learning_rate": 0.00010100164930206364, "loss": 1.2693, "step": 13625 }, { "epoch": 1.04, "learning_rate": 0.00010096142242246269, "loss": 2.1932, "step": 13630 }, { "epoch": 1.04, "learning_rate": 0.00010092119554286175, "loss": 3.0698, "step": 13635 }, { "epoch": 1.04, "learning_rate": 0.0001008809686632608, "loss": 4.5355, "step": 13640 }, { "epoch": 1.04, "learning_rate": 0.00010084074178365985, "loss": 4.377, "step": 13645 }, { "epoch": 1.04, "learning_rate": 0.00010080051490405891, "loss": 2.7748, "step": 13650 }, { "epoch": 1.04, "learning_rate": 0.00010076028802445794, "loss": 2.7549, "step": 13655 }, { "epoch": 1.04, "learning_rate": 0.000100720061144857, "loss": 2.8492, "step": 13660 }, { "epoch": 1.04, "learning_rate": 0.00010067983426525604, "loss": 2.1549, "step": 13665 }, { "epoch": 1.04, "learning_rate": 0.0001006396073856551, "loss": 4.6738, "step": 13670 }, { "epoch": 1.05, "learning_rate": 0.00010059938050605416, "loss": 3.3374, "step": 13675 }, { "epoch": 1.05, "learning_rate": 0.0001005591536264532, "loss": 1.8652, "step": 13680 }, { "epoch": 1.05, "learning_rate": 0.00010051892674685226, "loss": 1.9717, "step": 13685 }, { "epoch": 1.05, "learning_rate": 0.00010047869986725129, "loss": 4.8148, "step": 13690 }, { "epoch": 1.05, "learning_rate": 0.00010043847298765035, "loss": 2.9995, "step": 13695 }, { "epoch": 1.05, "learning_rate": 0.0001003982461080494, "loss": 3.6135, "step": 13700 }, { "epoch": 1.05, "learning_rate": 0.00010035801922844845, "loss": 3.8224, "step": 13705 }, { "epoch": 1.05, "learning_rate": 0.00010031779234884751, "loss": 3.3087, "step": 13710 }, { "epoch": 1.05, "learning_rate": 0.00010027756546924656, "loss": 3.1735, "step": 13715 }, { "epoch": 1.05, "learning_rate": 0.00010023733858964562, "loss": 2.4742, "step": 13720 }, { "epoch": 1.05, "learning_rate": 0.00010019711171004466, "loss": 1.4694, "step": 13725 }, { "epoch": 1.05, "learning_rate": 0.00010015688483044369, "loss": 0.4333, "step": 13730 }, { "epoch": 1.05, "learning_rate": 0.00010011665795084275, "loss": 1.2572, "step": 13735 }, { "epoch": 1.05, "learning_rate": 0.00010007643107124181, "loss": 3.6918, "step": 13740 }, { "epoch": 1.05, "learning_rate": 0.00010003620419164085, "loss": 3.8943, "step": 13745 }, { "epoch": 1.05, "learning_rate": 9.999597731203991e-05, "loss": 4.2775, "step": 13750 }, { "epoch": 1.05, "learning_rate": 9.995575043243897e-05, "loss": 3.6002, "step": 13755 }, { "epoch": 1.05, "learning_rate": 9.991552355283801e-05, "loss": 2.8794, "step": 13760 }, { "epoch": 1.05, "learning_rate": 9.987529667323706e-05, "loss": 3.1105, "step": 13765 }, { "epoch": 1.05, "learning_rate": 9.983506979363612e-05, "loss": 4.7389, "step": 13770 }, { "epoch": 1.05, "learning_rate": 9.979484291403516e-05, "loss": 2.5333, "step": 13775 }, { "epoch": 1.05, "learning_rate": 9.97546160344342e-05, "loss": 2.6911, "step": 13780 }, { "epoch": 1.05, "learning_rate": 9.971438915483326e-05, "loss": 0.6666, "step": 13785 }, { "epoch": 1.05, "learning_rate": 9.967416227523232e-05, "loss": 4.4268, "step": 13790 }, { "epoch": 1.05, "learning_rate": 9.963393539563137e-05, "loss": 4.1672, "step": 13795 }, { "epoch": 1.05, "learning_rate": 9.959370851603041e-05, "loss": 3.8434, "step": 13800 }, { "epoch": 1.06, "learning_rate": 9.955348163642947e-05, "loss": 4.1248, "step": 13805 }, { "epoch": 1.06, "learning_rate": 9.951325475682853e-05, "loss": 3.2066, "step": 13810 }, { "epoch": 1.06, "learning_rate": 9.947302787722756e-05, "loss": 3.5237, "step": 13815 }, { "epoch": 1.06, "learning_rate": 9.943280099762662e-05, "loss": 3.5643, "step": 13820 }, { "epoch": 1.06, "learning_rate": 9.939257411802568e-05, "loss": 3.0314, "step": 13825 }, { "epoch": 1.06, "learning_rate": 9.935234723842472e-05, "loss": 1.1349, "step": 13830 }, { "epoch": 1.06, "learning_rate": 9.931212035882377e-05, "loss": 3.0678, "step": 13835 }, { "epoch": 1.06, "learning_rate": 9.927189347922282e-05, "loss": 3.6559, "step": 13840 }, { "epoch": 1.06, "learning_rate": 9.923166659962188e-05, "loss": 4.8559, "step": 13845 }, { "epoch": 1.06, "learning_rate": 9.919143972002091e-05, "loss": 3.8783, "step": 13850 }, { "epoch": 1.06, "learning_rate": 9.915121284041997e-05, "loss": 4.1764, "step": 13855 }, { "epoch": 1.06, "learning_rate": 9.911098596081903e-05, "loss": 3.7188, "step": 13860 }, { "epoch": 1.06, "learning_rate": 9.907075908121807e-05, "loss": 2.8204, "step": 13865 }, { "epoch": 1.06, "learning_rate": 9.903053220161712e-05, "loss": 3.4747, "step": 13870 }, { "epoch": 1.06, "learning_rate": 9.899030532201618e-05, "loss": 2.0576, "step": 13875 }, { "epoch": 1.06, "learning_rate": 9.895007844241522e-05, "loss": 1.8002, "step": 13880 }, { "epoch": 1.06, "learning_rate": 9.890985156281428e-05, "loss": 1.943, "step": 13885 }, { "epoch": 1.06, "learning_rate": 9.886962468321332e-05, "loss": 5.2752, "step": 13890 }, { "epoch": 1.06, "learning_rate": 9.882939780361238e-05, "loss": 3.9357, "step": 13895 }, { "epoch": 1.06, "learning_rate": 9.878917092401143e-05, "loss": 3.7773, "step": 13900 }, { "epoch": 1.06, "learning_rate": 9.874894404441049e-05, "loss": 5.2211, "step": 13905 }, { "epoch": 1.06, "learning_rate": 9.870871716480953e-05, "loss": 3.1906, "step": 13910 }, { "epoch": 1.06, "learning_rate": 9.866849028520858e-05, "loss": 2.1916, "step": 13915 }, { "epoch": 1.06, "learning_rate": 9.862826340560763e-05, "loss": 3.045, "step": 13920 }, { "epoch": 1.06, "learning_rate": 9.858803652600668e-05, "loss": 3.3197, "step": 13925 }, { "epoch": 1.06, "learning_rate": 9.854780964640574e-05, "loss": 1.3265, "step": 13930 }, { "epoch": 1.07, "learning_rate": 9.850758276680478e-05, "loss": 2.4261, "step": 13935 }, { "epoch": 1.07, "learning_rate": 9.846735588720384e-05, "loss": 4.1789, "step": 13940 }, { "epoch": 1.07, "learning_rate": 9.842712900760288e-05, "loss": 4.1479, "step": 13945 }, { "epoch": 1.07, "learning_rate": 9.838690212800193e-05, "loss": 3.1523, "step": 13950 }, { "epoch": 1.07, "learning_rate": 9.834667524840099e-05, "loss": 3.0566, "step": 13955 }, { "epoch": 1.07, "learning_rate": 9.830644836880005e-05, "loss": 3.6089, "step": 13960 }, { "epoch": 1.07, "learning_rate": 9.826622148919909e-05, "loss": 2.8464, "step": 13965 }, { "epoch": 1.07, "learning_rate": 9.822599460959813e-05, "loss": 1.619, "step": 13970 }, { "epoch": 1.07, "learning_rate": 9.818576772999719e-05, "loss": 2.2897, "step": 13975 }, { "epoch": 1.07, "learning_rate": 9.814554085039625e-05, "loss": 1.4078, "step": 13980 }, { "epoch": 1.07, "learning_rate": 9.810531397079528e-05, "loss": 2.4326, "step": 13985 }, { "epoch": 1.07, "learning_rate": 9.806508709119434e-05, "loss": 4.1578, "step": 13990 }, { "epoch": 1.07, "learning_rate": 9.80248602115934e-05, "loss": 4.7398, "step": 13995 }, { "epoch": 1.07, "learning_rate": 9.798463333199244e-05, "loss": 3.6618, "step": 14000 }, { "epoch": 1.07, "learning_rate": 9.794440645239149e-05, "loss": 3.7291, "step": 14005 }, { "epoch": 1.07, "learning_rate": 9.790417957279055e-05, "loss": 2.9765, "step": 14010 }, { "epoch": 1.07, "learning_rate": 9.78639526931896e-05, "loss": 3.6363, "step": 14015 }, { "epoch": 1.07, "learning_rate": 9.782372581358864e-05, "loss": 1.3663, "step": 14020 }, { "epoch": 1.07, "learning_rate": 9.77834989339877e-05, "loss": 3.0463, "step": 14025 }, { "epoch": 1.07, "learning_rate": 9.774327205438675e-05, "loss": 5.0727, "step": 14030 }, { "epoch": 1.07, "learning_rate": 9.77030451747858e-05, "loss": 1.6521, "step": 14035 }, { "epoch": 1.07, "learning_rate": 9.766281829518484e-05, "loss": 4.0453, "step": 14040 }, { "epoch": 1.07, "learning_rate": 9.76225914155839e-05, "loss": 4.0281, "step": 14045 }, { "epoch": 1.07, "learning_rate": 9.758236453598294e-05, "loss": 4.8402, "step": 14050 }, { "epoch": 1.07, "learning_rate": 9.7542137656382e-05, "loss": 4.2115, "step": 14055 }, { "epoch": 1.07, "learning_rate": 9.750191077678105e-05, "loss": 2.9648, "step": 14060 }, { "epoch": 1.07, "learning_rate": 9.74616838971801e-05, "loss": 2.7151, "step": 14065 }, { "epoch": 1.08, "learning_rate": 9.742145701757915e-05, "loss": 2.5117, "step": 14070 }, { "epoch": 1.08, "learning_rate": 9.73812301379782e-05, "loss": 1.9299, "step": 14075 }, { "epoch": 1.08, "learning_rate": 9.734100325837725e-05, "loss": 2.7252, "step": 14080 }, { "epoch": 1.08, "learning_rate": 9.73007763787763e-05, "loss": 2.3402, "step": 14085 }, { "epoch": 1.08, "learning_rate": 9.726054949917536e-05, "loss": 4.2039, "step": 14090 }, { "epoch": 1.08, "learning_rate": 9.72203226195744e-05, "loss": 4.5305, "step": 14095 }, { "epoch": 1.08, "learning_rate": 9.718009573997346e-05, "loss": 3.88, "step": 14100 }, { "epoch": 1.08, "learning_rate": 9.71398688603725e-05, "loss": 3.9152, "step": 14105 }, { "epoch": 1.08, "learning_rate": 9.709964198077156e-05, "loss": 2.9659, "step": 14110 }, { "epoch": 1.08, "learning_rate": 9.70594151011706e-05, "loss": 1.5826, "step": 14115 }, { "epoch": 1.08, "learning_rate": 9.701918822156965e-05, "loss": 2.4249, "step": 14120 }, { "epoch": 1.08, "learning_rate": 9.697896134196871e-05, "loss": 1.3763, "step": 14125 }, { "epoch": 1.08, "learning_rate": 9.693873446236777e-05, "loss": 3.5167, "step": 14130 }, { "epoch": 1.08, "learning_rate": 9.689850758276681e-05, "loss": 1.3236, "step": 14135 }, { "epoch": 1.08, "learning_rate": 9.685828070316586e-05, "loss": 3.5751, "step": 14140 }, { "epoch": 1.08, "learning_rate": 9.681805382356492e-05, "loss": 5.0203, "step": 14145 }, { "epoch": 1.08, "learning_rate": 9.677782694396396e-05, "loss": 4.1728, "step": 14150 }, { "epoch": 1.08, "learning_rate": 9.6737600064363e-05, "loss": 3.4594, "step": 14155 }, { "epoch": 1.08, "learning_rate": 9.669737318476206e-05, "loss": 3.3553, "step": 14160 }, { "epoch": 1.08, "learning_rate": 9.665714630516112e-05, "loss": 2.2479, "step": 14165 }, { "epoch": 1.08, "learning_rate": 9.661691942556017e-05, "loss": 1.877, "step": 14170 }, { "epoch": 1.08, "learning_rate": 9.657669254595921e-05, "loss": 2.6314, "step": 14175 }, { "epoch": 1.08, "learning_rate": 9.653646566635827e-05, "loss": 2.0204, "step": 14180 }, { "epoch": 1.08, "learning_rate": 9.649623878675731e-05, "loss": 0.9242, "step": 14185 }, { "epoch": 1.08, "learning_rate": 9.645601190715636e-05, "loss": 4.19, "step": 14190 }, { "epoch": 1.08, "learning_rate": 9.641578502755542e-05, "loss": 3.0912, "step": 14195 }, { "epoch": 1.09, "learning_rate": 9.637555814795447e-05, "loss": 4.9994, "step": 14200 }, { "epoch": 1.09, "learning_rate": 9.633533126835352e-05, "loss": 3.509, "step": 14205 }, { "epoch": 1.09, "learning_rate": 9.629510438875256e-05, "loss": 2.7194, "step": 14210 }, { "epoch": 1.09, "learning_rate": 9.625487750915162e-05, "loss": 2.4299, "step": 14215 }, { "epoch": 1.09, "learning_rate": 9.621465062955067e-05, "loss": 2.4398, "step": 14220 }, { "epoch": 1.09, "learning_rate": 9.617442374994971e-05, "loss": 2.8874, "step": 14225 }, { "epoch": 1.09, "learning_rate": 9.613419687034877e-05, "loss": 2.0132, "step": 14230 }, { "epoch": 1.09, "learning_rate": 9.609396999074783e-05, "loss": 2.243, "step": 14235 }, { "epoch": 1.09, "learning_rate": 9.605374311114687e-05, "loss": 4.0598, "step": 14240 }, { "epoch": 1.09, "learning_rate": 9.601351623154592e-05, "loss": 5.1547, "step": 14245 }, { "epoch": 1.09, "learning_rate": 9.597328935194498e-05, "loss": 3.9782, "step": 14250 }, { "epoch": 1.09, "learning_rate": 9.593306247234402e-05, "loss": 2.596, "step": 14255 }, { "epoch": 1.09, "learning_rate": 9.589283559274308e-05, "loss": 1.8018, "step": 14260 }, { "epoch": 1.09, "learning_rate": 9.585260871314212e-05, "loss": 3.1381, "step": 14265 }, { "epoch": 1.09, "learning_rate": 9.581238183354118e-05, "loss": 3.5054, "step": 14270 }, { "epoch": 1.09, "learning_rate": 9.577215495394023e-05, "loss": 2.9537, "step": 14275 }, { "epoch": 1.09, "learning_rate": 9.573192807433928e-05, "loss": 1.9567, "step": 14280 }, { "epoch": 1.09, "learning_rate": 9.569170119473833e-05, "loss": 2.3284, "step": 14285 }, { "epoch": 1.09, "learning_rate": 9.565147431513737e-05, "loss": 3.6228, "step": 14290 }, { "epoch": 1.09, "learning_rate": 9.561124743553643e-05, "loss": 3.4064, "step": 14295 }, { "epoch": 1.09, "learning_rate": 9.557102055593548e-05, "loss": 4.2512, "step": 14300 }, { "epoch": 1.09, "learning_rate": 9.553079367633454e-05, "loss": 3.4361, "step": 14305 }, { "epoch": 1.09, "learning_rate": 9.549056679673358e-05, "loss": 3.2916, "step": 14310 }, { "epoch": 1.09, "learning_rate": 9.545033991713264e-05, "loss": 4.3375, "step": 14315 }, { "epoch": 1.09, "learning_rate": 9.541011303753168e-05, "loss": 1.8709, "step": 14320 }, { "epoch": 1.09, "learning_rate": 9.536988615793073e-05, "loss": 1.8777, "step": 14325 }, { "epoch": 1.1, "learning_rate": 9.532965927832979e-05, "loss": 3.9809, "step": 14330 }, { "epoch": 1.1, "learning_rate": 9.528943239872884e-05, "loss": 4.1262, "step": 14335 }, { "epoch": 1.1, "learning_rate": 9.524920551912788e-05, "loss": 4.7285, "step": 14340 }, { "epoch": 1.1, "learning_rate": 9.520897863952693e-05, "loss": 4.2479, "step": 14345 }, { "epoch": 1.1, "learning_rate": 9.516875175992599e-05, "loss": 3.2966, "step": 14350 }, { "epoch": 1.1, "learning_rate": 9.512852488032504e-05, "loss": 3.9391, "step": 14355 }, { "epoch": 1.1, "learning_rate": 9.508829800072408e-05, "loss": 3.4237, "step": 14360 }, { "epoch": 1.1, "learning_rate": 9.504807112112314e-05, "loss": 2.8419, "step": 14365 }, { "epoch": 1.1, "learning_rate": 9.50078442415222e-05, "loss": 3.4014, "step": 14370 }, { "epoch": 1.1, "learning_rate": 9.496761736192123e-05, "loss": 2.3402, "step": 14375 }, { "epoch": 1.1, "learning_rate": 9.492739048232029e-05, "loss": 2.5134, "step": 14380 }, { "epoch": 1.1, "learning_rate": 9.488716360271935e-05, "loss": 2.5162, "step": 14385 }, { "epoch": 1.1, "learning_rate": 9.484693672311839e-05, "loss": 4.0307, "step": 14390 }, { "epoch": 1.1, "learning_rate": 9.480670984351743e-05, "loss": 4.8123, "step": 14395 }, { "epoch": 1.1, "learning_rate": 9.476648296391649e-05, "loss": 3.1288, "step": 14400 }, { "epoch": 1.1, "learning_rate": 9.472625608431555e-05, "loss": 3.1115, "step": 14405 }, { "epoch": 1.1, "learning_rate": 9.46860292047146e-05, "loss": 2.6729, "step": 14410 }, { "epoch": 1.1, "learning_rate": 9.464580232511364e-05, "loss": 1.9552, "step": 14415 }, { "epoch": 1.1, "learning_rate": 9.46055754455127e-05, "loss": 3.342, "step": 14420 }, { "epoch": 1.1, "learning_rate": 9.456534856591174e-05, "loss": 0.694, "step": 14425 }, { "epoch": 1.1, "learning_rate": 9.45251216863108e-05, "loss": 1.77, "step": 14430 }, { "epoch": 1.1, "learning_rate": 9.448489480670985e-05, "loss": 1.6854, "step": 14435 }, { "epoch": 1.1, "learning_rate": 9.44446679271089e-05, "loss": 4.6209, "step": 14440 }, { "epoch": 1.1, "learning_rate": 9.440444104750795e-05, "loss": 4.9201, "step": 14445 }, { "epoch": 1.1, "learning_rate": 9.4364214167907e-05, "loss": 4.5273, "step": 14450 }, { "epoch": 1.1, "learning_rate": 9.432398728830605e-05, "loss": 3.6521, "step": 14455 }, { "epoch": 1.11, "learning_rate": 9.42837604087051e-05, "loss": 3.382, "step": 14460 }, { "epoch": 1.11, "learning_rate": 9.424353352910416e-05, "loss": 2.8238, "step": 14465 }, { "epoch": 1.11, "learning_rate": 9.42033066495032e-05, "loss": 3.1938, "step": 14470 }, { "epoch": 1.11, "learning_rate": 9.416307976990226e-05, "loss": 1.7498, "step": 14475 }, { "epoch": 1.11, "learning_rate": 9.41228528903013e-05, "loss": 2.284, "step": 14480 }, { "epoch": 1.11, "learning_rate": 9.408262601070036e-05, "loss": 2.6607, "step": 14485 }, { "epoch": 1.11, "learning_rate": 9.40423991310994e-05, "loss": 3.5379, "step": 14490 }, { "epoch": 1.11, "learning_rate": 9.400217225149845e-05, "loss": 4.8506, "step": 14495 }, { "epoch": 1.11, "learning_rate": 9.396194537189751e-05, "loss": 4.3186, "step": 14500 }, { "epoch": 1.11, "learning_rate": 9.392171849229657e-05, "loss": 4.0508, "step": 14505 }, { "epoch": 1.11, "learning_rate": 9.38814916126956e-05, "loss": 3.2298, "step": 14510 }, { "epoch": 1.11, "learning_rate": 9.384126473309466e-05, "loss": 2.3462, "step": 14515 }, { "epoch": 1.11, "learning_rate": 9.380103785349371e-05, "loss": 1.9607, "step": 14520 }, { "epoch": 1.11, "learning_rate": 9.376081097389276e-05, "loss": 1.1034, "step": 14525 }, { "epoch": 1.11, "learning_rate": 9.37205840942918e-05, "loss": 2.063, "step": 14530 }, { "epoch": 1.11, "learning_rate": 9.368035721469086e-05, "loss": 2.0346, "step": 14535 }, { "epoch": 1.11, "learning_rate": 9.364013033508992e-05, "loss": 4.0316, "step": 14540 }, { "epoch": 1.11, "learning_rate": 9.359990345548895e-05, "loss": 4.7727, "step": 14545 }, { "epoch": 1.11, "learning_rate": 9.355967657588801e-05, "loss": 4.61, "step": 14550 }, { "epoch": 1.11, "learning_rate": 9.351944969628707e-05, "loss": 3.921, "step": 14555 }, { "epoch": 1.11, "learning_rate": 9.347922281668611e-05, "loss": 3.5495, "step": 14560 }, { "epoch": 1.11, "learning_rate": 9.343899593708516e-05, "loss": 3.8674, "step": 14565 }, { "epoch": 1.11, "learning_rate": 9.339876905748422e-05, "loss": 2.3444, "step": 14570 }, { "epoch": 1.11, "learning_rate": 9.335854217788327e-05, "loss": 2.3662, "step": 14575 }, { "epoch": 1.11, "learning_rate": 9.331831529828232e-05, "loss": 1.6259, "step": 14580 }, { "epoch": 1.11, "learning_rate": 9.327808841868136e-05, "loss": 2.9731, "step": 14585 }, { "epoch": 1.12, "learning_rate": 9.323786153908042e-05, "loss": 5.4543, "step": 14590 }, { "epoch": 1.12, "learning_rate": 9.319763465947947e-05, "loss": 3.7518, "step": 14595 }, { "epoch": 1.12, "learning_rate": 9.315740777987851e-05, "loss": 2.8011, "step": 14600 }, { "epoch": 1.12, "learning_rate": 9.311718090027757e-05, "loss": 2.6506, "step": 14605 }, { "epoch": 1.12, "learning_rate": 9.307695402067663e-05, "loss": 2.9476, "step": 14610 }, { "epoch": 1.12, "learning_rate": 9.303672714107567e-05, "loss": 2.7561, "step": 14615 }, { "epoch": 1.12, "learning_rate": 9.299650026147472e-05, "loss": 1.8628, "step": 14620 }, { "epoch": 1.12, "learning_rate": 9.295627338187378e-05, "loss": 3.3484, "step": 14625 }, { "epoch": 1.12, "learning_rate": 9.291604650227282e-05, "loss": 1.8501, "step": 14630 }, { "epoch": 1.12, "learning_rate": 9.287581962267188e-05, "loss": 1.1852, "step": 14635 }, { "epoch": 1.12, "learning_rate": 9.283559274307092e-05, "loss": 3.9387, "step": 14640 }, { "epoch": 1.12, "learning_rate": 9.279536586346997e-05, "loss": 5.0062, "step": 14645 }, { "epoch": 1.12, "learning_rate": 9.275513898386903e-05, "loss": 3.9559, "step": 14650 }, { "epoch": 1.12, "learning_rate": 9.271491210426808e-05, "loss": 3.1137, "step": 14655 }, { "epoch": 1.12, "learning_rate": 9.267468522466713e-05, "loss": 2.2882, "step": 14660 }, { "epoch": 1.12, "learning_rate": 9.263445834506617e-05, "loss": 3.7979, "step": 14665 }, { "epoch": 1.12, "learning_rate": 9.259423146546523e-05, "loss": 1.49, "step": 14670 }, { "epoch": 1.12, "learning_rate": 9.255400458586428e-05, "loss": 2.9072, "step": 14675 }, { "epoch": 1.12, "learning_rate": 9.251377770626332e-05, "loss": 0.0377, "step": 14680 }, { "epoch": 1.12, "learning_rate": 9.247355082666238e-05, "loss": 0.8162, "step": 14685 }, { "epoch": 1.12, "learning_rate": 9.243332394706144e-05, "loss": 4.1197, "step": 14690 }, { "epoch": 1.12, "learning_rate": 9.239309706746048e-05, "loss": 3.5349, "step": 14695 }, { "epoch": 1.12, "learning_rate": 9.235287018785953e-05, "loss": 3.0641, "step": 14700 }, { "epoch": 1.12, "learning_rate": 9.231264330825858e-05, "loss": 3.3049, "step": 14705 }, { "epoch": 1.12, "learning_rate": 9.227241642865764e-05, "loss": 2.9953, "step": 14710 }, { "epoch": 1.12, "learning_rate": 9.223218954905667e-05, "loss": 4.6273, "step": 14715 }, { "epoch": 1.13, "learning_rate": 9.219196266945573e-05, "loss": 3.2683, "step": 14720 }, { "epoch": 1.13, "learning_rate": 9.215173578985479e-05, "loss": 1.9925, "step": 14725 }, { "epoch": 1.13, "learning_rate": 9.211150891025384e-05, "loss": 4.6559, "step": 14730 }, { "epoch": 1.13, "learning_rate": 9.207128203065288e-05, "loss": 2.2516, "step": 14735 }, { "epoch": 1.13, "learning_rate": 9.203105515105194e-05, "loss": 5.6625, "step": 14740 }, { "epoch": 1.13, "learning_rate": 9.1990828271451e-05, "loss": 3.9943, "step": 14745 }, { "epoch": 1.13, "learning_rate": 9.195060139185003e-05, "loss": 3.9695, "step": 14750 }, { "epoch": 1.13, "learning_rate": 9.191037451224909e-05, "loss": 4.2107, "step": 14755 }, { "epoch": 1.13, "learning_rate": 9.187014763264814e-05, "loss": 3.3329, "step": 14760 }, { "epoch": 1.13, "learning_rate": 9.182992075304719e-05, "loss": 3.2871, "step": 14765 }, { "epoch": 1.13, "learning_rate": 9.178969387344623e-05, "loss": 4.1098, "step": 14770 }, { "epoch": 1.13, "learning_rate": 9.174946699384529e-05, "loss": 1.0645, "step": 14775 }, { "epoch": 1.13, "learning_rate": 9.170924011424435e-05, "loss": 0.74, "step": 14780 }, { "epoch": 1.13, "learning_rate": 9.16690132346434e-05, "loss": 4.3225, "step": 14785 }, { "epoch": 1.13, "learning_rate": 9.162878635504244e-05, "loss": 4.441, "step": 14790 }, { "epoch": 1.13, "learning_rate": 9.15885594754415e-05, "loss": 3.9297, "step": 14795 }, { "epoch": 1.13, "learning_rate": 9.154833259584054e-05, "loss": 3.2412, "step": 14800 }, { "epoch": 1.13, "learning_rate": 9.15081057162396e-05, "loss": 3.4386, "step": 14805 }, { "epoch": 1.13, "learning_rate": 9.146787883663865e-05, "loss": 3.839, "step": 14810 }, { "epoch": 1.13, "learning_rate": 9.142765195703769e-05, "loss": 4.0252, "step": 14815 }, { "epoch": 1.13, "learning_rate": 9.138742507743675e-05, "loss": 2.962, "step": 14820 }, { "epoch": 1.13, "learning_rate": 9.13471981978358e-05, "loss": 2.9706, "step": 14825 }, { "epoch": 1.13, "learning_rate": 9.130697131823485e-05, "loss": 3.4539, "step": 14830 }, { "epoch": 1.13, "learning_rate": 9.12667444386339e-05, "loss": 3.3337, "step": 14835 }, { "epoch": 1.13, "learning_rate": 9.122651755903295e-05, "loss": 4.6289, "step": 14840 }, { "epoch": 1.13, "learning_rate": 9.1186290679432e-05, "loss": 4.4992, "step": 14845 }, { "epoch": 1.13, "learning_rate": 9.114606379983104e-05, "loss": 4.495, "step": 14850 }, { "epoch": 1.14, "learning_rate": 9.11058369202301e-05, "loss": 3.5725, "step": 14855 }, { "epoch": 1.14, "learning_rate": 9.106561004062916e-05, "loss": 2.8839, "step": 14860 }, { "epoch": 1.14, "learning_rate": 9.10253831610282e-05, "loss": 2.2224, "step": 14865 }, { "epoch": 1.14, "learning_rate": 9.098515628142725e-05, "loss": 2.9777, "step": 14870 }, { "epoch": 1.14, "learning_rate": 9.094492940182631e-05, "loss": 0.4112, "step": 14875 }, { "epoch": 1.14, "learning_rate": 9.090470252222537e-05, "loss": 3.0038, "step": 14880 }, { "epoch": 1.14, "learning_rate": 9.08644756426244e-05, "loss": 2.8895, "step": 14885 }, { "epoch": 1.14, "learning_rate": 9.082424876302346e-05, "loss": 4.4019, "step": 14890 }, { "epoch": 1.14, "learning_rate": 9.078402188342251e-05, "loss": 4.5066, "step": 14895 }, { "epoch": 1.14, "learning_rate": 9.074379500382156e-05, "loss": 4.3509, "step": 14900 }, { "epoch": 1.14, "learning_rate": 9.07035681242206e-05, "loss": 3.8162, "step": 14905 }, { "epoch": 1.14, "learning_rate": 9.066334124461966e-05, "loss": 2.8266, "step": 14910 }, { "epoch": 1.14, "learning_rate": 9.062311436501872e-05, "loss": 1.7993, "step": 14915 }, { "epoch": 1.14, "learning_rate": 9.058288748541775e-05, "loss": 3.0204, "step": 14920 }, { "epoch": 1.14, "learning_rate": 9.054266060581681e-05, "loss": 2.9019, "step": 14925 }, { "epoch": 1.14, "learning_rate": 9.050243372621587e-05, "loss": 1.8711, "step": 14930 }, { "epoch": 1.14, "learning_rate": 9.046220684661491e-05, "loss": 0.6589, "step": 14935 }, { "epoch": 1.14, "learning_rate": 9.042197996701396e-05, "loss": 4.4053, "step": 14940 }, { "epoch": 1.14, "learning_rate": 9.038175308741301e-05, "loss": 4.7037, "step": 14945 }, { "epoch": 1.14, "learning_rate": 9.034152620781206e-05, "loss": 3.6162, "step": 14950 }, { "epoch": 1.14, "learning_rate": 9.030129932821112e-05, "loss": 3.1366, "step": 14955 }, { "epoch": 1.14, "learning_rate": 9.026107244861016e-05, "loss": 4.0396, "step": 14960 }, { "epoch": 1.14, "learning_rate": 9.022084556900922e-05, "loss": 2.0449, "step": 14965 }, { "epoch": 1.14, "learning_rate": 9.018061868940827e-05, "loss": 2.2303, "step": 14970 }, { "epoch": 1.14, "learning_rate": 9.014039180980732e-05, "loss": 2.6239, "step": 14975 }, { "epoch": 1.14, "learning_rate": 9.010016493020637e-05, "loss": 2.1086, "step": 14980 }, { "epoch": 1.15, "learning_rate": 9.005993805060541e-05, "loss": 5.5885, "step": 14985 }, { "epoch": 1.15, "learning_rate": 9.001971117100447e-05, "loss": 4.2605, "step": 14990 }, { "epoch": 1.15, "learning_rate": 8.997948429140352e-05, "loss": 4.6322, "step": 14995 }, { "epoch": 1.15, "learning_rate": 8.993925741180257e-05, "loss": 3.768, "step": 15000 }, { "epoch": 1.15, "learning_rate": 8.989903053220162e-05, "loss": 3.6373, "step": 15005 }, { "epoch": 1.15, "learning_rate": 8.985880365260068e-05, "loss": 3.0281, "step": 15010 }, { "epoch": 1.15, "learning_rate": 8.981857677299972e-05, "loss": 3.4137, "step": 15015 }, { "epoch": 1.15, "learning_rate": 8.977834989339877e-05, "loss": 2.8749, "step": 15020 }, { "epoch": 1.15, "learning_rate": 8.973812301379782e-05, "loss": 0.759, "step": 15025 }, { "epoch": 1.15, "learning_rate": 8.969789613419688e-05, "loss": 2.8243, "step": 15030 }, { "epoch": 1.15, "learning_rate": 8.965766925459593e-05, "loss": 2.4736, "step": 15035 }, { "epoch": 1.15, "learning_rate": 8.961744237499497e-05, "loss": 4.4713, "step": 15040 }, { "epoch": 1.15, "learning_rate": 8.957721549539403e-05, "loss": 4.3701, "step": 15045 }, { "epoch": 1.15, "learning_rate": 8.953698861579309e-05, "loss": 4.4148, "step": 15050 }, { "epoch": 1.15, "learning_rate": 8.949676173619212e-05, "loss": 3.6678, "step": 15055 }, { "epoch": 1.15, "learning_rate": 8.945653485659118e-05, "loss": 3.7158, "step": 15060 }, { "epoch": 1.15, "learning_rate": 8.941630797699024e-05, "loss": 3.458, "step": 15065 }, { "epoch": 1.15, "learning_rate": 8.937608109738928e-05, "loss": 3.1514, "step": 15070 }, { "epoch": 1.15, "learning_rate": 8.933585421778833e-05, "loss": 1.927, "step": 15075 }, { "epoch": 1.15, "learning_rate": 8.929562733818738e-05, "loss": 2.2447, "step": 15080 }, { "epoch": 1.15, "learning_rate": 8.925540045858644e-05, "loss": 2.924, "step": 15085 }, { "epoch": 1.15, "learning_rate": 8.921517357898547e-05, "loss": 3.8148, "step": 15090 }, { "epoch": 1.15, "learning_rate": 8.917494669938453e-05, "loss": 3.6268, "step": 15095 }, { "epoch": 1.15, "learning_rate": 8.913471981978359e-05, "loss": 3.8186, "step": 15100 }, { "epoch": 1.15, "learning_rate": 8.909449294018263e-05, "loss": 4.7416, "step": 15105 }, { "epoch": 1.15, "learning_rate": 8.905426606058168e-05, "loss": 3.0722, "step": 15110 }, { "epoch": 1.16, "learning_rate": 8.901403918098074e-05, "loss": 2.4213, "step": 15115 }, { "epoch": 1.16, "learning_rate": 8.897381230137978e-05, "loss": 1.8222, "step": 15120 }, { "epoch": 1.16, "learning_rate": 8.893358542177884e-05, "loss": 2.871, "step": 15125 }, { "epoch": 1.16, "learning_rate": 8.889335854217789e-05, "loss": 1.8803, "step": 15130 }, { "epoch": 1.16, "learning_rate": 8.885313166257694e-05, "loss": 2.8122, "step": 15135 }, { "epoch": 1.16, "learning_rate": 8.881290478297599e-05, "loss": 3.7818, "step": 15140 }, { "epoch": 1.16, "learning_rate": 8.877267790337503e-05, "loss": 3.7229, "step": 15145 }, { "epoch": 1.16, "learning_rate": 8.873245102377409e-05, "loss": 4.0561, "step": 15150 }, { "epoch": 1.16, "learning_rate": 8.869222414417314e-05, "loss": 3.7717, "step": 15155 }, { "epoch": 1.16, "learning_rate": 8.86519972645722e-05, "loss": 3.7115, "step": 15160 }, { "epoch": 1.16, "learning_rate": 8.861177038497124e-05, "loss": 3.7303, "step": 15165 }, { "epoch": 1.16, "learning_rate": 8.85715435053703e-05, "loss": 2.5779, "step": 15170 }, { "epoch": 1.16, "learning_rate": 8.853131662576934e-05, "loss": 3.3491, "step": 15175 }, { "epoch": 1.16, "learning_rate": 8.84910897461684e-05, "loss": 1.5736, "step": 15180 }, { "epoch": 1.16, "learning_rate": 8.845086286656744e-05, "loss": 2.1361, "step": 15185 }, { "epoch": 1.16, "learning_rate": 8.841063598696649e-05, "loss": 4.5369, "step": 15190 }, { "epoch": 1.16, "learning_rate": 8.837040910736555e-05, "loss": 4.5656, "step": 15195 }, { "epoch": 1.16, "learning_rate": 8.83301822277646e-05, "loss": 2.8299, "step": 15200 }, { "epoch": 1.16, "learning_rate": 8.828995534816365e-05, "loss": 4.0764, "step": 15205 }, { "epoch": 1.16, "learning_rate": 8.82497284685627e-05, "loss": 3.6823, "step": 15210 }, { "epoch": 1.16, "learning_rate": 8.820950158896175e-05, "loss": 2.6001, "step": 15215 }, { "epoch": 1.16, "learning_rate": 8.81692747093608e-05, "loss": 2.5453, "step": 15220 }, { "epoch": 1.16, "learning_rate": 8.812904782975984e-05, "loss": 3.1553, "step": 15225 }, { "epoch": 1.16, "learning_rate": 8.80888209501589e-05, "loss": 2.2298, "step": 15230 }, { "epoch": 1.16, "learning_rate": 8.804859407055796e-05, "loss": 1.4315, "step": 15235 }, { "epoch": 1.16, "learning_rate": 8.8008367190957e-05, "loss": 4.1703, "step": 15240 }, { "epoch": 1.17, "learning_rate": 8.796814031135605e-05, "loss": 4.1615, "step": 15245 }, { "epoch": 1.17, "learning_rate": 8.792791343175511e-05, "loss": 4.0225, "step": 15250 }, { "epoch": 1.17, "learning_rate": 8.788768655215415e-05, "loss": 3.3277, "step": 15255 }, { "epoch": 1.17, "learning_rate": 8.78474596725532e-05, "loss": 4.7938, "step": 15260 }, { "epoch": 1.17, "learning_rate": 8.780723279295225e-05, "loss": 3.795, "step": 15265 }, { "epoch": 1.17, "learning_rate": 8.776700591335131e-05, "loss": 3.2113, "step": 15270 }, { "epoch": 1.17, "learning_rate": 8.772677903375036e-05, "loss": 1.447, "step": 15275 }, { "epoch": 1.17, "learning_rate": 8.76865521541494e-05, "loss": 1.6464, "step": 15280 }, { "epoch": 1.17, "learning_rate": 8.764632527454846e-05, "loss": 2.8649, "step": 15285 }, { "epoch": 1.17, "learning_rate": 8.76060983949475e-05, "loss": 4.4762, "step": 15290 }, { "epoch": 1.17, "learning_rate": 8.756587151534655e-05, "loss": 4.0934, "step": 15295 }, { "epoch": 1.17, "learning_rate": 8.752564463574561e-05, "loss": 4.1266, "step": 15300 }, { "epoch": 1.17, "learning_rate": 8.748541775614467e-05, "loss": 3.9455, "step": 15305 }, { "epoch": 1.17, "learning_rate": 8.744519087654371e-05, "loss": 2.9204, "step": 15310 }, { "epoch": 1.17, "learning_rate": 8.740496399694276e-05, "loss": 3.201, "step": 15315 }, { "epoch": 1.17, "learning_rate": 8.736473711734181e-05, "loss": 3.3204, "step": 15320 }, { "epoch": 1.17, "learning_rate": 8.732451023774086e-05, "loss": 1.0861, "step": 15325 }, { "epoch": 1.17, "learning_rate": 8.728428335813992e-05, "loss": 2.0387, "step": 15330 }, { "epoch": 1.17, "learning_rate": 8.724405647853896e-05, "loss": 2.5072, "step": 15335 }, { "epoch": 1.17, "learning_rate": 8.720382959893802e-05, "loss": 3.6967, "step": 15340 }, { "epoch": 1.17, "learning_rate": 8.716360271933706e-05, "loss": 4.2148, "step": 15345 }, { "epoch": 1.17, "learning_rate": 8.712337583973612e-05, "loss": 2.8939, "step": 15350 }, { "epoch": 1.17, "learning_rate": 8.708314896013517e-05, "loss": 3.6441, "step": 15355 }, { "epoch": 1.17, "learning_rate": 8.704292208053421e-05, "loss": 2.4613, "step": 15360 }, { "epoch": 1.17, "learning_rate": 8.700269520093327e-05, "loss": 1.0514, "step": 15365 }, { "epoch": 1.17, "learning_rate": 8.696246832133231e-05, "loss": 4.2469, "step": 15370 }, { "epoch": 1.18, "learning_rate": 8.692224144173137e-05, "loss": 3.054, "step": 15375 }, { "epoch": 1.18, "learning_rate": 8.688201456213042e-05, "loss": 1.4101, "step": 15380 }, { "epoch": 1.18, "learning_rate": 8.684178768252948e-05, "loss": 2.7099, "step": 15385 }, { "epoch": 1.18, "learning_rate": 8.680156080292852e-05, "loss": 4.5326, "step": 15390 }, { "epoch": 1.18, "learning_rate": 8.676133392332757e-05, "loss": 3.8502, "step": 15395 }, { "epoch": 1.18, "learning_rate": 8.672110704372662e-05, "loss": 3.8027, "step": 15400 }, { "epoch": 1.18, "learning_rate": 8.668088016412568e-05, "loss": 3.708, "step": 15405 }, { "epoch": 1.18, "learning_rate": 8.664065328452471e-05, "loss": 3.4665, "step": 15410 }, { "epoch": 1.18, "learning_rate": 8.660042640492377e-05, "loss": 2.2829, "step": 15415 }, { "epoch": 1.18, "learning_rate": 8.656019952532283e-05, "loss": 2.9721, "step": 15420 }, { "epoch": 1.18, "learning_rate": 8.651997264572187e-05, "loss": 2.7123, "step": 15425 }, { "epoch": 1.18, "learning_rate": 8.647974576612092e-05, "loss": 1.2022, "step": 15430 }, { "epoch": 1.18, "learning_rate": 8.643951888651998e-05, "loss": 1.9478, "step": 15435 }, { "epoch": 1.18, "learning_rate": 8.639929200691904e-05, "loss": 4.2762, "step": 15440 }, { "epoch": 1.18, "learning_rate": 8.635906512731807e-05, "loss": 3.9773, "step": 15445 }, { "epoch": 1.18, "learning_rate": 8.631883824771712e-05, "loss": 4.3887, "step": 15450 }, { "epoch": 1.18, "learning_rate": 8.627861136811618e-05, "loss": 4.1759, "step": 15455 }, { "epoch": 1.18, "learning_rate": 8.623838448851523e-05, "loss": 2.006, "step": 15460 }, { "epoch": 1.18, "learning_rate": 8.619815760891427e-05, "loss": 3.5844, "step": 15465 }, { "epoch": 1.18, "learning_rate": 8.615793072931333e-05, "loss": 4.083, "step": 15470 }, { "epoch": 1.18, "learning_rate": 8.611770384971239e-05, "loss": 2.3228, "step": 15475 }, { "epoch": 1.18, "learning_rate": 8.607747697011143e-05, "loss": 3.9055, "step": 15480 }, { "epoch": 1.18, "learning_rate": 8.603725009051048e-05, "loss": 1.9998, "step": 15485 }, { "epoch": 1.18, "learning_rate": 8.599702321090954e-05, "loss": 4.3775, "step": 15490 }, { "epoch": 1.18, "learning_rate": 8.595679633130858e-05, "loss": 5.6619, "step": 15495 }, { "epoch": 1.18, "learning_rate": 8.591656945170764e-05, "loss": 3.8799, "step": 15500 }, { "epoch": 1.19, "learning_rate": 8.587634257210668e-05, "loss": 4.2123, "step": 15505 }, { "epoch": 1.19, "learning_rate": 8.583611569250574e-05, "loss": 3.1585, "step": 15510 }, { "epoch": 1.19, "learning_rate": 8.579588881290479e-05, "loss": 3.1555, "step": 15515 }, { "epoch": 1.19, "learning_rate": 8.575566193330383e-05, "loss": 2.6523, "step": 15520 }, { "epoch": 1.19, "learning_rate": 8.571543505370289e-05, "loss": 1.6222, "step": 15525 }, { "epoch": 1.19, "learning_rate": 8.567520817410193e-05, "loss": 1.5623, "step": 15530 }, { "epoch": 1.19, "learning_rate": 8.563498129450099e-05, "loss": 2.1904, "step": 15535 }, { "epoch": 1.19, "learning_rate": 8.559475441490004e-05, "loss": 4.3115, "step": 15540 }, { "epoch": 1.19, "learning_rate": 8.55545275352991e-05, "loss": 4.4436, "step": 15545 }, { "epoch": 1.19, "learning_rate": 8.551430065569814e-05, "loss": 4.1313, "step": 15550 }, { "epoch": 1.19, "learning_rate": 8.54740737760972e-05, "loss": 3.7244, "step": 15555 }, { "epoch": 1.19, "learning_rate": 8.543384689649624e-05, "loss": 1.7071, "step": 15560 }, { "epoch": 1.19, "learning_rate": 8.539362001689529e-05, "loss": 2.7745, "step": 15565 }, { "epoch": 1.19, "learning_rate": 8.536143851321453e-05, "loss": 3.1961, "step": 15570 }, { "epoch": 1.19, "learning_rate": 8.532121163361358e-05, "loss": 1.68, "step": 15575 }, { "epoch": 1.19, "learning_rate": 8.528098475401264e-05, "loss": 2.2349, "step": 15580 }, { "epoch": 1.19, "learning_rate": 8.524075787441168e-05, "loss": 3.1614, "step": 15585 }, { "epoch": 1.19, "learning_rate": 8.520053099481074e-05, "loss": 5.0854, "step": 15590 }, { "epoch": 1.19, "learning_rate": 8.516030411520979e-05, "loss": 4.314, "step": 15595 }, { "epoch": 1.19, "learning_rate": 8.512007723560884e-05, "loss": 3.1324, "step": 15600 }, { "epoch": 1.19, "learning_rate": 8.507985035600789e-05, "loss": 4.0867, "step": 15605 }, { "epoch": 1.19, "learning_rate": 8.503962347640693e-05, "loss": 2.0064, "step": 15610 }, { "epoch": 1.19, "learning_rate": 8.499939659680599e-05, "loss": 2.5377, "step": 15615 }, { "epoch": 1.19, "learning_rate": 8.495916971720505e-05, "loss": 3.2949, "step": 15620 }, { "epoch": 1.19, "learning_rate": 8.491894283760408e-05, "loss": 1.9116, "step": 15625 }, { "epoch": 1.19, "learning_rate": 8.487871595800314e-05, "loss": 1.14, "step": 15630 }, { "epoch": 1.19, "learning_rate": 8.48384890784022e-05, "loss": 4.8432, "step": 15635 }, { "epoch": 1.2, "learning_rate": 8.479826219880124e-05, "loss": 4.5416, "step": 15640 }, { "epoch": 1.2, "learning_rate": 8.475803531920029e-05, "loss": 4.0746, "step": 15645 }, { "epoch": 1.2, "learning_rate": 8.471780843959934e-05, "loss": 3.4386, "step": 15650 }, { "epoch": 1.2, "learning_rate": 8.46775815599984e-05, "loss": 3.0038, "step": 15655 }, { "epoch": 1.2, "learning_rate": 8.463735468039743e-05, "loss": 3.2941, "step": 15660 }, { "epoch": 1.2, "learning_rate": 8.459712780079649e-05, "loss": 3.2194, "step": 15665 }, { "epoch": 1.2, "learning_rate": 8.455690092119555e-05, "loss": 1.8783, "step": 15670 }, { "epoch": 1.2, "learning_rate": 8.45166740415946e-05, "loss": 2.5379, "step": 15675 }, { "epoch": 1.2, "learning_rate": 8.447644716199364e-05, "loss": 2.8854, "step": 15680 }, { "epoch": 1.2, "learning_rate": 8.44362202823927e-05, "loss": 2.5177, "step": 15685 }, { "epoch": 1.2, "learning_rate": 8.439599340279176e-05, "loss": 4.4383, "step": 15690 }, { "epoch": 1.2, "learning_rate": 8.43557665231908e-05, "loss": 3.7588, "step": 15695 }, { "epoch": 1.2, "learning_rate": 8.431553964358985e-05, "loss": 4.1584, "step": 15700 }, { "epoch": 1.2, "learning_rate": 8.42753127639889e-05, "loss": 4.5521, "step": 15705 }, { "epoch": 1.2, "learning_rate": 8.423508588438795e-05, "loss": 3.7143, "step": 15710 }, { "epoch": 1.2, "learning_rate": 8.419485900478701e-05, "loss": 3.5352, "step": 15715 }, { "epoch": 1.2, "learning_rate": 8.415463212518605e-05, "loss": 2.5749, "step": 15720 }, { "epoch": 1.2, "learning_rate": 8.411440524558511e-05, "loss": 2.019, "step": 15725 }, { "epoch": 1.2, "learning_rate": 8.407417836598415e-05, "loss": 1.2621, "step": 15730 }, { "epoch": 1.2, "learning_rate": 8.40339514863832e-05, "loss": 1.5193, "step": 15735 }, { "epoch": 1.2, "learning_rate": 8.399372460678226e-05, "loss": 4.5795, "step": 15740 }, { "epoch": 1.2, "learning_rate": 8.39534977271813e-05, "loss": 4.4549, "step": 15745 }, { "epoch": 1.2, "learning_rate": 8.391327084758036e-05, "loss": 3.8432, "step": 15750 }, { "epoch": 1.2, "learning_rate": 8.38730439679794e-05, "loss": 4.1367, "step": 15755 }, { "epoch": 1.2, "learning_rate": 8.383281708837846e-05, "loss": 2.8366, "step": 15760 }, { "epoch": 1.2, "learning_rate": 8.379259020877751e-05, "loss": 3.5475, "step": 15765 }, { "epoch": 1.21, "learning_rate": 8.375236332917657e-05, "loss": 3.802, "step": 15770 }, { "epoch": 1.21, "learning_rate": 8.371213644957561e-05, "loss": 2.1158, "step": 15775 }, { "epoch": 1.21, "learning_rate": 8.367190956997466e-05, "loss": 2.9005, "step": 15780 }, { "epoch": 1.21, "learning_rate": 8.363168269037371e-05, "loss": 4.0268, "step": 15785 }, { "epoch": 1.21, "learning_rate": 8.359145581077277e-05, "loss": 3.5148, "step": 15790 }, { "epoch": 1.21, "learning_rate": 8.35512289311718e-05, "loss": 3.9832, "step": 15795 }, { "epoch": 1.21, "learning_rate": 8.351100205157086e-05, "loss": 4.1332, "step": 15800 }, { "epoch": 1.21, "learning_rate": 8.347077517196992e-05, "loss": 2.8575, "step": 15805 }, { "epoch": 1.21, "learning_rate": 8.343054829236896e-05, "loss": 2.4339, "step": 15810 }, { "epoch": 1.21, "learning_rate": 8.339032141276801e-05, "loss": 2.5007, "step": 15815 }, { "epoch": 1.21, "learning_rate": 8.335009453316707e-05, "loss": 2.438, "step": 15820 }, { "epoch": 1.21, "learning_rate": 8.330986765356613e-05, "loss": 2.3291, "step": 15825 }, { "epoch": 1.21, "learning_rate": 8.326964077396516e-05, "loss": 1.1762, "step": 15830 }, { "epoch": 1.21, "learning_rate": 8.322941389436422e-05, "loss": 1.7898, "step": 15835 }, { "epoch": 1.21, "learning_rate": 8.318918701476327e-05, "loss": 4.6023, "step": 15840 }, { "epoch": 1.21, "learning_rate": 8.314896013516232e-05, "loss": 4.2715, "step": 15845 }, { "epoch": 1.21, "learning_rate": 8.310873325556136e-05, "loss": 4.5984, "step": 15850 }, { "epoch": 1.21, "learning_rate": 8.306850637596042e-05, "loss": 3.9041, "step": 15855 }, { "epoch": 1.21, "learning_rate": 8.302827949635948e-05, "loss": 3.0089, "step": 15860 }, { "epoch": 1.21, "learning_rate": 8.298805261675852e-05, "loss": 3.0388, "step": 15865 }, { "epoch": 1.21, "learning_rate": 8.294782573715757e-05, "loss": 3.5033, "step": 15870 }, { "epoch": 1.21, "learning_rate": 8.290759885755663e-05, "loss": 3.0061, "step": 15875 }, { "epoch": 1.21, "learning_rate": 8.286737197795567e-05, "loss": 2.2896, "step": 15880 }, { "epoch": 1.21, "learning_rate": 8.282714509835472e-05, "loss": 2.4662, "step": 15885 }, { "epoch": 1.21, "learning_rate": 8.278691821875377e-05, "loss": 4.1061, "step": 15890 }, { "epoch": 1.21, "learning_rate": 8.274669133915283e-05, "loss": 4.6795, "step": 15895 }, { "epoch": 1.22, "learning_rate": 8.270646445955188e-05, "loss": 4.7134, "step": 15900 }, { "epoch": 1.22, "learning_rate": 8.266623757995092e-05, "loss": 3.7715, "step": 15905 }, { "epoch": 1.22, "learning_rate": 8.262601070034998e-05, "loss": 3.1307, "step": 15910 }, { "epoch": 1.22, "learning_rate": 8.258578382074903e-05, "loss": 1.9352, "step": 15915 }, { "epoch": 1.22, "learning_rate": 8.254555694114808e-05, "loss": 1.595, "step": 15920 }, { "epoch": 1.22, "learning_rate": 8.250533006154713e-05, "loss": 1.327, "step": 15925 }, { "epoch": 1.22, "learning_rate": 8.246510318194617e-05, "loss": 1.5466, "step": 15930 }, { "epoch": 1.22, "learning_rate": 8.242487630234523e-05, "loss": 2.0442, "step": 15935 }, { "epoch": 1.22, "learning_rate": 8.238464942274429e-05, "loss": 5.4271, "step": 15940 }, { "epoch": 1.22, "learning_rate": 8.234442254314333e-05, "loss": 4.067, "step": 15945 }, { "epoch": 1.22, "learning_rate": 8.230419566354238e-05, "loss": 3.5479, "step": 15950 }, { "epoch": 1.22, "learning_rate": 8.226396878394144e-05, "loss": 3.7012, "step": 15955 }, { "epoch": 1.22, "learning_rate": 8.222374190434048e-05, "loss": 3.7094, "step": 15960 }, { "epoch": 1.22, "learning_rate": 8.218351502473953e-05, "loss": 3.0054, "step": 15965 }, { "epoch": 1.22, "learning_rate": 8.214328814513858e-05, "loss": 2.5629, "step": 15970 }, { "epoch": 1.22, "learning_rate": 8.210306126553764e-05, "loss": 1.7412, "step": 15975 }, { "epoch": 1.22, "learning_rate": 8.206283438593669e-05, "loss": 2.4695, "step": 15980 }, { "epoch": 1.22, "learning_rate": 8.202260750633573e-05, "loss": 3.7052, "step": 15985 }, { "epoch": 1.22, "learning_rate": 8.198238062673479e-05, "loss": 4.3586, "step": 15990 }, { "epoch": 1.22, "learning_rate": 8.194215374713385e-05, "loss": 4.7576, "step": 15995 }, { "epoch": 1.22, "learning_rate": 8.190192686753288e-05, "loss": 4.1148, "step": 16000 }, { "epoch": 1.22, "learning_rate": 8.186169998793194e-05, "loss": 4.3701, "step": 16005 }, { "epoch": 1.22, "learning_rate": 8.1821473108331e-05, "loss": 2.6996, "step": 16010 }, { "epoch": 1.22, "learning_rate": 8.178124622873004e-05, "loss": 3.8242, "step": 16015 }, { "epoch": 1.22, "learning_rate": 8.174101934912909e-05, "loss": 1.7617, "step": 16020 }, { "epoch": 1.22, "learning_rate": 8.170079246952814e-05, "loss": 3.2437, "step": 16025 }, { "epoch": 1.23, "learning_rate": 8.16605655899272e-05, "loss": 2.0556, "step": 16030 }, { "epoch": 1.23, "learning_rate": 8.162033871032623e-05, "loss": 1.7406, "step": 16035 }, { "epoch": 1.23, "learning_rate": 8.158011183072529e-05, "loss": 3.6297, "step": 16040 }, { "epoch": 1.23, "learning_rate": 8.153988495112435e-05, "loss": 3.8234, "step": 16045 }, { "epoch": 1.23, "learning_rate": 8.14996580715234e-05, "loss": 3.1287, "step": 16050 }, { "epoch": 1.23, "learning_rate": 8.145943119192244e-05, "loss": 3.4059, "step": 16055 }, { "epoch": 1.23, "learning_rate": 8.14192043123215e-05, "loss": 3.0411, "step": 16060 }, { "epoch": 1.23, "learning_rate": 8.137897743272056e-05, "loss": 1.7471, "step": 16065 }, { "epoch": 1.23, "learning_rate": 8.13387505531196e-05, "loss": 2.7685, "step": 16070 }, { "epoch": 1.23, "learning_rate": 8.129852367351864e-05, "loss": 1.8943, "step": 16075 }, { "epoch": 1.23, "learning_rate": 8.12582967939177e-05, "loss": 2.1345, "step": 16080 }, { "epoch": 1.23, "learning_rate": 8.121806991431675e-05, "loss": 0.4706, "step": 16085 }, { "epoch": 1.23, "learning_rate": 8.11778430347158e-05, "loss": 4.9555, "step": 16090 }, { "epoch": 1.23, "learning_rate": 8.113761615511485e-05, "loss": 3.7021, "step": 16095 }, { "epoch": 1.23, "learning_rate": 8.10973892755139e-05, "loss": 3.6622, "step": 16100 }, { "epoch": 1.23, "learning_rate": 8.105716239591295e-05, "loss": 3.2216, "step": 16105 }, { "epoch": 1.23, "learning_rate": 8.1016935516312e-05, "loss": 3.7122, "step": 16110 }, { "epoch": 1.23, "learning_rate": 8.097670863671106e-05, "loss": 3.2248, "step": 16115 }, { "epoch": 1.23, "learning_rate": 8.09364817571101e-05, "loss": 2.1737, "step": 16120 }, { "epoch": 1.23, "learning_rate": 8.089625487750916e-05, "loss": 3.0768, "step": 16125 }, { "epoch": 1.23, "learning_rate": 8.08560279979082e-05, "loss": 1.7508, "step": 16130 }, { "epoch": 1.23, "learning_rate": 8.081580111830725e-05, "loss": 1.544, "step": 16135 }, { "epoch": 1.23, "learning_rate": 8.077557423870631e-05, "loss": 4.6045, "step": 16140 }, { "epoch": 1.23, "learning_rate": 8.073534735910537e-05, "loss": 4.5873, "step": 16145 }, { "epoch": 1.23, "learning_rate": 8.069512047950441e-05, "loss": 4.3631, "step": 16150 }, { "epoch": 1.23, "learning_rate": 8.065489359990345e-05, "loss": 4.2217, "step": 16155 }, { "epoch": 1.24, "learning_rate": 8.061466672030251e-05, "loss": 2.9564, "step": 16160 }, { "epoch": 1.24, "learning_rate": 8.057443984070157e-05, "loss": 3.6113, "step": 16165 }, { "epoch": 1.24, "learning_rate": 8.05342129611006e-05, "loss": 2.8933, "step": 16170 }, { "epoch": 1.24, "learning_rate": 8.049398608149966e-05, "loss": 3.0604, "step": 16175 }, { "epoch": 1.24, "learning_rate": 8.045375920189872e-05, "loss": 3.6262, "step": 16180 }, { "epoch": 1.24, "learning_rate": 8.041353232229776e-05, "loss": 3.0539, "step": 16185 }, { "epoch": 1.24, "learning_rate": 8.037330544269681e-05, "loss": 4.0107, "step": 16190 }, { "epoch": 1.24, "learning_rate": 8.033307856309587e-05, "loss": 4.325, "step": 16195 }, { "epoch": 1.24, "learning_rate": 8.029285168349492e-05, "loss": 3.9152, "step": 16200 }, { "epoch": 1.24, "learning_rate": 8.025262480389396e-05, "loss": 2.3825, "step": 16205 }, { "epoch": 1.24, "learning_rate": 8.021239792429301e-05, "loss": 3.0158, "step": 16210 }, { "epoch": 1.24, "learning_rate": 8.017217104469207e-05, "loss": 3.0349, "step": 16215 }, { "epoch": 1.24, "learning_rate": 8.013194416509112e-05, "loss": 2.9871, "step": 16220 }, { "epoch": 1.24, "learning_rate": 8.009171728549016e-05, "loss": 3.3361, "step": 16225 }, { "epoch": 1.24, "learning_rate": 8.005149040588922e-05, "loss": 2.4099, "step": 16230 }, { "epoch": 1.24, "learning_rate": 8.001126352628826e-05, "loss": 4.984, "step": 16235 }, { "epoch": 1.24, "learning_rate": 7.997103664668732e-05, "loss": 4.5197, "step": 16240 }, { "epoch": 1.24, "learning_rate": 7.993080976708637e-05, "loss": 4.6932, "step": 16245 }, { "epoch": 1.24, "learning_rate": 7.989058288748543e-05, "loss": 3.5482, "step": 16250 }, { "epoch": 1.24, "learning_rate": 7.985035600788447e-05, "loss": 4.3166, "step": 16255 }, { "epoch": 1.24, "learning_rate": 7.981012912828353e-05, "loss": 3.2842, "step": 16260 }, { "epoch": 1.24, "learning_rate": 7.976990224868257e-05, "loss": 3.2132, "step": 16265 }, { "epoch": 1.24, "learning_rate": 7.972967536908162e-05, "loss": 3.3531, "step": 16270 }, { "epoch": 1.24, "learning_rate": 7.968944848948068e-05, "loss": 3.0101, "step": 16275 }, { "epoch": 1.24, "learning_rate": 7.964922160987972e-05, "loss": 3.3757, "step": 16280 }, { "epoch": 1.24, "learning_rate": 7.960899473027878e-05, "loss": 2.4891, "step": 16285 }, { "epoch": 1.25, "learning_rate": 7.956876785067782e-05, "loss": 4.3678, "step": 16290 }, { "epoch": 1.25, "learning_rate": 7.952854097107688e-05, "loss": 4.4967, "step": 16295 }, { "epoch": 1.25, "learning_rate": 7.948831409147593e-05, "loss": 4.7766, "step": 16300 }, { "epoch": 1.25, "learning_rate": 7.944808721187497e-05, "loss": 4.4363, "step": 16305 }, { "epoch": 1.25, "learning_rate": 7.940786033227403e-05, "loss": 3.5326, "step": 16310 }, { "epoch": 1.25, "learning_rate": 7.936763345267309e-05, "loss": 2.0882, "step": 16315 }, { "epoch": 1.25, "learning_rate": 7.932740657307213e-05, "loss": 2.1091, "step": 16320 }, { "epoch": 1.25, "learning_rate": 7.928717969347118e-05, "loss": 2.316, "step": 16325 }, { "epoch": 1.25, "learning_rate": 7.924695281387024e-05, "loss": 3.159, "step": 16330 }, { "epoch": 1.25, "learning_rate": 7.92067259342693e-05, "loss": 1.6889, "step": 16335 }, { "epoch": 1.25, "learning_rate": 7.916649905466833e-05, "loss": 3.8652, "step": 16340 }, { "epoch": 1.25, "learning_rate": 7.912627217506738e-05, "loss": 4.3504, "step": 16345 }, { "epoch": 1.25, "learning_rate": 7.908604529546644e-05, "loss": 4.0221, "step": 16350 }, { "epoch": 1.25, "learning_rate": 7.904581841586549e-05, "loss": 4.374, "step": 16355 }, { "epoch": 1.25, "learning_rate": 7.900559153626453e-05, "loss": 4.5057, "step": 16360 }, { "epoch": 1.25, "learning_rate": 7.896536465666359e-05, "loss": 2.6971, "step": 16365 }, { "epoch": 1.25, "learning_rate": 7.892513777706265e-05, "loss": 1.9066, "step": 16370 }, { "epoch": 1.25, "learning_rate": 7.888491089746168e-05, "loss": 2.0673, "step": 16375 }, { "epoch": 1.25, "learning_rate": 7.884468401786074e-05, "loss": 2.7272, "step": 16380 }, { "epoch": 1.25, "learning_rate": 7.88044571382598e-05, "loss": 3.6661, "step": 16385 }, { "epoch": 1.25, "learning_rate": 7.876423025865884e-05, "loss": 4.982, "step": 16390 }, { "epoch": 1.25, "learning_rate": 7.872400337905788e-05, "loss": 3.9939, "step": 16395 }, { "epoch": 1.25, "learning_rate": 7.868377649945694e-05, "loss": 4.1607, "step": 16400 }, { "epoch": 1.25, "learning_rate": 7.864354961985599e-05, "loss": 3.9865, "step": 16405 }, { "epoch": 1.25, "learning_rate": 7.860332274025505e-05, "loss": 4.0355, "step": 16410 }, { "epoch": 1.25, "learning_rate": 7.856309586065409e-05, "loss": 3.2419, "step": 16415 }, { "epoch": 1.25, "learning_rate": 7.852286898105315e-05, "loss": 2.5692, "step": 16420 }, { "epoch": 1.26, "learning_rate": 7.84826421014522e-05, "loss": 3.0755, "step": 16425 }, { "epoch": 1.26, "learning_rate": 7.844241522185124e-05, "loss": 1.1237, "step": 16430 }, { "epoch": 1.26, "learning_rate": 7.84021883422503e-05, "loss": 2.8103, "step": 16435 }, { "epoch": 1.26, "learning_rate": 7.836196146264934e-05, "loss": 4.2614, "step": 16440 }, { "epoch": 1.26, "learning_rate": 7.83217345830484e-05, "loss": 4.7865, "step": 16445 }, { "epoch": 1.26, "learning_rate": 7.828150770344744e-05, "loss": 4.392, "step": 16450 }, { "epoch": 1.26, "learning_rate": 7.82412808238465e-05, "loss": 2.8524, "step": 16455 }, { "epoch": 1.26, "learning_rate": 7.820105394424555e-05, "loss": 2.9417, "step": 16460 }, { "epoch": 1.26, "learning_rate": 7.81608270646446e-05, "loss": 3.8519, "step": 16465 }, { "epoch": 1.26, "learning_rate": 7.812060018504365e-05, "loss": 3.798, "step": 16470 }, { "epoch": 1.26, "learning_rate": 7.80803733054427e-05, "loss": 3.2817, "step": 16475 }, { "epoch": 1.26, "learning_rate": 7.804014642584175e-05, "loss": 0.6551, "step": 16480 }, { "epoch": 1.26, "learning_rate": 7.799991954624081e-05, "loss": 0.9354, "step": 16485 }, { "epoch": 1.26, "learning_rate": 7.795969266663986e-05, "loss": 4.2775, "step": 16490 }, { "epoch": 1.26, "learning_rate": 7.79194657870389e-05, "loss": 4.2949, "step": 16495 }, { "epoch": 1.26, "learning_rate": 7.787923890743796e-05, "loss": 4.0965, "step": 16500 }, { "epoch": 1.26, "learning_rate": 7.7839012027837e-05, "loss": 3.2883, "step": 16505 }, { "epoch": 1.26, "learning_rate": 7.779878514823605e-05, "loss": 3.9234, "step": 16510 }, { "epoch": 1.26, "learning_rate": 7.77585582686351e-05, "loss": 2.299, "step": 16515 }, { "epoch": 1.26, "learning_rate": 7.771833138903416e-05, "loss": 1.2933, "step": 16520 }, { "epoch": 1.26, "learning_rate": 7.767810450943321e-05, "loss": 0.9545, "step": 16525 }, { "epoch": 1.26, "learning_rate": 7.763787762983225e-05, "loss": 2.7195, "step": 16530 }, { "epoch": 1.26, "learning_rate": 7.759765075023131e-05, "loss": 3.0816, "step": 16535 }, { "epoch": 1.26, "learning_rate": 7.755742387063036e-05, "loss": 3.9545, "step": 16540 }, { "epoch": 1.26, "learning_rate": 7.75171969910294e-05, "loss": 4.1322, "step": 16545 }, { "epoch": 1.26, "learning_rate": 7.747697011142846e-05, "loss": 4.1234, "step": 16550 }, { "epoch": 1.27, "learning_rate": 7.743674323182752e-05, "loss": 3.8441, "step": 16555 }, { "epoch": 1.27, "learning_rate": 7.739651635222656e-05, "loss": 4.11, "step": 16560 }, { "epoch": 1.27, "learning_rate": 7.735628947262561e-05, "loss": 1.9523, "step": 16565 }, { "epoch": 1.27, "learning_rate": 7.731606259302467e-05, "loss": 2.1999, "step": 16570 }, { "epoch": 1.27, "learning_rate": 7.727583571342371e-05, "loss": 2.8916, "step": 16575 }, { "epoch": 1.27, "learning_rate": 7.723560883382276e-05, "loss": 0.9832, "step": 16580 }, { "epoch": 1.27, "learning_rate": 7.719538195422181e-05, "loss": 2.7123, "step": 16585 }, { "epoch": 1.27, "learning_rate": 7.715515507462087e-05, "loss": 4.9367, "step": 16590 }, { "epoch": 1.27, "learning_rate": 7.711492819501992e-05, "loss": 3.8705, "step": 16595 }, { "epoch": 1.27, "learning_rate": 7.707470131541896e-05, "loss": 3.7537, "step": 16600 }, { "epoch": 1.27, "learning_rate": 7.703447443581802e-05, "loss": 3.0968, "step": 16605 }, { "epoch": 1.27, "learning_rate": 7.699424755621706e-05, "loss": 2.827, "step": 16610 }, { "epoch": 1.27, "learning_rate": 7.695402067661612e-05, "loss": 3.0993, "step": 16615 }, { "epoch": 1.27, "learning_rate": 7.691379379701517e-05, "loss": 0.7201, "step": 16620 }, { "epoch": 1.27, "learning_rate": 7.687356691741423e-05, "loss": 2.1911, "step": 16625 }, { "epoch": 1.27, "learning_rate": 7.683334003781327e-05, "loss": 0.3554, "step": 16630 }, { "epoch": 1.27, "learning_rate": 7.679311315821233e-05, "loss": 1.0917, "step": 16635 }, { "epoch": 1.27, "learning_rate": 7.675288627861137e-05, "loss": 4.0449, "step": 16640 }, { "epoch": 1.27, "learning_rate": 7.671265939901042e-05, "loss": 4.4773, "step": 16645 }, { "epoch": 1.27, "learning_rate": 7.667243251940948e-05, "loss": 4.5842, "step": 16650 }, { "epoch": 1.27, "learning_rate": 7.663220563980852e-05, "loss": 4.2728, "step": 16655 }, { "epoch": 1.27, "learning_rate": 7.659197876020758e-05, "loss": 3.2278, "step": 16660 }, { "epoch": 1.27, "learning_rate": 7.655175188060662e-05, "loss": 3.1791, "step": 16665 }, { "epoch": 1.27, "learning_rate": 7.651152500100568e-05, "loss": 2.4688, "step": 16670 }, { "epoch": 1.27, "learning_rate": 7.647129812140473e-05, "loss": 3.6795, "step": 16675 }, { "epoch": 1.27, "learning_rate": 7.643107124180377e-05, "loss": 1.2943, "step": 16680 }, { "epoch": 1.28, "learning_rate": 7.639084436220283e-05, "loss": 3.7789, "step": 16685 }, { "epoch": 1.28, "learning_rate": 7.635061748260189e-05, "loss": 5.2229, "step": 16690 }, { "epoch": 1.28, "learning_rate": 7.631039060300092e-05, "loss": 4.4244, "step": 16695 }, { "epoch": 1.28, "learning_rate": 7.627016372339998e-05, "loss": 2.974, "step": 16700 }, { "epoch": 1.28, "learning_rate": 7.622993684379903e-05, "loss": 3.6898, "step": 16705 }, { "epoch": 1.28, "learning_rate": 7.618970996419808e-05, "loss": 2.8105, "step": 16710 }, { "epoch": 1.28, "learning_rate": 7.614948308459712e-05, "loss": 1.7367, "step": 16715 }, { "epoch": 1.28, "learning_rate": 7.610925620499618e-05, "loss": 2.6505, "step": 16720 }, { "epoch": 1.28, "learning_rate": 7.606902932539524e-05, "loss": 1.5005, "step": 16725 }, { "epoch": 1.28, "learning_rate": 7.602880244579427e-05, "loss": 1.0236, "step": 16730 }, { "epoch": 1.28, "learning_rate": 7.598857556619333e-05, "loss": 3.0022, "step": 16735 }, { "epoch": 1.28, "learning_rate": 7.594834868659239e-05, "loss": 4.4209, "step": 16740 }, { "epoch": 1.28, "learning_rate": 7.590812180699143e-05, "loss": 4.0451, "step": 16745 }, { "epoch": 1.28, "learning_rate": 7.586789492739048e-05, "loss": 4.0252, "step": 16750 }, { "epoch": 1.28, "learning_rate": 7.582766804778954e-05, "loss": 2.3807, "step": 16755 }, { "epoch": 1.28, "learning_rate": 7.57874411681886e-05, "loss": 3.1335, "step": 16760 }, { "epoch": 1.28, "learning_rate": 7.574721428858764e-05, "loss": 3.6875, "step": 16765 }, { "epoch": 1.28, "learning_rate": 7.570698740898668e-05, "loss": 2.3758, "step": 16770 }, { "epoch": 1.28, "learning_rate": 7.566676052938574e-05, "loss": 1.6693, "step": 16775 }, { "epoch": 1.28, "learning_rate": 7.562653364978479e-05, "loss": 2.396, "step": 16780 }, { "epoch": 1.28, "learning_rate": 7.558630677018384e-05, "loss": 1.7757, "step": 16785 }, { "epoch": 1.28, "learning_rate": 7.554607989058289e-05, "loss": 4.6365, "step": 16790 }, { "epoch": 1.28, "learning_rate": 7.550585301098195e-05, "loss": 3.7377, "step": 16795 }, { "epoch": 1.28, "learning_rate": 7.546562613138099e-05, "loss": 3.5774, "step": 16800 }, { "epoch": 1.28, "learning_rate": 7.542539925178004e-05, "loss": 3.6809, "step": 16805 }, { "epoch": 1.28, "learning_rate": 7.53851723721791e-05, "loss": 3.406, "step": 16810 }, { "epoch": 1.29, "learning_rate": 7.534494549257814e-05, "loss": 2.5013, "step": 16815 }, { "epoch": 1.29, "learning_rate": 7.53047186129772e-05, "loss": 3.9091, "step": 16820 }, { "epoch": 1.29, "learning_rate": 7.526449173337624e-05, "loss": 3.2679, "step": 16825 }, { "epoch": 1.29, "learning_rate": 7.52242648537753e-05, "loss": 3.0857, "step": 16830 }, { "epoch": 1.29, "learning_rate": 7.518403797417435e-05, "loss": 3.6681, "step": 16835 }, { "epoch": 1.29, "learning_rate": 7.51438110945734e-05, "loss": 6.1348, "step": 16840 }, { "epoch": 1.29, "learning_rate": 7.510358421497245e-05, "loss": 3.2572, "step": 16845 }, { "epoch": 1.29, "learning_rate": 7.50633573353715e-05, "loss": 4.0573, "step": 16850 }, { "epoch": 1.29, "learning_rate": 7.502313045577055e-05, "loss": 3.3355, "step": 16855 }, { "epoch": 1.29, "learning_rate": 7.498290357616961e-05, "loss": 3.1018, "step": 16860 }, { "epoch": 1.29, "learning_rate": 7.494267669656864e-05, "loss": 2.3866, "step": 16865 }, { "epoch": 1.29, "learning_rate": 7.49024498169677e-05, "loss": 4.0219, "step": 16870 }, { "epoch": 1.29, "learning_rate": 7.486222293736676e-05, "loss": 1.12, "step": 16875 }, { "epoch": 1.29, "learning_rate": 7.48219960577658e-05, "loss": 1.5368, "step": 16880 }, { "epoch": 1.29, "learning_rate": 7.478176917816485e-05, "loss": 3.0138, "step": 16885 }, { "epoch": 1.29, "learning_rate": 7.47415422985639e-05, "loss": 3.6557, "step": 16890 }, { "epoch": 1.29, "learning_rate": 7.470131541896296e-05, "loss": 3.2788, "step": 16895 }, { "epoch": 1.29, "learning_rate": 7.4661088539362e-05, "loss": 3.7785, "step": 16900 }, { "epoch": 1.29, "learning_rate": 7.462086165976105e-05, "loss": 3.4871, "step": 16905 }, { "epoch": 1.29, "learning_rate": 7.458063478016011e-05, "loss": 4.8828, "step": 16910 }, { "epoch": 1.29, "learning_rate": 7.454040790055916e-05, "loss": 3.833, "step": 16915 }, { "epoch": 1.29, "learning_rate": 7.45001810209582e-05, "loss": 1.6004, "step": 16920 }, { "epoch": 1.29, "learning_rate": 7.445995414135726e-05, "loss": 2.4341, "step": 16925 }, { "epoch": 1.29, "learning_rate": 7.441972726175632e-05, "loss": 2.7167, "step": 16930 }, { "epoch": 1.29, "learning_rate": 7.437950038215536e-05, "loss": 3.296, "step": 16935 }, { "epoch": 1.29, "learning_rate": 7.43392735025544e-05, "loss": 4.0896, "step": 16940 }, { "epoch": 1.3, "learning_rate": 7.429904662295346e-05, "loss": 2.8058, "step": 16945 }, { "epoch": 1.3, "learning_rate": 7.425881974335251e-05, "loss": 2.2906, "step": 16950 }, { "epoch": 1.3, "learning_rate": 7.421859286375155e-05, "loss": 3.6217, "step": 16955 }, { "epoch": 1.3, "learning_rate": 7.417836598415061e-05, "loss": 2.6921, "step": 16960 }, { "epoch": 1.3, "learning_rate": 7.413813910454967e-05, "loss": 3.0229, "step": 16965 }, { "epoch": 1.3, "learning_rate": 7.409791222494872e-05, "loss": 2.7896, "step": 16970 }, { "epoch": 1.3, "learning_rate": 7.405768534534776e-05, "loss": 2.8448, "step": 16975 }, { "epoch": 1.3, "learning_rate": 7.401745846574682e-05, "loss": 2.2765, "step": 16980 }, { "epoch": 1.3, "learning_rate": 7.397723158614586e-05, "loss": 3.1928, "step": 16985 }, { "epoch": 1.3, "learning_rate": 7.393700470654492e-05, "loss": 4.5197, "step": 16990 }, { "epoch": 1.3, "learning_rate": 7.389677782694397e-05, "loss": 4.3378, "step": 16995 }, { "epoch": 1.3, "learning_rate": 7.385655094734301e-05, "loss": 3.5922, "step": 17000 }, { "epoch": 1.3, "learning_rate": 7.381632406774207e-05, "loss": 3.5587, "step": 17005 }, { "epoch": 1.3, "learning_rate": 7.377609718814113e-05, "loss": 3.9139, "step": 17010 }, { "epoch": 1.3, "learning_rate": 7.373587030854017e-05, "loss": 2.5823, "step": 17015 }, { "epoch": 1.3, "learning_rate": 7.369564342893922e-05, "loss": 2.5079, "step": 17020 }, { "epoch": 1.3, "learning_rate": 7.365541654933827e-05, "loss": 0.9608, "step": 17025 }, { "epoch": 1.3, "learning_rate": 7.362323504565751e-05, "loss": 2.5257, "step": 17030 }, { "epoch": 1.3, "learning_rate": 7.358300816605657e-05, "loss": 3.4347, "step": 17035 }, { "epoch": 1.3, "learning_rate": 7.354278128645561e-05, "loss": 3.7679, "step": 17040 }, { "epoch": 1.3, "learning_rate": 7.350255440685467e-05, "loss": 3.6029, "step": 17045 }, { "epoch": 1.3, "learning_rate": 7.346232752725371e-05, "loss": 3.6551, "step": 17050 }, { "epoch": 1.3, "learning_rate": 7.342210064765277e-05, "loss": 4.3367, "step": 17055 }, { "epoch": 1.3, "learning_rate": 7.338187376805182e-05, "loss": 3.6209, "step": 17060 }, { "epoch": 1.3, "learning_rate": 7.334164688845086e-05, "loss": 4.4252, "step": 17065 }, { "epoch": 1.3, "learning_rate": 7.330142000884992e-05, "loss": 3.3807, "step": 17070 }, { "epoch": 1.31, "learning_rate": 7.326119312924898e-05, "loss": 3.8533, "step": 17075 }, { "epoch": 1.31, "learning_rate": 7.322096624964801e-05, "loss": 2.6486, "step": 17080 }, { "epoch": 1.31, "learning_rate": 7.318073937004707e-05, "loss": 1.0389, "step": 17085 }, { "epoch": 1.31, "learning_rate": 7.314051249044613e-05, "loss": 4.8477, "step": 17090 }, { "epoch": 1.31, "learning_rate": 7.310028561084517e-05, "loss": 4.098, "step": 17095 }, { "epoch": 1.31, "learning_rate": 7.306005873124421e-05, "loss": 3.5662, "step": 17100 }, { "epoch": 1.31, "learning_rate": 7.301983185164327e-05, "loss": 4.3957, "step": 17105 }, { "epoch": 1.31, "learning_rate": 7.297960497204233e-05, "loss": 1.8946, "step": 17110 }, { "epoch": 1.31, "learning_rate": 7.293937809244136e-05, "loss": 4.3447, "step": 17115 }, { "epoch": 1.31, "learning_rate": 7.289915121284042e-05, "loss": 3.3158, "step": 17120 }, { "epoch": 1.31, "learning_rate": 7.285892433323948e-05, "loss": 1.9162, "step": 17125 }, { "epoch": 1.31, "learning_rate": 7.281869745363852e-05, "loss": 2.8082, "step": 17130 }, { "epoch": 1.31, "learning_rate": 7.277847057403757e-05, "loss": 2.7045, "step": 17135 }, { "epoch": 1.31, "learning_rate": 7.273824369443663e-05, "loss": 4.3832, "step": 17140 }, { "epoch": 1.31, "learning_rate": 7.269801681483568e-05, "loss": 4.2967, "step": 17145 }, { "epoch": 1.31, "learning_rate": 7.265778993523473e-05, "loss": 4.1742, "step": 17150 }, { "epoch": 1.31, "learning_rate": 7.261756305563377e-05, "loss": 4.3498, "step": 17155 }, { "epoch": 1.31, "learning_rate": 7.257733617603283e-05, "loss": 3.0135, "step": 17160 }, { "epoch": 1.31, "learning_rate": 7.253710929643188e-05, "loss": 3.2709, "step": 17165 }, { "epoch": 1.31, "learning_rate": 7.249688241683092e-05, "loss": 2.5106, "step": 17170 }, { "epoch": 1.31, "learning_rate": 7.245665553722998e-05, "loss": 1.6757, "step": 17175 }, { "epoch": 1.31, "learning_rate": 7.241642865762904e-05, "loss": 2.2027, "step": 17180 }, { "epoch": 1.31, "learning_rate": 7.237620177802808e-05, "loss": 3.2238, "step": 17185 }, { "epoch": 1.31, "learning_rate": 7.233597489842713e-05, "loss": 4.7036, "step": 17190 }, { "epoch": 1.31, "learning_rate": 7.229574801882619e-05, "loss": 4.2068, "step": 17195 }, { "epoch": 1.31, "learning_rate": 7.225552113922523e-05, "loss": 3.9623, "step": 17200 }, { "epoch": 1.31, "learning_rate": 7.221529425962429e-05, "loss": 3.6021, "step": 17205 }, { "epoch": 1.32, "learning_rate": 7.217506738002333e-05, "loss": 2.7974, "step": 17210 }, { "epoch": 1.32, "learning_rate": 7.213484050042238e-05, "loss": 3.2512, "step": 17215 }, { "epoch": 1.32, "learning_rate": 7.209461362082144e-05, "loss": 2.4599, "step": 17220 }, { "epoch": 1.32, "learning_rate": 7.20543867412205e-05, "loss": 3.6352, "step": 17225 }, { "epoch": 1.32, "learning_rate": 7.201415986161954e-05, "loss": 1.6141, "step": 17230 }, { "epoch": 1.32, "learning_rate": 7.197393298201858e-05, "loss": 1.3339, "step": 17235 }, { "epoch": 1.32, "learning_rate": 7.193370610241764e-05, "loss": 4.0625, "step": 17240 }, { "epoch": 1.32, "learning_rate": 7.189347922281669e-05, "loss": 3.9266, "step": 17245 }, { "epoch": 1.32, "learning_rate": 7.185325234321573e-05, "loss": 4.1445, "step": 17250 }, { "epoch": 1.32, "learning_rate": 7.181302546361479e-05, "loss": 4.4178, "step": 17255 }, { "epoch": 1.32, "learning_rate": 7.177279858401385e-05, "loss": 5.1043, "step": 17260 }, { "epoch": 1.32, "learning_rate": 7.173257170441289e-05, "loss": 1.8865, "step": 17265 }, { "epoch": 1.32, "learning_rate": 7.169234482481194e-05, "loss": 3.1843, "step": 17270 }, { "epoch": 1.32, "learning_rate": 7.1652117945211e-05, "loss": 3.2057, "step": 17275 }, { "epoch": 1.32, "learning_rate": 7.161189106561005e-05, "loss": 2.154, "step": 17280 }, { "epoch": 1.32, "learning_rate": 7.157166418600909e-05, "loss": 4.2513, "step": 17285 }, { "epoch": 1.32, "learning_rate": 7.153143730640814e-05, "loss": 4.2359, "step": 17290 }, { "epoch": 1.32, "learning_rate": 7.14912104268072e-05, "loss": 3.6398, "step": 17295 }, { "epoch": 1.32, "learning_rate": 7.145098354720625e-05, "loss": 3.352, "step": 17300 }, { "epoch": 1.32, "learning_rate": 7.141075666760529e-05, "loss": 2.9969, "step": 17305 }, { "epoch": 1.32, "learning_rate": 7.137052978800435e-05, "loss": 2.6333, "step": 17310 }, { "epoch": 1.32, "learning_rate": 7.133030290840341e-05, "loss": 3.7852, "step": 17315 }, { "epoch": 1.32, "learning_rate": 7.129007602880244e-05, "loss": 1.6063, "step": 17320 }, { "epoch": 1.32, "learning_rate": 7.12498491492015e-05, "loss": 3.288, "step": 17325 }, { "epoch": 1.32, "learning_rate": 7.120962226960056e-05, "loss": 1.2184, "step": 17330 }, { "epoch": 1.32, "learning_rate": 7.11693953899996e-05, "loss": 2.4909, "step": 17335 }, { "epoch": 1.33, "learning_rate": 7.112916851039864e-05, "loss": 4.035, "step": 17340 }, { "epoch": 1.33, "learning_rate": 7.10889416307977e-05, "loss": 4.6348, "step": 17345 }, { "epoch": 1.33, "learning_rate": 7.104871475119676e-05, "loss": 3.6294, "step": 17350 }, { "epoch": 1.33, "learning_rate": 7.10084878715958e-05, "loss": 4.0395, "step": 17355 }, { "epoch": 1.33, "learning_rate": 7.096826099199485e-05, "loss": 4.607, "step": 17360 }, { "epoch": 1.33, "learning_rate": 7.092803411239391e-05, "loss": 3.9953, "step": 17365 }, { "epoch": 1.33, "learning_rate": 7.088780723279295e-05, "loss": 2.5543, "step": 17370 }, { "epoch": 1.33, "learning_rate": 7.084758035319201e-05, "loss": 2.2387, "step": 17375 }, { "epoch": 1.33, "learning_rate": 7.080735347359106e-05, "loss": 1.8694, "step": 17380 }, { "epoch": 1.33, "learning_rate": 7.07671265939901e-05, "loss": 2.5382, "step": 17385 }, { "epoch": 1.33, "learning_rate": 7.072689971438916e-05, "loss": 5.0727, "step": 17390 }, { "epoch": 1.33, "learning_rate": 7.06866728347882e-05, "loss": 3.9621, "step": 17395 }, { "epoch": 1.33, "learning_rate": 7.064644595518726e-05, "loss": 4.0389, "step": 17400 }, { "epoch": 1.33, "learning_rate": 7.06062190755863e-05, "loss": 3.7287, "step": 17405 }, { "epoch": 1.33, "learning_rate": 7.056599219598536e-05, "loss": 3.1371, "step": 17410 }, { "epoch": 1.33, "learning_rate": 7.052576531638441e-05, "loss": 2.7134, "step": 17415 }, { "epoch": 1.33, "learning_rate": 7.048553843678345e-05, "loss": 3.0964, "step": 17420 }, { "epoch": 1.33, "learning_rate": 7.044531155718251e-05, "loss": 1.5899, "step": 17425 }, { "epoch": 1.33, "learning_rate": 7.040508467758157e-05, "loss": 1.634, "step": 17430 }, { "epoch": 1.33, "learning_rate": 7.036485779798062e-05, "loss": 3.0479, "step": 17435 }, { "epoch": 1.33, "learning_rate": 7.032463091837966e-05, "loss": 4.1072, "step": 17440 }, { "epoch": 1.33, "learning_rate": 7.028440403877872e-05, "loss": 3.6551, "step": 17445 }, { "epoch": 1.33, "learning_rate": 7.024417715917778e-05, "loss": 4.5043, "step": 17450 }, { "epoch": 1.33, "learning_rate": 7.020395027957681e-05, "loss": 3.7246, "step": 17455 }, { "epoch": 1.33, "learning_rate": 7.016372339997587e-05, "loss": 3.2265, "step": 17460 }, { "epoch": 1.33, "learning_rate": 7.012349652037492e-05, "loss": 3.3408, "step": 17465 }, { "epoch": 1.34, "learning_rate": 7.008326964077397e-05, "loss": 3.5267, "step": 17470 }, { "epoch": 1.34, "learning_rate": 7.004304276117301e-05, "loss": 1.8221, "step": 17475 }, { "epoch": 1.34, "learning_rate": 7.000281588157207e-05, "loss": 1.5264, "step": 17480 }, { "epoch": 1.34, "learning_rate": 6.996258900197113e-05, "loss": 2.7208, "step": 17485 }, { "epoch": 1.34, "learning_rate": 6.992236212237016e-05, "loss": 3.7732, "step": 17490 }, { "epoch": 1.34, "learning_rate": 6.988213524276922e-05, "loss": 4.3205, "step": 17495 }, { "epoch": 1.34, "learning_rate": 6.984190836316828e-05, "loss": 4.0115, "step": 17500 }, { "epoch": 1.34, "learning_rate": 6.980168148356732e-05, "loss": 3.7114, "step": 17505 }, { "epoch": 1.34, "learning_rate": 6.976145460396637e-05, "loss": 2.9249, "step": 17510 }, { "epoch": 1.34, "learning_rate": 6.972122772436543e-05, "loss": 1.8061, "step": 17515 }, { "epoch": 1.34, "learning_rate": 6.968100084476447e-05, "loss": 1.9254, "step": 17520 }, { "epoch": 1.34, "learning_rate": 6.964077396516353e-05, "loss": 0.8883, "step": 17525 }, { "epoch": 1.34, "learning_rate": 6.960054708556257e-05, "loss": 0.7865, "step": 17530 }, { "epoch": 1.34, "learning_rate": 6.956032020596163e-05, "loss": 1.5326, "step": 17535 }, { "epoch": 1.34, "learning_rate": 6.952009332636068e-05, "loss": 3.7009, "step": 17540 }, { "epoch": 1.34, "learning_rate": 6.947986644675972e-05, "loss": 3.5918, "step": 17545 }, { "epoch": 1.34, "learning_rate": 6.943963956715878e-05, "loss": 3.55, "step": 17550 }, { "epoch": 1.34, "learning_rate": 6.939941268755782e-05, "loss": 2.8442, "step": 17555 }, { "epoch": 1.34, "learning_rate": 6.935918580795688e-05, "loss": 3.8076, "step": 17560 }, { "epoch": 1.34, "learning_rate": 6.931895892835593e-05, "loss": 2.268, "step": 17565 }, { "epoch": 1.34, "learning_rate": 6.927873204875498e-05, "loss": 1.8671, "step": 17570 }, { "epoch": 1.34, "learning_rate": 6.923850516915403e-05, "loss": 3.1274, "step": 17575 }, { "epoch": 1.34, "learning_rate": 6.919827828955309e-05, "loss": 1.8582, "step": 17580 }, { "epoch": 1.34, "learning_rate": 6.915805140995213e-05, "loss": 1.7748, "step": 17585 }, { "epoch": 1.34, "learning_rate": 6.911782453035118e-05, "loss": 3.6909, "step": 17590 }, { "epoch": 1.34, "learning_rate": 6.907759765075024e-05, "loss": 4.9395, "step": 17595 }, { "epoch": 1.35, "learning_rate": 6.90373707711493e-05, "loss": 5.6717, "step": 17600 }, { "epoch": 1.35, "learning_rate": 6.899714389154834e-05, "loss": 3.5836, "step": 17605 }, { "epoch": 1.35, "learning_rate": 6.895691701194738e-05, "loss": 4.4979, "step": 17610 }, { "epoch": 1.35, "learning_rate": 6.891669013234644e-05, "loss": 2.4373, "step": 17615 }, { "epoch": 1.35, "learning_rate": 6.887646325274549e-05, "loss": 2.756, "step": 17620 }, { "epoch": 1.35, "learning_rate": 6.883623637314453e-05, "loss": 1.4593, "step": 17625 }, { "epoch": 1.35, "learning_rate": 6.879600949354359e-05, "loss": 1.1477, "step": 17630 }, { "epoch": 1.35, "learning_rate": 6.875578261394265e-05, "loss": 1.9229, "step": 17635 }, { "epoch": 1.35, "learning_rate": 6.871555573434169e-05, "loss": 4.017, "step": 17640 }, { "epoch": 1.35, "learning_rate": 6.867532885474074e-05, "loss": 5.0738, "step": 17645 }, { "epoch": 1.35, "learning_rate": 6.86351019751398e-05, "loss": 3.6027, "step": 17650 }, { "epoch": 1.35, "learning_rate": 6.859487509553885e-05, "loss": 3.1089, "step": 17655 }, { "epoch": 1.35, "learning_rate": 6.855464821593788e-05, "loss": 3.3415, "step": 17660 }, { "epoch": 1.35, "learning_rate": 6.851442133633694e-05, "loss": 3.0649, "step": 17665 }, { "epoch": 1.35, "learning_rate": 6.8474194456736e-05, "loss": 1.925, "step": 17670 }, { "epoch": 1.35, "learning_rate": 6.843396757713505e-05, "loss": 2.909, "step": 17675 }, { "epoch": 1.35, "learning_rate": 6.839374069753409e-05, "loss": 3.0972, "step": 17680 }, { "epoch": 1.35, "learning_rate": 6.835351381793315e-05, "loss": 1.8968, "step": 17685 }, { "epoch": 1.35, "learning_rate": 6.831328693833219e-05, "loss": 4.1662, "step": 17690 }, { "epoch": 1.35, "learning_rate": 6.827306005873125e-05, "loss": 3.5768, "step": 17695 }, { "epoch": 1.35, "learning_rate": 6.82328331791303e-05, "loss": 3.7977, "step": 17700 }, { "epoch": 1.35, "learning_rate": 6.819260629952935e-05, "loss": 2.5532, "step": 17705 }, { "epoch": 1.35, "learning_rate": 6.81523794199284e-05, "loss": 2.4193, "step": 17710 }, { "epoch": 1.35, "learning_rate": 6.811215254032744e-05, "loss": 2.7911, "step": 17715 }, { "epoch": 1.35, "learning_rate": 6.80719256607265e-05, "loss": 1.1723, "step": 17720 }, { "epoch": 1.35, "learning_rate": 6.803169878112555e-05, "loss": 2.1002, "step": 17725 }, { "epoch": 1.36, "learning_rate": 6.79914719015246e-05, "loss": 1.357, "step": 17730 }, { "epoch": 1.36, "learning_rate": 6.795124502192365e-05, "loss": 2.7562, "step": 17735 }, { "epoch": 1.36, "learning_rate": 6.791101814232271e-05, "loss": 3.9273, "step": 17740 }, { "epoch": 1.36, "learning_rate": 6.787079126272175e-05, "loss": 4.8758, "step": 17745 }, { "epoch": 1.36, "learning_rate": 6.783056438312081e-05, "loss": 3.9227, "step": 17750 }, { "epoch": 1.36, "learning_rate": 6.779033750351986e-05, "loss": 3.1739, "step": 17755 }, { "epoch": 1.36, "learning_rate": 6.77501106239189e-05, "loss": 3.0794, "step": 17760 }, { "epoch": 1.36, "learning_rate": 6.770988374431796e-05, "loss": 2.0279, "step": 17765 }, { "epoch": 1.36, "learning_rate": 6.766965686471702e-05, "loss": 2.2346, "step": 17770 }, { "epoch": 1.36, "learning_rate": 6.762942998511606e-05, "loss": 2.6593, "step": 17775 }, { "epoch": 1.36, "learning_rate": 6.75892031055151e-05, "loss": 0.953, "step": 17780 }, { "epoch": 1.36, "learning_rate": 6.754897622591416e-05, "loss": 2.3692, "step": 17785 }, { "epoch": 1.36, "learning_rate": 6.750874934631321e-05, "loss": 4.9299, "step": 17790 }, { "epoch": 1.36, "learning_rate": 6.746852246671225e-05, "loss": 4.4605, "step": 17795 }, { "epoch": 1.36, "learning_rate": 6.742829558711131e-05, "loss": 4.1517, "step": 17800 }, { "epoch": 1.36, "learning_rate": 6.738806870751037e-05, "loss": 2.3445, "step": 17805 }, { "epoch": 1.36, "learning_rate": 6.734784182790941e-05, "loss": 4.102, "step": 17810 }, { "epoch": 1.36, "learning_rate": 6.730761494830846e-05, "loss": 2.3169, "step": 17815 }, { "epoch": 1.36, "learning_rate": 6.726738806870752e-05, "loss": 3.781, "step": 17820 }, { "epoch": 1.36, "learning_rate": 6.722716118910656e-05, "loss": 1.5209, "step": 17825 }, { "epoch": 1.36, "learning_rate": 6.718693430950561e-05, "loss": 2.827, "step": 17830 }, { "epoch": 1.36, "learning_rate": 6.714670742990467e-05, "loss": 1.0628, "step": 17835 }, { "epoch": 1.36, "learning_rate": 6.710648055030372e-05, "loss": 4.5979, "step": 17840 }, { "epoch": 1.36, "learning_rate": 6.706625367070277e-05, "loss": 3.8928, "step": 17845 }, { "epoch": 1.36, "learning_rate": 6.702602679110181e-05, "loss": 4.7121, "step": 17850 }, { "epoch": 1.36, "learning_rate": 6.698579991150087e-05, "loss": 3.5897, "step": 17855 }, { "epoch": 1.37, "learning_rate": 6.694557303189992e-05, "loss": 2.4365, "step": 17860 }, { "epoch": 1.37, "learning_rate": 6.690534615229896e-05, "loss": 3.8054, "step": 17865 }, { "epoch": 1.37, "learning_rate": 6.686511927269802e-05, "loss": 3.1852, "step": 17870 }, { "epoch": 1.37, "learning_rate": 6.682489239309708e-05, "loss": 1.203, "step": 17875 }, { "epoch": 1.37, "learning_rate": 6.678466551349612e-05, "loss": 0.7864, "step": 17880 }, { "epoch": 1.37, "learning_rate": 6.674443863389517e-05, "loss": 1.6607, "step": 17885 }, { "epoch": 1.37, "learning_rate": 6.670421175429422e-05, "loss": 4.7062, "step": 17890 }, { "epoch": 1.37, "learning_rate": 6.666398487469327e-05, "loss": 4.7527, "step": 17895 }, { "epoch": 1.37, "learning_rate": 6.662375799509233e-05, "loss": 3.8579, "step": 17900 }, { "epoch": 1.37, "learning_rate": 6.658353111549137e-05, "loss": 3.7981, "step": 17905 }, { "epoch": 1.37, "learning_rate": 6.654330423589043e-05, "loss": 2.9869, "step": 17910 }, { "epoch": 1.37, "learning_rate": 6.650307735628948e-05, "loss": 1.7792, "step": 17915 }, { "epoch": 1.37, "learning_rate": 6.646285047668853e-05, "loss": 4.4921, "step": 17920 }, { "epoch": 1.37, "learning_rate": 6.642262359708758e-05, "loss": 3.5635, "step": 17925 }, { "epoch": 1.37, "learning_rate": 6.638239671748662e-05, "loss": 2.6001, "step": 17930 }, { "epoch": 1.37, "learning_rate": 6.634216983788568e-05, "loss": 1.0408, "step": 17935 }, { "epoch": 1.37, "learning_rate": 6.630194295828473e-05, "loss": 4.8291, "step": 17940 }, { "epoch": 1.37, "learning_rate": 6.626171607868378e-05, "loss": 4.6105, "step": 17945 }, { "epoch": 1.37, "learning_rate": 6.622148919908283e-05, "loss": 3.6875, "step": 17950 }, { "epoch": 1.37, "learning_rate": 6.618126231948189e-05, "loss": 3.7762, "step": 17955 }, { "epoch": 1.37, "learning_rate": 6.614103543988093e-05, "loss": 3.6926, "step": 17960 }, { "epoch": 1.37, "learning_rate": 6.610080856027998e-05, "loss": 4.0579, "step": 17965 }, { "epoch": 1.37, "learning_rate": 6.606058168067903e-05, "loss": 2.8567, "step": 17970 }, { "epoch": 1.37, "learning_rate": 6.602035480107809e-05, "loss": 2.1443, "step": 17975 }, { "epoch": 1.37, "learning_rate": 6.598012792147714e-05, "loss": 3.152, "step": 17980 }, { "epoch": 1.37, "learning_rate": 6.593990104187618e-05, "loss": 1.3468, "step": 17985 }, { "epoch": 1.37, "learning_rate": 6.589967416227524e-05, "loss": 4.0677, "step": 17990 }, { "epoch": 1.38, "learning_rate": 6.585944728267429e-05, "loss": 4.3828, "step": 17995 }, { "epoch": 1.38, "learning_rate": 6.581922040307333e-05, "loss": 4.085, "step": 18000 }, { "epoch": 1.38, "learning_rate": 6.577899352347239e-05, "loss": 3.9857, "step": 18005 }, { "epoch": 1.38, "learning_rate": 6.573876664387145e-05, "loss": 4.3557, "step": 18010 }, { "epoch": 1.38, "learning_rate": 6.569853976427048e-05, "loss": 3.3637, "step": 18015 }, { "epoch": 1.38, "learning_rate": 6.565831288466954e-05, "loss": 3.1275, "step": 18020 }, { "epoch": 1.38, "learning_rate": 6.56180860050686e-05, "loss": 1.9914, "step": 18025 }, { "epoch": 1.38, "learning_rate": 6.557785912546764e-05, "loss": 1.9206, "step": 18030 }, { "epoch": 1.38, "learning_rate": 6.553763224586668e-05, "loss": 2.7614, "step": 18035 }, { "epoch": 1.38, "learning_rate": 6.549740536626574e-05, "loss": 3.8361, "step": 18040 }, { "epoch": 1.38, "learning_rate": 6.54571784866648e-05, "loss": 4.5418, "step": 18045 }, { "epoch": 1.38, "learning_rate": 6.541695160706384e-05, "loss": 3.9312, "step": 18050 }, { "epoch": 1.38, "learning_rate": 6.537672472746289e-05, "loss": 2.5614, "step": 18055 }, { "epoch": 1.38, "learning_rate": 6.533649784786195e-05, "loss": 2.4895, "step": 18060 }, { "epoch": 1.38, "learning_rate": 6.529627096826099e-05, "loss": 1.8234, "step": 18065 }, { "epoch": 1.38, "learning_rate": 6.525604408866005e-05, "loss": 2.198, "step": 18070 }, { "epoch": 1.38, "learning_rate": 6.52158172090591e-05, "loss": 1.9346, "step": 18075 }, { "epoch": 1.38, "learning_rate": 6.517559032945815e-05, "loss": 1.823, "step": 18080 }, { "epoch": 1.38, "learning_rate": 6.51353634498572e-05, "loss": 1.2123, "step": 18085 }, { "epoch": 1.38, "learning_rate": 6.509513657025624e-05, "loss": 3.9965, "step": 18090 }, { "epoch": 1.38, "learning_rate": 6.50549096906553e-05, "loss": 4.3736, "step": 18095 }, { "epoch": 1.38, "learning_rate": 6.501468281105435e-05, "loss": 3.9881, "step": 18100 }, { "epoch": 1.38, "learning_rate": 6.49744559314534e-05, "loss": 3.8488, "step": 18105 }, { "epoch": 1.38, "learning_rate": 6.493422905185245e-05, "loss": 4.2275, "step": 18110 }, { "epoch": 1.38, "learning_rate": 6.48940021722515e-05, "loss": 4.3555, "step": 18115 }, { "epoch": 1.38, "learning_rate": 6.485377529265055e-05, "loss": 3.4964, "step": 18120 }, { "epoch": 1.39, "learning_rate": 6.481354841304961e-05, "loss": 2.727, "step": 18125 }, { "epoch": 1.39, "learning_rate": 6.477332153344865e-05, "loss": 3.9642, "step": 18130 }, { "epoch": 1.39, "learning_rate": 6.47330946538477e-05, "loss": 1.054, "step": 18135 }, { "epoch": 1.39, "learning_rate": 6.469286777424676e-05, "loss": 4.3533, "step": 18140 }, { "epoch": 1.39, "learning_rate": 6.465264089464582e-05, "loss": 3.6725, "step": 18145 }, { "epoch": 1.39, "learning_rate": 6.461241401504485e-05, "loss": 3.9557, "step": 18150 }, { "epoch": 1.39, "learning_rate": 6.45721871354439e-05, "loss": 4.5719, "step": 18155 }, { "epoch": 1.39, "learning_rate": 6.453196025584296e-05, "loss": 3.3895, "step": 18160 }, { "epoch": 1.39, "learning_rate": 6.449173337624201e-05, "loss": 2.3612, "step": 18165 }, { "epoch": 1.39, "learning_rate": 6.445150649664105e-05, "loss": 3.5728, "step": 18170 }, { "epoch": 1.39, "learning_rate": 6.441127961704011e-05, "loss": 1.809, "step": 18175 }, { "epoch": 1.39, "learning_rate": 6.437105273743917e-05, "loss": 2.2907, "step": 18180 }, { "epoch": 1.39, "learning_rate": 6.43308258578382e-05, "loss": 3.9573, "step": 18185 }, { "epoch": 1.39, "learning_rate": 6.429059897823726e-05, "loss": 3.91, "step": 18190 }, { "epoch": 1.39, "learning_rate": 6.425037209863632e-05, "loss": 3.9791, "step": 18195 }, { "epoch": 1.39, "learning_rate": 6.421014521903536e-05, "loss": 4.3547, "step": 18200 }, { "epoch": 1.39, "learning_rate": 6.41699183394344e-05, "loss": 4.3502, "step": 18205 }, { "epoch": 1.39, "learning_rate": 6.412969145983346e-05, "loss": 3.3553, "step": 18210 }, { "epoch": 1.39, "learning_rate": 6.408946458023252e-05, "loss": 2.7739, "step": 18215 }, { "epoch": 1.39, "learning_rate": 6.404923770063157e-05, "loss": 2.2082, "step": 18220 }, { "epoch": 1.39, "learning_rate": 6.400901082103061e-05, "loss": 1.1458, "step": 18225 }, { "epoch": 1.39, "learning_rate": 6.396878394142967e-05, "loss": 2.6004, "step": 18230 }, { "epoch": 1.39, "learning_rate": 6.392855706182871e-05, "loss": 2.9328, "step": 18235 }, { "epoch": 1.39, "learning_rate": 6.388833018222776e-05, "loss": 4.5781, "step": 18240 }, { "epoch": 1.39, "learning_rate": 6.384810330262682e-05, "loss": 4.5582, "step": 18245 }, { "epoch": 1.39, "learning_rate": 6.380787642302588e-05, "loss": 3.7113, "step": 18250 }, { "epoch": 1.4, "learning_rate": 6.376764954342492e-05, "loss": 3.8949, "step": 18255 }, { "epoch": 1.4, "learning_rate": 6.372742266382397e-05, "loss": 4.1025, "step": 18260 }, { "epoch": 1.4, "learning_rate": 6.368719578422302e-05, "loss": 4.8066, "step": 18265 }, { "epoch": 1.4, "learning_rate": 6.364696890462207e-05, "loss": 3.1322, "step": 18270 }, { "epoch": 1.4, "learning_rate": 6.360674202502113e-05, "loss": 2.281, "step": 18275 }, { "epoch": 1.4, "learning_rate": 6.356651514542017e-05, "loss": 1.8965, "step": 18280 }, { "epoch": 1.4, "learning_rate": 6.352628826581922e-05, "loss": 2.0165, "step": 18285 }, { "epoch": 1.4, "learning_rate": 6.348606138621827e-05, "loss": 4.3139, "step": 18290 }, { "epoch": 1.4, "learning_rate": 6.344583450661733e-05, "loss": 3.5159, "step": 18295 }, { "epoch": 1.4, "learning_rate": 6.340560762701638e-05, "loss": 3.6641, "step": 18300 }, { "epoch": 1.4, "learning_rate": 6.336538074741542e-05, "loss": 4.3941, "step": 18305 }, { "epoch": 1.4, "learning_rate": 6.332515386781448e-05, "loss": 4.0872, "step": 18310 }, { "epoch": 1.4, "learning_rate": 6.328492698821352e-05, "loss": 2.6069, "step": 18315 }, { "epoch": 1.4, "learning_rate": 6.324470010861257e-05, "loss": 2.309, "step": 18320 }, { "epoch": 1.4, "learning_rate": 6.320447322901163e-05, "loss": 2.1683, "step": 18325 }, { "epoch": 1.4, "learning_rate": 6.316424634941069e-05, "loss": 1.5804, "step": 18330 }, { "epoch": 1.4, "learning_rate": 6.312401946980973e-05, "loss": 2.2497, "step": 18335 }, { "epoch": 1.4, "learning_rate": 6.308379259020878e-05, "loss": 3.6252, "step": 18340 }, { "epoch": 1.4, "learning_rate": 6.304356571060783e-05, "loss": 4.0513, "step": 18345 }, { "epoch": 1.4, "learning_rate": 6.300333883100689e-05, "loss": 3.3787, "step": 18350 }, { "epoch": 1.4, "learning_rate": 6.296311195140592e-05, "loss": 3.8106, "step": 18355 }, { "epoch": 1.4, "learning_rate": 6.292288507180498e-05, "loss": 4.0287, "step": 18360 }, { "epoch": 1.4, "learning_rate": 6.288265819220404e-05, "loss": 2.5771, "step": 18365 }, { "epoch": 1.4, "learning_rate": 6.284243131260308e-05, "loss": 3.1564, "step": 18370 }, { "epoch": 1.4, "learning_rate": 6.280220443300213e-05, "loss": 0.9893, "step": 18375 }, { "epoch": 1.4, "learning_rate": 6.276197755340119e-05, "loss": 1.7055, "step": 18380 }, { "epoch": 1.41, "learning_rate": 6.272175067380025e-05, "loss": 2.0058, "step": 18385 }, { "epoch": 1.41, "learning_rate": 6.268152379419928e-05, "loss": 4.6082, "step": 18390 }, { "epoch": 1.41, "learning_rate": 6.264129691459833e-05, "loss": 4.1299, "step": 18395 }, { "epoch": 1.41, "learning_rate": 6.260107003499739e-05, "loss": 2.9067, "step": 18400 }, { "epoch": 1.41, "learning_rate": 6.256084315539644e-05, "loss": 4.0816, "step": 18405 }, { "epoch": 1.41, "learning_rate": 6.252061627579548e-05, "loss": 4.8357, "step": 18410 }, { "epoch": 1.41, "learning_rate": 6.248038939619454e-05, "loss": 3.3007, "step": 18415 }, { "epoch": 1.41, "learning_rate": 6.24401625165936e-05, "loss": 2.1316, "step": 18420 }, { "epoch": 1.41, "learning_rate": 6.239993563699264e-05, "loss": 3.2057, "step": 18425 }, { "epoch": 1.41, "learning_rate": 6.235970875739169e-05, "loss": 1.6077, "step": 18430 }, { "epoch": 1.41, "learning_rate": 6.231948187779075e-05, "loss": 1.3006, "step": 18435 }, { "epoch": 1.41, "learning_rate": 6.227925499818979e-05, "loss": 4.4477, "step": 18440 }, { "epoch": 1.41, "learning_rate": 6.223902811858885e-05, "loss": 3.8949, "step": 18445 }, { "epoch": 1.41, "learning_rate": 6.21988012389879e-05, "loss": 4.133, "step": 18450 }, { "epoch": 1.41, "learning_rate": 6.215857435938694e-05, "loss": 4.193, "step": 18455 }, { "epoch": 1.41, "learning_rate": 6.2118347479786e-05, "loss": 4.1554, "step": 18460 }, { "epoch": 1.41, "learning_rate": 6.207812060018504e-05, "loss": 3.3928, "step": 18465 }, { "epoch": 1.41, "learning_rate": 6.20378937205841e-05, "loss": 2.6005, "step": 18470 }, { "epoch": 1.41, "learning_rate": 6.199766684098314e-05, "loss": 1.3332, "step": 18475 }, { "epoch": 1.41, "learning_rate": 6.19574399613822e-05, "loss": 1.9054, "step": 18480 }, { "epoch": 1.41, "learning_rate": 6.191721308178125e-05, "loss": 1.7774, "step": 18485 }, { "epoch": 1.41, "learning_rate": 6.187698620218029e-05, "loss": 4.46, "step": 18490 }, { "epoch": 1.41, "learning_rate": 6.183675932257935e-05, "loss": 4.0219, "step": 18495 }, { "epoch": 1.41, "learning_rate": 6.179653244297841e-05, "loss": 2.6053, "step": 18500 }, { "epoch": 1.41, "learning_rate": 6.175630556337745e-05, "loss": 3.7852, "step": 18505 }, { "epoch": 1.41, "learning_rate": 6.17160786837765e-05, "loss": 3.012, "step": 18510 }, { "epoch": 1.42, "learning_rate": 6.167585180417556e-05, "loss": 4.4118, "step": 18515 }, { "epoch": 1.42, "learning_rate": 6.163562492457461e-05, "loss": 3.0597, "step": 18520 }, { "epoch": 1.42, "learning_rate": 6.159539804497365e-05, "loss": 2.179, "step": 18525 }, { "epoch": 1.42, "learning_rate": 6.15551711653727e-05, "loss": 1.4966, "step": 18530 }, { "epoch": 1.42, "learning_rate": 6.151494428577176e-05, "loss": 1.3322, "step": 18535 }, { "epoch": 1.42, "learning_rate": 6.147471740617081e-05, "loss": 3.75, "step": 18540 }, { "epoch": 1.42, "learning_rate": 6.143449052656985e-05, "loss": 4.7051, "step": 18545 }, { "epoch": 1.42, "learning_rate": 6.139426364696891e-05, "loss": 5.2428, "step": 18550 }, { "epoch": 1.42, "learning_rate": 6.135403676736797e-05, "loss": 3.5576, "step": 18555 }, { "epoch": 1.42, "learning_rate": 6.1313809887767e-05, "loss": 3.1026, "step": 18560 }, { "epoch": 1.42, "learning_rate": 6.127358300816606e-05, "loss": 4.2556, "step": 18565 }, { "epoch": 1.42, "learning_rate": 6.123335612856512e-05, "loss": 1.9716, "step": 18570 }, { "epoch": 1.42, "learning_rate": 6.119312924896416e-05, "loss": 1.7601, "step": 18575 }, { "epoch": 1.42, "learning_rate": 6.11529023693632e-05, "loss": 3.0333, "step": 18580 }, { "epoch": 1.42, "learning_rate": 6.111267548976226e-05, "loss": 1.5847, "step": 18585 }, { "epoch": 1.42, "learning_rate": 6.107244861016131e-05, "loss": 3.682, "step": 18590 }, { "epoch": 1.42, "learning_rate": 6.1032221730560366e-05, "loss": 4.1221, "step": 18595 }, { "epoch": 1.42, "learning_rate": 6.099199485095941e-05, "loss": 4.0278, "step": 18600 }, { "epoch": 1.42, "learning_rate": 6.095176797135846e-05, "loss": 2.6338, "step": 18605 }, { "epoch": 1.42, "learning_rate": 6.091154109175752e-05, "loss": 3.3207, "step": 18610 }, { "epoch": 1.42, "learning_rate": 6.087131421215656e-05, "loss": 3.9458, "step": 18615 }, { "epoch": 1.42, "learning_rate": 6.083108733255562e-05, "loss": 2.4551, "step": 18620 }, { "epoch": 1.42, "learning_rate": 6.079086045295467e-05, "loss": 1.0928, "step": 18625 }, { "epoch": 1.42, "learning_rate": 6.075063357335372e-05, "loss": 1.2878, "step": 18630 }, { "epoch": 1.42, "learning_rate": 6.0710406693752764e-05, "loss": 3.1286, "step": 18635 }, { "epoch": 1.42, "learning_rate": 6.0670179814151816e-05, "loss": 4.3492, "step": 18640 }, { "epoch": 1.43, "learning_rate": 6.0629952934550874e-05, "loss": 4.4941, "step": 18645 }, { "epoch": 1.43, "learning_rate": 6.0589726054949926e-05, "loss": 3.3197, "step": 18650 }, { "epoch": 1.43, "learning_rate": 6.054949917534897e-05, "loss": 2.8205, "step": 18655 }, { "epoch": 1.43, "learning_rate": 6.050927229574802e-05, "loss": 2.2681, "step": 18660 }, { "epoch": 1.43, "learning_rate": 6.046904541614707e-05, "loss": 2.6547, "step": 18665 }, { "epoch": 1.43, "learning_rate": 6.0428818536546125e-05, "loss": 3.5173, "step": 18670 }, { "epoch": 1.43, "learning_rate": 6.038859165694517e-05, "loss": 1.8866, "step": 18675 }, { "epoch": 1.43, "learning_rate": 6.034836477734422e-05, "loss": 2.6423, "step": 18680 }, { "epoch": 1.43, "learning_rate": 6.030813789774328e-05, "loss": 1.6941, "step": 18685 }, { "epoch": 1.43, "learning_rate": 6.026791101814232e-05, "loss": 3.9561, "step": 18690 }, { "epoch": 1.43, "learning_rate": 6.0227684138541375e-05, "loss": 4.3443, "step": 18695 }, { "epoch": 1.43, "learning_rate": 6.018745725894043e-05, "loss": 3.6342, "step": 18700 }, { "epoch": 1.43, "learning_rate": 6.014723037933948e-05, "loss": 3.3798, "step": 18705 }, { "epoch": 1.43, "learning_rate": 6.010700349973852e-05, "loss": 2.8606, "step": 18710 }, { "epoch": 1.43, "learning_rate": 6.0066776620137574e-05, "loss": 2.683, "step": 18715 }, { "epoch": 1.43, "learning_rate": 6.002654974053663e-05, "loss": 1.8671, "step": 18720 }, { "epoch": 1.43, "learning_rate": 5.9986322860935684e-05, "loss": 4.4889, "step": 18725 }, { "epoch": 1.43, "learning_rate": 5.994609598133473e-05, "loss": 2.7197, "step": 18730 }, { "epoch": 1.43, "learning_rate": 5.990586910173378e-05, "loss": 3.2302, "step": 18735 }, { "epoch": 1.43, "learning_rate": 5.986564222213283e-05, "loss": 3.9961, "step": 18740 }, { "epoch": 1.43, "learning_rate": 5.982541534253189e-05, "loss": 3.467, "step": 18745 }, { "epoch": 1.43, "learning_rate": 5.978518846293093e-05, "loss": 4.3445, "step": 18750 }, { "epoch": 1.43, "learning_rate": 5.9744961583329986e-05, "loss": 4.9615, "step": 18755 }, { "epoch": 1.43, "learning_rate": 5.970473470372904e-05, "loss": 3.1598, "step": 18760 }, { "epoch": 1.43, "learning_rate": 5.966450782412808e-05, "loss": 3.2324, "step": 18765 }, { "epoch": 1.43, "learning_rate": 5.9624280944527134e-05, "loss": 3.9107, "step": 18770 }, { "epoch": 1.43, "learning_rate": 5.9584054064926185e-05, "loss": 1.1585, "step": 18775 }, { "epoch": 1.44, "learning_rate": 5.9543827185325243e-05, "loss": 0.6935, "step": 18780 }, { "epoch": 1.44, "learning_rate": 5.950360030572428e-05, "loss": 1.9886, "step": 18785 }, { "epoch": 1.44, "learning_rate": 5.946337342612334e-05, "loss": 6.2145, "step": 18790 }, { "epoch": 1.44, "learning_rate": 5.942314654652239e-05, "loss": 4.3086, "step": 18795 }, { "epoch": 1.44, "learning_rate": 5.938291966692144e-05, "loss": 3.2976, "step": 18800 }, { "epoch": 1.44, "learning_rate": 5.934269278732049e-05, "loss": 3.9161, "step": 18805 }, { "epoch": 1.44, "learning_rate": 5.930246590771954e-05, "loss": 3.1264, "step": 18810 }, { "epoch": 1.44, "learning_rate": 5.92622390281186e-05, "loss": 4.4698, "step": 18815 }, { "epoch": 1.44, "learning_rate": 5.922201214851765e-05, "loss": 3.101, "step": 18820 }, { "epoch": 1.44, "learning_rate": 5.9181785268916686e-05, "loss": 1.7798, "step": 18825 }, { "epoch": 1.44, "learning_rate": 5.9141558389315745e-05, "loss": 2.7562, "step": 18830 }, { "epoch": 1.44, "learning_rate": 5.9101331509714796e-05, "loss": 2.8743, "step": 18835 }, { "epoch": 1.44, "learning_rate": 5.906110463011385e-05, "loss": 4.3965, "step": 18840 }, { "epoch": 1.44, "learning_rate": 5.902087775051289e-05, "loss": 3.4023, "step": 18845 }, { "epoch": 1.44, "learning_rate": 5.8980650870911944e-05, "loss": 4.3494, "step": 18850 }, { "epoch": 1.44, "learning_rate": 5.8940423991311e-05, "loss": 2.9739, "step": 18855 }, { "epoch": 1.44, "learning_rate": 5.890019711171004e-05, "loss": 4.0516, "step": 18860 }, { "epoch": 1.44, "learning_rate": 5.88599702321091e-05, "loss": 2.3647, "step": 18865 }, { "epoch": 1.44, "learning_rate": 5.881974335250815e-05, "loss": 2.4891, "step": 18870 }, { "epoch": 1.44, "learning_rate": 5.87795164729072e-05, "loss": 2.7286, "step": 18875 }, { "epoch": 1.44, "learning_rate": 5.8739289593306246e-05, "loss": 2.3304, "step": 18880 }, { "epoch": 1.44, "learning_rate": 5.86990627137053e-05, "loss": 1.6823, "step": 18885 }, { "epoch": 1.44, "learning_rate": 5.8658835834104355e-05, "loss": 4.1017, "step": 18890 }, { "epoch": 1.44, "learning_rate": 5.861860895450341e-05, "loss": 4.4844, "step": 18895 }, { "epoch": 1.44, "learning_rate": 5.857838207490245e-05, "loss": 3.8736, "step": 18900 }, { "epoch": 1.44, "learning_rate": 5.85381551953015e-05, "loss": 3.4389, "step": 18905 }, { "epoch": 1.45, "learning_rate": 5.8497928315700554e-05, "loss": 4.6137, "step": 18910 }, { "epoch": 1.45, "learning_rate": 5.845770143609961e-05, "loss": 3.7057, "step": 18915 }, { "epoch": 1.45, "learning_rate": 5.841747455649865e-05, "loss": 1.9062, "step": 18920 }, { "epoch": 1.45, "learning_rate": 5.837724767689771e-05, "loss": 2.6992, "step": 18925 }, { "epoch": 1.45, "learning_rate": 5.833702079729676e-05, "loss": 2.4897, "step": 18930 }, { "epoch": 1.45, "learning_rate": 5.8296793917695805e-05, "loss": 2.3581, "step": 18935 }, { "epoch": 1.45, "learning_rate": 5.8256567038094856e-05, "loss": 4.217, "step": 18940 }, { "epoch": 1.45, "learning_rate": 5.821634015849391e-05, "loss": 3.4085, "step": 18945 }, { "epoch": 1.45, "learning_rate": 5.8176113278892966e-05, "loss": 3.5184, "step": 18950 }, { "epoch": 1.45, "learning_rate": 5.8135886399292004e-05, "loss": 3.6127, "step": 18955 }, { "epoch": 1.45, "learning_rate": 5.809565951969106e-05, "loss": 3.4811, "step": 18960 }, { "epoch": 1.45, "learning_rate": 5.8055432640090114e-05, "loss": 3.3875, "step": 18965 }, { "epoch": 1.45, "learning_rate": 5.8015205760489165e-05, "loss": 2.1214, "step": 18970 }, { "epoch": 1.45, "learning_rate": 5.797497888088821e-05, "loss": 3.5039, "step": 18975 }, { "epoch": 1.45, "learning_rate": 5.793475200128726e-05, "loss": 1.3013, "step": 18980 }, { "epoch": 1.45, "learning_rate": 5.789452512168631e-05, "loss": 1.1169, "step": 18985 }, { "epoch": 1.45, "learning_rate": 5.785429824208537e-05, "loss": 3.883, "step": 18990 }, { "epoch": 1.45, "learning_rate": 5.781407136248441e-05, "loss": 4.4398, "step": 18995 }, { "epoch": 1.45, "learning_rate": 5.777384448288347e-05, "loss": 2.2548, "step": 19000 }, { "epoch": 1.45, "learning_rate": 5.773361760328252e-05, "loss": 4.5357, "step": 19005 }, { "epoch": 1.45, "learning_rate": 5.7693390723681563e-05, "loss": 3.7588, "step": 19010 }, { "epoch": 1.45, "learning_rate": 5.7653163844080615e-05, "loss": 1.6498, "step": 19015 }, { "epoch": 1.45, "learning_rate": 5.7612936964479666e-05, "loss": 3.7896, "step": 19020 }, { "epoch": 1.45, "learning_rate": 5.7572710084878725e-05, "loss": 1.6262, "step": 19025 }, { "epoch": 1.45, "learning_rate": 5.753248320527776e-05, "loss": 0.6316, "step": 19030 }, { "epoch": 1.45, "learning_rate": 5.749225632567682e-05, "loss": 0.6665, "step": 19035 }, { "epoch": 1.46, "learning_rate": 5.745202944607587e-05, "loss": 4.3615, "step": 19040 }, { "epoch": 1.46, "learning_rate": 5.7411802566474924e-05, "loss": 4.8541, "step": 19045 }, { "epoch": 1.46, "learning_rate": 5.737157568687397e-05, "loss": 3.6393, "step": 19050 }, { "epoch": 1.46, "learning_rate": 5.733134880727302e-05, "loss": 4.5691, "step": 19055 }, { "epoch": 1.46, "learning_rate": 5.729112192767208e-05, "loss": 3.1455, "step": 19060 }, { "epoch": 1.46, "learning_rate": 5.725089504807113e-05, "loss": 3.8037, "step": 19065 }, { "epoch": 1.46, "learning_rate": 5.7210668168470174e-05, "loss": 2.758, "step": 19070 }, { "epoch": 1.46, "learning_rate": 5.7170441288869226e-05, "loss": 2.8246, "step": 19075 }, { "epoch": 1.46, "learning_rate": 5.713021440926828e-05, "loss": 2.058, "step": 19080 }, { "epoch": 1.46, "learning_rate": 5.708998752966732e-05, "loss": 1.0103, "step": 19085 }, { "epoch": 1.46, "learning_rate": 5.704976065006637e-05, "loss": 3.3998, "step": 19090 }, { "epoch": 1.46, "learning_rate": 5.700953377046543e-05, "loss": 3.6197, "step": 19095 }, { "epoch": 1.46, "learning_rate": 5.696930689086448e-05, "loss": 3.9162, "step": 19100 }, { "epoch": 1.46, "learning_rate": 5.692908001126353e-05, "loss": 4.3734, "step": 19105 }, { "epoch": 1.46, "learning_rate": 5.688885313166258e-05, "loss": 3.8322, "step": 19110 }, { "epoch": 1.46, "learning_rate": 5.684862625206163e-05, "loss": 2.6953, "step": 19115 }, { "epoch": 1.46, "learning_rate": 5.680839937246068e-05, "loss": 2.9081, "step": 19120 }, { "epoch": 1.46, "learning_rate": 5.676817249285973e-05, "loss": 2.9252, "step": 19125 }, { "epoch": 1.46, "learning_rate": 5.672794561325878e-05, "loss": 2.051, "step": 19130 }, { "epoch": 1.46, "learning_rate": 5.6687718733657837e-05, "loss": 4.5108, "step": 19135 }, { "epoch": 1.46, "learning_rate": 5.664749185405689e-05, "loss": 4.5414, "step": 19140 }, { "epoch": 1.46, "learning_rate": 5.660726497445593e-05, "loss": 4.4689, "step": 19145 }, { "epoch": 1.46, "learning_rate": 5.6567038094854984e-05, "loss": 3.826, "step": 19150 }, { "epoch": 1.46, "learning_rate": 5.6526811215254036e-05, "loss": 2.8979, "step": 19155 }, { "epoch": 1.46, "learning_rate": 5.648658433565308e-05, "loss": 3.5128, "step": 19160 }, { "epoch": 1.46, "learning_rate": 5.644635745605213e-05, "loss": 2.2047, "step": 19165 }, { "epoch": 1.47, "learning_rate": 5.640613057645119e-05, "loss": 2.9307, "step": 19170 }, { "epoch": 1.47, "learning_rate": 5.636590369685024e-05, "loss": 0.9308, "step": 19175 }, { "epoch": 1.47, "learning_rate": 5.6325676817249286e-05, "loss": 1.3805, "step": 19180 }, { "epoch": 1.47, "learning_rate": 5.628544993764834e-05, "loss": 3.8648, "step": 19185 }, { "epoch": 1.47, "learning_rate": 5.624522305804739e-05, "loss": 4.8469, "step": 19190 }, { "epoch": 1.47, "learning_rate": 5.620499617844645e-05, "loss": 4.4084, "step": 19195 }, { "epoch": 1.47, "learning_rate": 5.6164769298845485e-05, "loss": 4.0066, "step": 19200 }, { "epoch": 1.47, "learning_rate": 5.6124542419244544e-05, "loss": 2.9517, "step": 19205 }, { "epoch": 1.47, "learning_rate": 5.6084315539643595e-05, "loss": 3.6738, "step": 19210 }, { "epoch": 1.47, "learning_rate": 5.6044088660042647e-05, "loss": 2.7926, "step": 19215 }, { "epoch": 1.47, "learning_rate": 5.600386178044169e-05, "loss": 1.8658, "step": 19220 }, { "epoch": 1.47, "learning_rate": 5.596363490084074e-05, "loss": 2.3674, "step": 19225 }, { "epoch": 1.47, "learning_rate": 5.59234080212398e-05, "loss": 3.3319, "step": 19230 }, { "epoch": 1.47, "learning_rate": 5.588318114163884e-05, "loss": 2.3487, "step": 19235 }, { "epoch": 1.47, "learning_rate": 5.58429542620379e-05, "loss": 4.4271, "step": 19240 }, { "epoch": 1.47, "learning_rate": 5.580272738243695e-05, "loss": 5.4869, "step": 19245 }, { "epoch": 1.47, "learning_rate": 5.5762500502836e-05, "loss": 3.9018, "step": 19250 }, { "epoch": 1.47, "learning_rate": 5.5722273623235045e-05, "loss": 4.5006, "step": 19255 }, { "epoch": 1.47, "learning_rate": 5.5682046743634096e-05, "loss": 4.2176, "step": 19260 }, { "epoch": 1.47, "learning_rate": 5.5641819864033154e-05, "loss": 2.4257, "step": 19265 }, { "epoch": 1.47, "learning_rate": 5.5601592984432206e-05, "loss": 2.4731, "step": 19270 }, { "epoch": 1.47, "learning_rate": 5.556136610483125e-05, "loss": 3.5372, "step": 19275 }, { "epoch": 1.47, "learning_rate": 5.55211392252303e-05, "loss": 1.971, "step": 19280 }, { "epoch": 1.47, "learning_rate": 5.5480912345629353e-05, "loss": 2.1551, "step": 19285 }, { "epoch": 1.47, "learning_rate": 5.5440685466028405e-05, "loss": 3.9453, "step": 19290 }, { "epoch": 1.47, "learning_rate": 5.540045858642745e-05, "loss": 4.4363, "step": 19295 }, { "epoch": 1.48, "learning_rate": 5.53602317068265e-05, "loss": 3.8611, "step": 19300 }, { "epoch": 1.48, "learning_rate": 5.532000482722556e-05, "loss": 3.0589, "step": 19305 }, { "epoch": 1.48, "learning_rate": 5.52797779476246e-05, "loss": 3.3494, "step": 19310 }, { "epoch": 1.48, "learning_rate": 5.5239551068023656e-05, "loss": 2.2144, "step": 19315 }, { "epoch": 1.48, "learning_rate": 5.519932418842271e-05, "loss": 4.4766, "step": 19320 }, { "epoch": 1.48, "learning_rate": 5.515909730882176e-05, "loss": 3.267, "step": 19325 }, { "epoch": 1.48, "learning_rate": 5.51188704292208e-05, "loss": 2.9234, "step": 19330 }, { "epoch": 1.48, "learning_rate": 5.5078643549619855e-05, "loss": 3.8816, "step": 19335 }, { "epoch": 1.48, "learning_rate": 5.503841667001891e-05, "loss": 4.1756, "step": 19340 }, { "epoch": 1.48, "learning_rate": 5.4998189790417964e-05, "loss": 4.3277, "step": 19345 }, { "epoch": 1.48, "learning_rate": 5.495796291081701e-05, "loss": 4.1113, "step": 19350 }, { "epoch": 1.48, "learning_rate": 5.491773603121606e-05, "loss": 3.8789, "step": 19355 }, { "epoch": 1.48, "learning_rate": 5.487750915161511e-05, "loss": 2.3336, "step": 19360 }, { "epoch": 1.48, "learning_rate": 5.483728227201417e-05, "loss": 3.0759, "step": 19365 }, { "epoch": 1.48, "learning_rate": 5.479705539241321e-05, "loss": 2.9986, "step": 19370 }, { "epoch": 1.48, "learning_rate": 5.4756828512812266e-05, "loss": 2.5309, "step": 19375 }, { "epoch": 1.48, "learning_rate": 5.471660163321132e-05, "loss": 2.5339, "step": 19380 }, { "epoch": 1.48, "learning_rate": 5.467637475361036e-05, "loss": 2.0254, "step": 19385 }, { "epoch": 1.48, "learning_rate": 5.4636147874009414e-05, "loss": 4.4572, "step": 19390 }, { "epoch": 1.48, "learning_rate": 5.4595920994408465e-05, "loss": 4.1877, "step": 19395 }, { "epoch": 1.48, "learning_rate": 5.4555694114807524e-05, "loss": 3.7068, "step": 19400 }, { "epoch": 1.48, "learning_rate": 5.451546723520656e-05, "loss": 3.5297, "step": 19405 }, { "epoch": 1.48, "learning_rate": 5.447524035560562e-05, "loss": 3.059, "step": 19410 }, { "epoch": 1.48, "learning_rate": 5.443501347600467e-05, "loss": 3.3376, "step": 19415 }, { "epoch": 1.48, "learning_rate": 5.439478659640372e-05, "loss": 2.6208, "step": 19420 }, { "epoch": 1.48, "learning_rate": 5.435455971680277e-05, "loss": 0.5573, "step": 19425 }, { "epoch": 1.49, "learning_rate": 5.431433283720182e-05, "loss": 0.388, "step": 19430 }, { "epoch": 1.49, "learning_rate": 5.427410595760087e-05, "loss": 2.8777, "step": 19435 }, { "epoch": 1.49, "learning_rate": 5.423387907799993e-05, "loss": 4.8811, "step": 19440 }, { "epoch": 1.49, "learning_rate": 5.4193652198398967e-05, "loss": 3.5342, "step": 19445 }, { "epoch": 1.49, "learning_rate": 5.4153425318798025e-05, "loss": 3.8281, "step": 19450 }, { "epoch": 1.49, "learning_rate": 5.4113198439197076e-05, "loss": 4.1465, "step": 19455 }, { "epoch": 1.49, "learning_rate": 5.407297155959612e-05, "loss": 3.8576, "step": 19460 }, { "epoch": 1.49, "learning_rate": 5.403274467999517e-05, "loss": 3.6264, "step": 19465 }, { "epoch": 1.49, "learning_rate": 5.3992517800394224e-05, "loss": 3.2877, "step": 19470 }, { "epoch": 1.49, "learning_rate": 5.395229092079328e-05, "loss": 2.1062, "step": 19475 }, { "epoch": 1.49, "learning_rate": 5.391206404119232e-05, "loss": 1.655, "step": 19480 }, { "epoch": 1.49, "learning_rate": 5.387183716159138e-05, "loss": 1.7521, "step": 19485 }, { "epoch": 1.49, "learning_rate": 5.383161028199043e-05, "loss": 4.1643, "step": 19490 }, { "epoch": 1.49, "learning_rate": 5.379138340238948e-05, "loss": 4.5741, "step": 19495 }, { "epoch": 1.49, "learning_rate": 5.3751156522788526e-05, "loss": 4.5176, "step": 19500 }, { "epoch": 1.49, "learning_rate": 5.371092964318758e-05, "loss": 2.9813, "step": 19505 }, { "epoch": 1.49, "learning_rate": 5.3670702763586636e-05, "loss": 3.9217, "step": 19510 }, { "epoch": 1.49, "learning_rate": 5.363047588398569e-05, "loss": 2.6086, "step": 19515 }, { "epoch": 1.49, "learning_rate": 5.359024900438473e-05, "loss": 3.6834, "step": 19520 }, { "epoch": 1.49, "learning_rate": 5.355002212478378e-05, "loss": 3.0846, "step": 19525 }, { "epoch": 1.49, "learning_rate": 5.3509795245182835e-05, "loss": 2.2924, "step": 19530 }, { "epoch": 1.49, "learning_rate": 5.346956836558188e-05, "loss": 3.0072, "step": 19535 }, { "epoch": 1.49, "learning_rate": 5.342934148598093e-05, "loss": 3.6783, "step": 19540 }, { "epoch": 1.49, "learning_rate": 5.338911460637999e-05, "loss": 3.4266, "step": 19545 }, { "epoch": 1.49, "learning_rate": 5.334888772677904e-05, "loss": 4.5687, "step": 19550 }, { "epoch": 1.49, "learning_rate": 5.3308660847178085e-05, "loss": 3.6299, "step": 19555 }, { "epoch": 1.49, "learning_rate": 5.326843396757714e-05, "loss": 4.2918, "step": 19560 }, { "epoch": 1.5, "learning_rate": 5.322820708797619e-05, "loss": 3.1556, "step": 19565 }, { "epoch": 1.5, "learning_rate": 5.3187980208375246e-05, "loss": 1.7684, "step": 19570 }, { "epoch": 1.5, "learning_rate": 5.3147753328774284e-05, "loss": 1.7001, "step": 19575 }, { "epoch": 1.5, "learning_rate": 5.310752644917334e-05, "loss": 2.9054, "step": 19580 }, { "epoch": 1.5, "learning_rate": 5.3067299569572394e-05, "loss": 3.3056, "step": 19585 }, { "epoch": 1.5, "learning_rate": 5.3027072689971446e-05, "loss": 4.6641, "step": 19590 }, { "epoch": 1.5, "learning_rate": 5.298684581037049e-05, "loss": 4.5357, "step": 19595 }, { "epoch": 1.5, "learning_rate": 5.294661893076954e-05, "loss": 4.2678, "step": 19600 }, { "epoch": 1.5, "learning_rate": 5.290639205116859e-05, "loss": 3.8678, "step": 19605 }, { "epoch": 1.5, "learning_rate": 5.286616517156764e-05, "loss": 2.2252, "step": 19610 }, { "epoch": 1.5, "learning_rate": 5.282593829196669e-05, "loss": 2.7532, "step": 19615 }, { "epoch": 1.5, "learning_rate": 5.278571141236575e-05, "loss": 3.1894, "step": 19620 }, { "epoch": 1.5, "learning_rate": 5.27454845327648e-05, "loss": 1.7801, "step": 19625 }, { "epoch": 1.5, "learning_rate": 5.2705257653163844e-05, "loss": 0.9882, "step": 19630 }, { "epoch": 1.5, "learning_rate": 5.2665030773562895e-05, "loss": 1.3804, "step": 19635 }, { "epoch": 1.5, "learning_rate": 5.262480389396195e-05, "loss": 4.2154, "step": 19640 }, { "epoch": 1.5, "learning_rate": 5.2584577014361005e-05, "loss": 4.8629, "step": 19645 }, { "epoch": 1.5, "learning_rate": 5.254435013476004e-05, "loss": 2.9258, "step": 19650 }, { "epoch": 1.5, "learning_rate": 5.25041232551591e-05, "loss": 2.9222, "step": 19655 }, { "epoch": 1.5, "learning_rate": 5.246389637555815e-05, "loss": 3.0917, "step": 19660 }, { "epoch": 1.5, "learning_rate": 5.2423669495957204e-05, "loss": 2.4604, "step": 19665 }, { "epoch": 1.5, "learning_rate": 5.238344261635625e-05, "loss": 1.6897, "step": 19670 }, { "epoch": 1.5, "learning_rate": 5.23432157367553e-05, "loss": 1.0687, "step": 19675 }, { "epoch": 1.5, "learning_rate": 5.230298885715436e-05, "loss": 1.2178, "step": 19680 }, { "epoch": 1.5, "learning_rate": 5.2262761977553396e-05, "loss": 2.0848, "step": 19685 }, { "epoch": 1.5, "learning_rate": 5.2222535097952455e-05, "loss": 4.2998, "step": 19690 }, { "epoch": 1.51, "learning_rate": 5.2182308218351506e-05, "loss": 5.0777, "step": 19695 }, { "epoch": 1.51, "learning_rate": 5.214208133875056e-05, "loss": 3.448, "step": 19700 }, { "epoch": 1.51, "learning_rate": 5.21018544591496e-05, "loss": 3.3208, "step": 19705 }, { "epoch": 1.51, "learning_rate": 5.2061627579548654e-05, "loss": 2.9604, "step": 19710 }, { "epoch": 1.51, "learning_rate": 5.202140069994771e-05, "loss": 2.4406, "step": 19715 }, { "epoch": 1.51, "learning_rate": 5.198117382034676e-05, "loss": 1.6549, "step": 19720 }, { "epoch": 1.51, "learning_rate": 5.194094694074581e-05, "loss": 1.4544, "step": 19725 }, { "epoch": 1.51, "learning_rate": 5.190072006114486e-05, "loss": 2.5774, "step": 19730 }, { "epoch": 1.51, "learning_rate": 5.186049318154391e-05, "loss": 2.1335, "step": 19735 }, { "epoch": 1.51, "learning_rate": 5.182026630194296e-05, "loss": 4.3416, "step": 19740 }, { "epoch": 1.51, "learning_rate": 5.178003942234201e-05, "loss": 3.4145, "step": 19745 }, { "epoch": 1.51, "learning_rate": 5.173981254274106e-05, "loss": 3.9055, "step": 19750 }, { "epoch": 1.51, "learning_rate": 5.169958566314012e-05, "loss": 3.7634, "step": 19755 }, { "epoch": 1.51, "learning_rate": 5.1659358783539155e-05, "loss": 3.4955, "step": 19760 }, { "epoch": 1.51, "learning_rate": 5.161913190393821e-05, "loss": 1.8993, "step": 19765 }, { "epoch": 1.51, "learning_rate": 5.1578905024337264e-05, "loss": 3.337, "step": 19770 }, { "epoch": 1.51, "learning_rate": 5.1538678144736316e-05, "loss": 1.2481, "step": 19775 }, { "epoch": 1.51, "learning_rate": 5.149845126513536e-05, "loss": 2.3209, "step": 19780 }, { "epoch": 1.51, "learning_rate": 5.145822438553441e-05, "loss": 2.9454, "step": 19785 }, { "epoch": 1.51, "learning_rate": 5.141799750593347e-05, "loss": 4.0148, "step": 19790 }, { "epoch": 1.51, "learning_rate": 5.137777062633252e-05, "loss": 4.0611, "step": 19795 }, { "epoch": 1.51, "learning_rate": 5.1337543746731567e-05, "loss": 3.9623, "step": 19800 }, { "epoch": 1.51, "learning_rate": 5.129731686713062e-05, "loss": 2.918, "step": 19805 }, { "epoch": 1.51, "learning_rate": 5.125708998752967e-05, "loss": 3.5767, "step": 19810 }, { "epoch": 1.51, "learning_rate": 5.121686310792873e-05, "loss": 3.4847, "step": 19815 }, { "epoch": 1.51, "learning_rate": 5.1176636228327766e-05, "loss": 1.9963, "step": 19820 }, { "epoch": 1.52, "learning_rate": 5.1136409348726824e-05, "loss": 1.0946, "step": 19825 }, { "epoch": 1.52, "learning_rate": 5.110422784504606e-05, "loss": 1.6789, "step": 19830 }, { "epoch": 1.52, "learning_rate": 5.1064000965445115e-05, "loss": 1.926, "step": 19835 }, { "epoch": 1.52, "learning_rate": 5.1023774085844167e-05, "loss": 4.7094, "step": 19840 }, { "epoch": 1.52, "learning_rate": 5.098354720624321e-05, "loss": 4.5109, "step": 19845 }, { "epoch": 1.52, "learning_rate": 5.094332032664226e-05, "loss": 4.4045, "step": 19850 }, { "epoch": 1.52, "learning_rate": 5.0903093447041314e-05, "loss": 3.6195, "step": 19855 }, { "epoch": 1.52, "learning_rate": 5.086286656744037e-05, "loss": 1.4138, "step": 19860 }, { "epoch": 1.52, "learning_rate": 5.082263968783941e-05, "loss": 2.8188, "step": 19865 }, { "epoch": 1.52, "learning_rate": 5.078241280823847e-05, "loss": 3.5405, "step": 19870 }, { "epoch": 1.52, "learning_rate": 5.074218592863752e-05, "loss": 2.9546, "step": 19875 }, { "epoch": 1.52, "learning_rate": 5.070195904903657e-05, "loss": 3.7135, "step": 19880 }, { "epoch": 1.52, "learning_rate": 5.0661732169435616e-05, "loss": 3.4582, "step": 19885 }, { "epoch": 1.52, "learning_rate": 5.062150528983467e-05, "loss": 4.2113, "step": 19890 }, { "epoch": 1.52, "learning_rate": 5.0581278410233726e-05, "loss": 4.4158, "step": 19895 }, { "epoch": 1.52, "learning_rate": 5.0541051530632764e-05, "loss": 3.453, "step": 19900 }, { "epoch": 1.52, "learning_rate": 5.050082465103182e-05, "loss": 3.6619, "step": 19905 }, { "epoch": 1.52, "learning_rate": 5.0460597771430874e-05, "loss": 3.9289, "step": 19910 }, { "epoch": 1.52, "learning_rate": 5.0420370891829925e-05, "loss": 2.6809, "step": 19915 }, { "epoch": 1.52, "learning_rate": 5.038014401222897e-05, "loss": 0.5294, "step": 19920 }, { "epoch": 1.52, "learning_rate": 5.033991713262802e-05, "loss": 0.7334, "step": 19925 }, { "epoch": 1.52, "learning_rate": 5.029969025302708e-05, "loss": 3.913, "step": 19930 }, { "epoch": 1.52, "learning_rate": 5.025946337342613e-05, "loss": 0.9632, "step": 19935 }, { "epoch": 1.52, "learning_rate": 5.0219236493825176e-05, "loss": 4.6021, "step": 19940 }, { "epoch": 1.52, "learning_rate": 5.017900961422423e-05, "loss": 4.8836, "step": 19945 }, { "epoch": 1.52, "learning_rate": 5.013878273462328e-05, "loss": 3.9064, "step": 19950 }, { "epoch": 1.53, "learning_rate": 5.009855585502233e-05, "loss": 3.9792, "step": 19955 }, { "epoch": 1.53, "learning_rate": 5.0058328975421375e-05, "loss": 4.1586, "step": 19960 }, { "epoch": 1.53, "learning_rate": 5.0018102095820426e-05, "loss": 3.8375, "step": 19965 }, { "epoch": 1.53, "learning_rate": 4.9977875216219484e-05, "loss": 2.0574, "step": 19970 }, { "epoch": 1.53, "learning_rate": 4.993764833661853e-05, "loss": 1.9708, "step": 19975 }, { "epoch": 1.53, "learning_rate": 4.989742145701758e-05, "loss": 1.9487, "step": 19980 }, { "epoch": 1.53, "learning_rate": 4.985719457741663e-05, "loss": 3.1091, "step": 19985 }, { "epoch": 1.53, "learning_rate": 4.9816967697815684e-05, "loss": 5.8838, "step": 19990 }, { "epoch": 1.53, "learning_rate": 4.9776740818214735e-05, "loss": 4.9205, "step": 19995 }, { "epoch": 1.53, "learning_rate": 4.973651393861378e-05, "loss": 4.5146, "step": 20000 }, { "epoch": 1.53, "learning_rate": 4.969628705901284e-05, "loss": 3.2711, "step": 20005 }, { "epoch": 1.53, "learning_rate": 4.965606017941188e-05, "loss": 3.8259, "step": 20010 }, { "epoch": 1.53, "learning_rate": 4.961583329981094e-05, "loss": 3.1064, "step": 20015 }, { "epoch": 1.53, "learning_rate": 4.9575606420209986e-05, "loss": 3.5432, "step": 20020 }, { "epoch": 1.53, "learning_rate": 4.953537954060904e-05, "loss": 1.2155, "step": 20025 }, { "epoch": 1.53, "learning_rate": 4.949515266100809e-05, "loss": 2.7059, "step": 20030 }, { "epoch": 1.53, "learning_rate": 4.945492578140714e-05, "loss": 1.1075, "step": 20035 }, { "epoch": 1.53, "learning_rate": 4.941469890180619e-05, "loss": 3.995, "step": 20040 }, { "epoch": 1.53, "learning_rate": 4.937447202220524e-05, "loss": 4.1667, "step": 20045 }, { "epoch": 1.53, "learning_rate": 4.933424514260429e-05, "loss": 3.6951, "step": 20050 }, { "epoch": 1.53, "learning_rate": 4.929401826300334e-05, "loss": 3.6781, "step": 20055 }, { "epoch": 1.53, "learning_rate": 4.925379138340239e-05, "loss": 4.773, "step": 20060 }, { "epoch": 1.53, "learning_rate": 4.921356450380144e-05, "loss": 3.6868, "step": 20065 }, { "epoch": 1.53, "learning_rate": 4.9173337624200493e-05, "loss": 2.524, "step": 20070 }, { "epoch": 1.53, "learning_rate": 4.9133110744599545e-05, "loss": 1.7292, "step": 20075 }, { "epoch": 1.53, "learning_rate": 4.9092883864998596e-05, "loss": 0.7888, "step": 20080 }, { "epoch": 1.54, "learning_rate": 4.905265698539764e-05, "loss": 2.0986, "step": 20085 }, { "epoch": 1.54, "learning_rate": 4.90124301057967e-05, "loss": 3.924, "step": 20090 }, { "epoch": 1.54, "learning_rate": 4.8972203226195744e-05, "loss": 4.8615, "step": 20095 }, { "epoch": 1.54, "learning_rate": 4.89319763465948e-05, "loss": 3.5187, "step": 20100 }, { "epoch": 1.54, "learning_rate": 4.889174946699385e-05, "loss": 3.4105, "step": 20105 }, { "epoch": 1.54, "learning_rate": 4.88515225873929e-05, "loss": 2.8189, "step": 20110 }, { "epoch": 1.54, "learning_rate": 4.881129570779195e-05, "loss": 3.0407, "step": 20115 }, { "epoch": 1.54, "learning_rate": 4.8771068828191e-05, "loss": 1.8893, "step": 20120 }, { "epoch": 1.54, "learning_rate": 4.873084194859005e-05, "loss": 3.3274, "step": 20125 }, { "epoch": 1.54, "learning_rate": 4.86906150689891e-05, "loss": 2.7007, "step": 20130 }, { "epoch": 1.54, "learning_rate": 4.865038818938815e-05, "loss": 2.7956, "step": 20135 }, { "epoch": 1.54, "learning_rate": 4.86101613097872e-05, "loss": 5.4281, "step": 20140 }, { "epoch": 1.54, "learning_rate": 4.856993443018625e-05, "loss": 4.4959, "step": 20145 }, { "epoch": 1.54, "learning_rate": 4.85297075505853e-05, "loss": 4.1445, "step": 20150 }, { "epoch": 1.54, "learning_rate": 4.8489480670984355e-05, "loss": 3.5901, "step": 20155 }, { "epoch": 1.54, "learning_rate": 4.8449253791383406e-05, "loss": 2.736, "step": 20160 }, { "epoch": 1.54, "learning_rate": 4.840902691178246e-05, "loss": 2.9837, "step": 20165 }, { "epoch": 1.54, "learning_rate": 4.83688000321815e-05, "loss": 2.1882, "step": 20170 }, { "epoch": 1.54, "learning_rate": 4.832857315258056e-05, "loss": 2.8191, "step": 20175 }, { "epoch": 1.54, "learning_rate": 4.8288346272979605e-05, "loss": 1.4353, "step": 20180 }, { "epoch": 1.54, "learning_rate": 4.824811939337866e-05, "loss": 1.7028, "step": 20185 }, { "epoch": 1.54, "learning_rate": 4.820789251377771e-05, "loss": 3.8006, "step": 20190 }, { "epoch": 1.54, "learning_rate": 4.816766563417676e-05, "loss": 3.9104, "step": 20195 }, { "epoch": 1.54, "learning_rate": 4.812743875457581e-05, "loss": 3.7228, "step": 20200 }, { "epoch": 1.54, "learning_rate": 4.8087211874974856e-05, "loss": 3.077, "step": 20205 }, { "epoch": 1.54, "learning_rate": 4.8046984995373914e-05, "loss": 4.0723, "step": 20210 }, { "epoch": 1.55, "learning_rate": 4.800675811577296e-05, "loss": 2.538, "step": 20215 }, { "epoch": 1.55, "learning_rate": 4.796653123617201e-05, "loss": 2.944, "step": 20220 }, { "epoch": 1.55, "learning_rate": 4.792630435657106e-05, "loss": 1.8839, "step": 20225 }, { "epoch": 1.55, "learning_rate": 4.788607747697011e-05, "loss": 2.1462, "step": 20230 }, { "epoch": 1.55, "learning_rate": 4.7845850597369165e-05, "loss": 3.1881, "step": 20235 }, { "epoch": 1.55, "learning_rate": 4.7805623717768216e-05, "loss": 3.8365, "step": 20240 }, { "epoch": 1.55, "learning_rate": 4.776539683816727e-05, "loss": 4.1854, "step": 20245 }, { "epoch": 1.55, "learning_rate": 4.772516995856632e-05, "loss": 3.5527, "step": 20250 }, { "epoch": 1.55, "learning_rate": 4.7684943078965364e-05, "loss": 2.907, "step": 20255 }, { "epoch": 1.55, "learning_rate": 4.764471619936442e-05, "loss": 4.211, "step": 20260 }, { "epoch": 1.55, "learning_rate": 4.760448931976347e-05, "loss": 3.4987, "step": 20265 }, { "epoch": 1.55, "learning_rate": 4.756426244016252e-05, "loss": 2.5673, "step": 20270 }, { "epoch": 1.55, "learning_rate": 4.752403556056157e-05, "loss": 2.0378, "step": 20275 }, { "epoch": 1.55, "learning_rate": 4.7483808680960614e-05, "loss": 2.3546, "step": 20280 }, { "epoch": 1.55, "learning_rate": 4.744358180135967e-05, "loss": 0.8048, "step": 20285 }, { "epoch": 1.55, "learning_rate": 4.740335492175872e-05, "loss": 4.9102, "step": 20290 }, { "epoch": 1.55, "learning_rate": 4.7363128042157776e-05, "loss": 5.3023, "step": 20295 }, { "epoch": 1.55, "learning_rate": 4.732290116255682e-05, "loss": 4.1691, "step": 20300 }, { "epoch": 1.55, "learning_rate": 4.728267428295587e-05, "loss": 3.5041, "step": 20305 }, { "epoch": 1.55, "learning_rate": 4.724244740335492e-05, "loss": 2.0665, "step": 20310 }, { "epoch": 1.55, "learning_rate": 4.7202220523753975e-05, "loss": 3.1756, "step": 20315 }, { "epoch": 1.55, "learning_rate": 4.7161993644153026e-05, "loss": 1.7097, "step": 20320 }, { "epoch": 1.55, "learning_rate": 4.712176676455208e-05, "loss": 2.9661, "step": 20325 }, { "epoch": 1.55, "learning_rate": 4.708153988495113e-05, "loss": 2.5157, "step": 20330 }, { "epoch": 1.55, "learning_rate": 4.704131300535018e-05, "loss": 3.5753, "step": 20335 }, { "epoch": 1.55, "learning_rate": 4.7001086125749225e-05, "loss": 4.2086, "step": 20340 }, { "epoch": 1.55, "learning_rate": 4.6960859246148283e-05, "loss": 4.2891, "step": 20345 }, { "epoch": 1.56, "learning_rate": 4.692063236654733e-05, "loss": 4.4199, "step": 20350 }, { "epoch": 1.56, "learning_rate": 4.688040548694638e-05, "loss": 4.1923, "step": 20355 }, { "epoch": 1.56, "learning_rate": 4.684017860734543e-05, "loss": 1.1563, "step": 20360 }, { "epoch": 1.56, "learning_rate": 4.6799951727744476e-05, "loss": 2.6206, "step": 20365 }, { "epoch": 1.56, "learning_rate": 4.6759724848143534e-05, "loss": 2.1299, "step": 20370 }, { "epoch": 1.56, "learning_rate": 4.671949796854258e-05, "loss": 1.8058, "step": 20375 }, { "epoch": 1.56, "learning_rate": 4.667927108894164e-05, "loss": 2.8761, "step": 20380 }, { "epoch": 1.56, "learning_rate": 4.663904420934068e-05, "loss": 2.907, "step": 20385 }, { "epoch": 1.56, "learning_rate": 4.659881732973973e-05, "loss": 3.6823, "step": 20390 }, { "epoch": 1.56, "learning_rate": 4.6558590450138785e-05, "loss": 3.6055, "step": 20395 }, { "epoch": 1.56, "learning_rate": 4.6518363570537836e-05, "loss": 3.5477, "step": 20400 }, { "epoch": 1.56, "learning_rate": 4.647813669093689e-05, "loss": 3.2163, "step": 20405 }, { "epoch": 1.56, "learning_rate": 4.643790981133594e-05, "loss": 3.3979, "step": 20410 }, { "epoch": 1.56, "learning_rate": 4.6397682931734984e-05, "loss": 3.7832, "step": 20415 }, { "epoch": 1.56, "learning_rate": 4.635745605213404e-05, "loss": 2.9825, "step": 20420 }, { "epoch": 1.56, "learning_rate": 4.631722917253309e-05, "loss": 2.2435, "step": 20425 }, { "epoch": 1.56, "learning_rate": 4.627700229293214e-05, "loss": 1.402, "step": 20430 }, { "epoch": 1.56, "learning_rate": 4.623677541333119e-05, "loss": 1.3045, "step": 20435 }, { "epoch": 1.56, "learning_rate": 4.619654853373024e-05, "loss": 4.5785, "step": 20440 }, { "epoch": 1.56, "learning_rate": 4.615632165412929e-05, "loss": 5.1027, "step": 20445 }, { "epoch": 1.56, "learning_rate": 4.611609477452834e-05, "loss": 3.85, "step": 20450 }, { "epoch": 1.56, "learning_rate": 4.6075867894927395e-05, "loss": 3.7928, "step": 20455 }, { "epoch": 1.56, "learning_rate": 4.603564101532644e-05, "loss": 3.9852, "step": 20460 }, { "epoch": 1.56, "learning_rate": 4.59954141357255e-05, "loss": 3.3365, "step": 20465 }, { "epoch": 1.56, "learning_rate": 4.595518725612454e-05, "loss": 2.475, "step": 20470 }, { "epoch": 1.56, "learning_rate": 4.5914960376523595e-05, "loss": 3.7325, "step": 20475 }, { "epoch": 1.57, "learning_rate": 4.5874733496922646e-05, "loss": 2.4769, "step": 20480 }, { "epoch": 1.57, "learning_rate": 4.58345066173217e-05, "loss": 3.2034, "step": 20485 }, { "epoch": 1.57, "learning_rate": 4.579427973772075e-05, "loss": 4.4615, "step": 20490 }, { "epoch": 1.57, "learning_rate": 4.57540528581198e-05, "loss": 3.6992, "step": 20495 }, { "epoch": 1.57, "learning_rate": 4.5713825978518845e-05, "loss": 3.141, "step": 20500 }, { "epoch": 1.57, "learning_rate": 4.56735990989179e-05, "loss": 3.8582, "step": 20505 }, { "epoch": 1.57, "learning_rate": 4.563337221931695e-05, "loss": 4.3367, "step": 20510 }, { "epoch": 1.57, "learning_rate": 4.5593145339716e-05, "loss": 4.4576, "step": 20515 }, { "epoch": 1.57, "learning_rate": 4.555291846011505e-05, "loss": 0.9217, "step": 20520 }, { "epoch": 1.57, "learning_rate": 4.55126915805141e-05, "loss": 2.4517, "step": 20525 }, { "epoch": 1.57, "learning_rate": 4.5472464700913154e-05, "loss": 0.8062, "step": 20530 }, { "epoch": 1.57, "learning_rate": 4.54322378213122e-05, "loss": 0.7989, "step": 20535 }, { "epoch": 1.57, "learning_rate": 4.539201094171126e-05, "loss": 3.5436, "step": 20540 }, { "epoch": 1.57, "learning_rate": 4.53517840621103e-05, "loss": 3.1012, "step": 20545 }, { "epoch": 1.57, "learning_rate": 4.531155718250936e-05, "loss": 3.8428, "step": 20550 }, { "epoch": 1.57, "learning_rate": 4.5271330302908404e-05, "loss": 3.9062, "step": 20555 }, { "epoch": 1.57, "learning_rate": 4.5231103423307456e-05, "loss": 2.7281, "step": 20560 }, { "epoch": 1.57, "learning_rate": 4.519087654370651e-05, "loss": 3.4412, "step": 20565 }, { "epoch": 1.57, "learning_rate": 4.515064966410556e-05, "loss": 1.8876, "step": 20570 }, { "epoch": 1.57, "learning_rate": 4.511042278450461e-05, "loss": 1.8162, "step": 20575 }, { "epoch": 1.57, "learning_rate": 4.507019590490366e-05, "loss": 3.2144, "step": 20580 }, { "epoch": 1.57, "learning_rate": 4.5029969025302706e-05, "loss": 1.301, "step": 20585 }, { "epoch": 1.57, "learning_rate": 4.498974214570176e-05, "loss": 4.8656, "step": 20590 }, { "epoch": 1.57, "learning_rate": 4.494951526610081e-05, "loss": 4.2117, "step": 20595 }, { "epoch": 1.57, "learning_rate": 4.490928838649986e-05, "loss": 4.4994, "step": 20600 }, { "epoch": 1.57, "learning_rate": 4.486906150689891e-05, "loss": 2.6848, "step": 20605 }, { "epoch": 1.58, "learning_rate": 4.4828834627297964e-05, "loss": 3.4116, "step": 20610 }, { "epoch": 1.58, "learning_rate": 4.4788607747697015e-05, "loss": 3.0542, "step": 20615 }, { "epoch": 1.58, "learning_rate": 4.474838086809606e-05, "loss": 3.0172, "step": 20620 }, { "epoch": 1.58, "learning_rate": 4.470815398849512e-05, "loss": 1.6656, "step": 20625 }, { "epoch": 1.58, "learning_rate": 4.466792710889416e-05, "loss": 2.6862, "step": 20630 }, { "epoch": 1.58, "learning_rate": 4.462770022929322e-05, "loss": 2.2119, "step": 20635 }, { "epoch": 1.58, "learning_rate": 4.4587473349692266e-05, "loss": 5.6586, "step": 20640 }, { "epoch": 1.58, "learning_rate": 4.454724647009132e-05, "loss": 3.9809, "step": 20645 }, { "epoch": 1.58, "learning_rate": 4.450701959049037e-05, "loss": 4.7715, "step": 20650 }, { "epoch": 1.58, "learning_rate": 4.446679271088942e-05, "loss": 4.0703, "step": 20655 }, { "epoch": 1.58, "learning_rate": 4.442656583128847e-05, "loss": 3.8097, "step": 20660 }, { "epoch": 1.58, "learning_rate": 4.4386338951687516e-05, "loss": 3.4632, "step": 20665 }, { "epoch": 1.58, "learning_rate": 4.434611207208657e-05, "loss": 3.296, "step": 20670 }, { "epoch": 1.58, "learning_rate": 4.430588519248562e-05, "loss": 3.2801, "step": 20675 }, { "epoch": 1.58, "learning_rate": 4.426565831288467e-05, "loss": 2.3163, "step": 20680 }, { "epoch": 1.58, "learning_rate": 4.422543143328372e-05, "loss": 4.8768, "step": 20685 }, { "epoch": 1.58, "learning_rate": 4.4185204553682774e-05, "loss": 4.2062, "step": 20690 }, { "epoch": 1.58, "learning_rate": 4.4144977674081825e-05, "loss": 3.9586, "step": 20695 }, { "epoch": 1.58, "learning_rate": 4.410475079448088e-05, "loss": 4.7627, "step": 20700 }, { "epoch": 1.58, "learning_rate": 4.406452391487992e-05, "loss": 4.001, "step": 20705 }, { "epoch": 1.58, "learning_rate": 4.402429703527898e-05, "loss": 3.7379, "step": 20710 }, { "epoch": 1.58, "learning_rate": 4.3984070155678024e-05, "loss": 4.5615, "step": 20715 }, { "epoch": 1.58, "learning_rate": 4.3943843276077076e-05, "loss": 3.2771, "step": 20720 }, { "epoch": 1.58, "learning_rate": 4.390361639647613e-05, "loss": 2.4874, "step": 20725 }, { "epoch": 1.58, "learning_rate": 4.386338951687518e-05, "loss": 1.7379, "step": 20730 }, { "epoch": 1.58, "learning_rate": 4.382316263727423e-05, "loss": 1.266, "step": 20735 }, { "epoch": 1.59, "learning_rate": 4.3782935757673275e-05, "loss": 4.9725, "step": 20740 }, { "epoch": 1.59, "learning_rate": 4.374270887807233e-05, "loss": 4.191, "step": 20745 }, { "epoch": 1.59, "learning_rate": 4.370248199847138e-05, "loss": 3.8467, "step": 20750 }, { "epoch": 1.59, "learning_rate": 4.366225511887043e-05, "loss": 3.1284, "step": 20755 }, { "epoch": 1.59, "learning_rate": 4.362202823926948e-05, "loss": 3.0203, "step": 20760 }, { "epoch": 1.59, "learning_rate": 4.358180135966853e-05, "loss": 3.1714, "step": 20765 }, { "epoch": 1.59, "learning_rate": 4.3541574480067584e-05, "loss": 1.5033, "step": 20770 }, { "epoch": 1.59, "learning_rate": 4.3501347600466635e-05, "loss": 2.4259, "step": 20775 }, { "epoch": 1.59, "learning_rate": 4.3461120720865687e-05, "loss": 3.8117, "step": 20780 }, { "epoch": 1.59, "learning_rate": 4.342089384126474e-05, "loss": 1.745, "step": 20785 }, { "epoch": 1.59, "learning_rate": 4.338066696166378e-05, "loss": 4.2043, "step": 20790 }, { "epoch": 1.59, "learning_rate": 4.334044008206284e-05, "loss": 2.6391, "step": 20795 }, { "epoch": 1.59, "learning_rate": 4.3300213202461886e-05, "loss": 3.6945, "step": 20800 }, { "epoch": 1.59, "learning_rate": 4.325998632286094e-05, "loss": 4.534, "step": 20805 }, { "epoch": 1.59, "learning_rate": 4.321975944325999e-05, "loss": 2.93, "step": 20810 }, { "epoch": 1.59, "learning_rate": 4.317953256365903e-05, "loss": 4.0686, "step": 20815 }, { "epoch": 1.59, "learning_rate": 4.313930568405809e-05, "loss": 4.2174, "step": 20820 }, { "epoch": 1.59, "learning_rate": 4.3099078804457136e-05, "loss": 2.6016, "step": 20825 }, { "epoch": 1.59, "learning_rate": 4.3058851924856194e-05, "loss": 2.1726, "step": 20830 }, { "epoch": 1.59, "learning_rate": 4.301862504525524e-05, "loss": 2.44, "step": 20835 }, { "epoch": 1.59, "learning_rate": 4.297839816565429e-05, "loss": 3.3509, "step": 20840 }, { "epoch": 1.59, "learning_rate": 4.293817128605334e-05, "loss": 3.4732, "step": 20845 }, { "epoch": 1.59, "learning_rate": 4.2897944406452394e-05, "loss": 4.4779, "step": 20850 }, { "epoch": 1.59, "learning_rate": 4.2857717526851445e-05, "loss": 3.4031, "step": 20855 }, { "epoch": 1.59, "learning_rate": 4.2817490647250497e-05, "loss": 2.8928, "step": 20860 }, { "epoch": 1.59, "learning_rate": 4.277726376764955e-05, "loss": 2.8609, "step": 20865 }, { "epoch": 1.6, "learning_rate": 4.27370368880486e-05, "loss": 2.7903, "step": 20870 }, { "epoch": 1.6, "learning_rate": 4.2696810008447644e-05, "loss": 1.7776, "step": 20875 }, { "epoch": 1.6, "learning_rate": 4.26565831288467e-05, "loss": 2.5144, "step": 20880 }, { "epoch": 1.6, "learning_rate": 4.261635624924575e-05, "loss": 1.6099, "step": 20885 }, { "epoch": 1.6, "learning_rate": 4.25761293696448e-05, "loss": 4.2301, "step": 20890 }, { "epoch": 1.6, "learning_rate": 4.253590249004385e-05, "loss": 3.3306, "step": 20895 }, { "epoch": 1.6, "learning_rate": 4.2495675610442895e-05, "loss": 3.6641, "step": 20900 }, { "epoch": 1.6, "learning_rate": 4.245544873084195e-05, "loss": 3.1027, "step": 20905 }, { "epoch": 1.6, "learning_rate": 4.2415221851241e-05, "loss": 4.1236, "step": 20910 }, { "epoch": 1.6, "learning_rate": 4.2374994971640056e-05, "loss": 1.9495, "step": 20915 }, { "epoch": 1.6, "learning_rate": 4.23347680920391e-05, "loss": 1.7783, "step": 20920 }, { "epoch": 1.6, "learning_rate": 4.229454121243815e-05, "loss": 2.336, "step": 20925 }, { "epoch": 1.6, "learning_rate": 4.2254314332837204e-05, "loss": 2.1791, "step": 20930 }, { "epoch": 1.6, "learning_rate": 4.2214087453236255e-05, "loss": 3.2233, "step": 20935 }, { "epoch": 1.6, "learning_rate": 4.2173860573635306e-05, "loss": 3.8101, "step": 20940 }, { "epoch": 1.6, "learning_rate": 4.213363369403436e-05, "loss": 3.4906, "step": 20945 }, { "epoch": 1.6, "learning_rate": 4.20934068144334e-05, "loss": 5.3133, "step": 20950 }, { "epoch": 1.6, "learning_rate": 4.205317993483246e-05, "loss": 3.0161, "step": 20955 }, { "epoch": 1.6, "learning_rate": 4.2012953055231506e-05, "loss": 3.6072, "step": 20960 }, { "epoch": 1.6, "learning_rate": 4.197272617563056e-05, "loss": 3.5251, "step": 20965 }, { "epoch": 1.6, "learning_rate": 4.193249929602961e-05, "loss": 2.6024, "step": 20970 }, { "epoch": 1.6, "learning_rate": 4.189227241642866e-05, "loss": 2.6017, "step": 20975 }, { "epoch": 1.6, "learning_rate": 4.185204553682771e-05, "loss": 3.2057, "step": 20980 }, { "epoch": 1.6, "learning_rate": 4.1811818657226756e-05, "loss": 1.6699, "step": 20985 }, { "epoch": 1.6, "learning_rate": 4.1771591777625814e-05, "loss": 4.5773, "step": 20990 }, { "epoch": 1.6, "learning_rate": 4.173136489802486e-05, "loss": 4.158, "step": 20995 }, { "epoch": 1.61, "learning_rate": 4.169113801842392e-05, "loss": 5.7055, "step": 21000 }, { "epoch": 1.61, "learning_rate": 4.165091113882296e-05, "loss": 3.018, "step": 21005 }, { "epoch": 1.61, "learning_rate": 4.1610684259222013e-05, "loss": 2.5031, "step": 21010 }, { "epoch": 1.61, "learning_rate": 4.1570457379621065e-05, "loss": 3.1937, "step": 21015 }, { "epoch": 1.61, "learning_rate": 4.1530230500020116e-05, "loss": 1.6752, "step": 21020 }, { "epoch": 1.61, "learning_rate": 4.149000362041917e-05, "loss": 1.4042, "step": 21025 }, { "epoch": 1.61, "learning_rate": 4.144977674081822e-05, "loss": 2.4617, "step": 21030 }, { "epoch": 1.61, "learning_rate": 4.1409549861217264e-05, "loss": 3.0018, "step": 21035 }, { "epoch": 1.61, "learning_rate": 4.1369322981616315e-05, "loss": 4.4467, "step": 21040 }, { "epoch": 1.61, "learning_rate": 4.132909610201537e-05, "loss": 4.9004, "step": 21045 }, { "epoch": 1.61, "learning_rate": 4.128886922241442e-05, "loss": 4.0383, "step": 21050 }, { "epoch": 1.61, "learning_rate": 4.124864234281347e-05, "loss": 4.24, "step": 21055 }, { "epoch": 1.61, "learning_rate": 4.120841546321252e-05, "loss": 3.2634, "step": 21060 }, { "epoch": 1.61, "learning_rate": 4.116818858361157e-05, "loss": 2.5876, "step": 21065 }, { "epoch": 1.61, "learning_rate": 4.112796170401062e-05, "loss": 4.6117, "step": 21070 }, { "epoch": 1.61, "learning_rate": 4.1087734824409676e-05, "loss": 0.7939, "step": 21075 }, { "epoch": 1.61, "learning_rate": 4.104750794480872e-05, "loss": 2.6782, "step": 21080 }, { "epoch": 1.61, "learning_rate": 4.100728106520778e-05, "loss": 3.213, "step": 21085 }, { "epoch": 1.61, "learning_rate": 4.096705418560682e-05, "loss": 5.3123, "step": 21090 }, { "epoch": 1.61, "learning_rate": 4.0926827306005875e-05, "loss": 3.9619, "step": 21095 }, { "epoch": 1.61, "learning_rate": 4.0886600426404926e-05, "loss": 4.3588, "step": 21100 }, { "epoch": 1.61, "learning_rate": 4.084637354680398e-05, "loss": 3.2867, "step": 21105 }, { "epoch": 1.61, "learning_rate": 4.080614666720303e-05, "loss": 2.582, "step": 21110 }, { "epoch": 1.61, "learning_rate": 4.076591978760208e-05, "loss": 1.9699, "step": 21115 }, { "epoch": 1.61, "learning_rate": 4.0725692908001125e-05, "loss": 2.4965, "step": 21120 }, { "epoch": 1.61, "learning_rate": 4.068546602840018e-05, "loss": 3.1243, "step": 21125 }, { "epoch": 1.61, "learning_rate": 4.064523914879923e-05, "loss": 1.0647, "step": 21130 }, { "epoch": 1.62, "learning_rate": 4.060501226919828e-05, "loss": 2.6103, "step": 21135 }, { "epoch": 1.62, "learning_rate": 4.056478538959733e-05, "loss": 3.5543, "step": 21140 }, { "epoch": 1.62, "learning_rate": 4.052455850999638e-05, "loss": 4.9754, "step": 21145 }, { "epoch": 1.62, "learning_rate": 4.0484331630395434e-05, "loss": 3.5666, "step": 21150 }, { "epoch": 1.62, "learning_rate": 4.044410475079448e-05, "loss": 3.3064, "step": 21155 }, { "epoch": 1.62, "learning_rate": 4.040387787119354e-05, "loss": 3.4265, "step": 21160 }, { "epoch": 1.62, "learning_rate": 4.036365099159258e-05, "loss": 2.6561, "step": 21165 }, { "epoch": 1.62, "learning_rate": 4.032342411199164e-05, "loss": 1.9494, "step": 21170 }, { "epoch": 1.62, "learning_rate": 4.0283197232390685e-05, "loss": 1.829, "step": 21175 }, { "epoch": 1.62, "learning_rate": 4.0242970352789736e-05, "loss": 2.8679, "step": 21180 }, { "epoch": 1.62, "learning_rate": 4.020274347318879e-05, "loss": 2.5059, "step": 21185 }, { "epoch": 1.62, "learning_rate": 4.016251659358784e-05, "loss": 3.6293, "step": 21190 }, { "epoch": 1.62, "learning_rate": 4.012228971398689e-05, "loss": 4.1984, "step": 21195 }, { "epoch": 1.62, "learning_rate": 4.0082062834385935e-05, "loss": 3.8107, "step": 21200 }, { "epoch": 1.62, "learning_rate": 4.004183595478499e-05, "loss": 2.8273, "step": 21205 }, { "epoch": 1.62, "learning_rate": 4.000160907518404e-05, "loss": 3.2559, "step": 21210 }, { "epoch": 1.62, "learning_rate": 3.996138219558309e-05, "loss": 2.5233, "step": 21215 }, { "epoch": 1.62, "learning_rate": 3.992115531598214e-05, "loss": 2.903, "step": 21220 }, { "epoch": 1.62, "learning_rate": 3.988092843638119e-05, "loss": 3.3081, "step": 21225 }, { "epoch": 1.62, "learning_rate": 3.9840701556780244e-05, "loss": 3.3352, "step": 21230 }, { "epoch": 1.62, "learning_rate": 3.9800474677179296e-05, "loss": 1.8759, "step": 21235 }, { "epoch": 1.62, "learning_rate": 3.976024779757834e-05, "loss": 5.8707, "step": 21240 }, { "epoch": 1.62, "learning_rate": 3.97200209179774e-05, "loss": 4.9869, "step": 21245 }, { "epoch": 1.62, "learning_rate": 3.967979403837644e-05, "loss": 4.4971, "step": 21250 }, { "epoch": 1.62, "learning_rate": 3.9639567158775495e-05, "loss": 3.6207, "step": 21255 }, { "epoch": 1.62, "learning_rate": 3.9599340279174546e-05, "loss": 2.1977, "step": 21260 }, { "epoch": 1.63, "learning_rate": 3.95591133995736e-05, "loss": 2.8388, "step": 21265 }, { "epoch": 1.63, "learning_rate": 3.951888651997265e-05, "loss": 1.8477, "step": 21270 }, { "epoch": 1.63, "learning_rate": 3.9478659640371694e-05, "loss": 1.4987, "step": 21275 }, { "epoch": 1.63, "learning_rate": 3.943843276077075e-05, "loss": 1.4276, "step": 21280 }, { "epoch": 1.63, "learning_rate": 3.93982058811698e-05, "loss": 1.3133, "step": 21285 }, { "epoch": 1.63, "learning_rate": 3.935797900156885e-05, "loss": 4.0492, "step": 21290 }, { "epoch": 1.63, "learning_rate": 3.93177521219679e-05, "loss": 4.2029, "step": 21295 }, { "epoch": 1.63, "learning_rate": 3.927752524236695e-05, "loss": 3.3377, "step": 21300 }, { "epoch": 1.63, "learning_rate": 3.9237298362766e-05, "loss": 4.1035, "step": 21305 }, { "epoch": 1.63, "learning_rate": 3.9197071483165054e-05, "loss": 4.1178, "step": 21310 }, { "epoch": 1.63, "learning_rate": 3.9156844603564105e-05, "loss": 2.9779, "step": 21315 }, { "epoch": 1.63, "learning_rate": 3.911661772396316e-05, "loss": 1.2614, "step": 21320 }, { "epoch": 1.63, "learning_rate": 3.90763908443622e-05, "loss": 1.6433, "step": 21325 }, { "epoch": 1.63, "learning_rate": 3.903616396476126e-05, "loss": 1.6593, "step": 21330 }, { "epoch": 1.63, "learning_rate": 3.8995937085160305e-05, "loss": 4.43, "step": 21335 }, { "epoch": 1.63, "learning_rate": 3.8955710205559356e-05, "loss": 5.0719, "step": 21340 }, { "epoch": 1.63, "learning_rate": 3.891548332595841e-05, "loss": 3.8008, "step": 21345 }, { "epoch": 1.63, "learning_rate": 3.887525644635745e-05, "loss": 4.433, "step": 21350 }, { "epoch": 1.63, "learning_rate": 3.883502956675651e-05, "loss": 3.3838, "step": 21355 }, { "epoch": 1.63, "learning_rate": 3.8794802687155555e-05, "loss": 3.7695, "step": 21360 }, { "epoch": 1.63, "learning_rate": 3.875457580755461e-05, "loss": 2.0561, "step": 21365 }, { "epoch": 1.63, "learning_rate": 3.871434892795366e-05, "loss": 2.6766, "step": 21370 }, { "epoch": 1.63, "learning_rate": 3.867412204835271e-05, "loss": 2.1337, "step": 21375 }, { "epoch": 1.63, "learning_rate": 3.863389516875176e-05, "loss": 2.6355, "step": 21380 }, { "epoch": 1.63, "learning_rate": 3.859366828915081e-05, "loss": 1.5269, "step": 21385 }, { "epoch": 1.63, "learning_rate": 3.8553441409549864e-05, "loss": 4.2592, "step": 21390 }, { "epoch": 1.64, "learning_rate": 3.8513214529948915e-05, "loss": 4.6906, "step": 21395 }, { "epoch": 1.64, "learning_rate": 3.847298765034797e-05, "loss": 3.7479, "step": 21400 }, { "epoch": 1.64, "learning_rate": 3.843276077074702e-05, "loss": 4.2096, "step": 21405 }, { "epoch": 1.64, "learning_rate": 3.839253389114606e-05, "loss": 3.7812, "step": 21410 }, { "epoch": 1.64, "learning_rate": 3.835230701154512e-05, "loss": 5.1412, "step": 21415 }, { "epoch": 1.64, "learning_rate": 3.8312080131944166e-05, "loss": 2.5208, "step": 21420 }, { "epoch": 1.64, "learning_rate": 3.827185325234322e-05, "loss": 1.2869, "step": 21425 }, { "epoch": 1.64, "learning_rate": 3.823162637274227e-05, "loss": 1.4612, "step": 21430 }, { "epoch": 1.64, "learning_rate": 3.8191399493141314e-05, "loss": 4.1741, "step": 21435 }, { "epoch": 1.64, "learning_rate": 3.815117261354037e-05, "loss": 4.3588, "step": 21440 }, { "epoch": 1.64, "learning_rate": 3.8110945733939417e-05, "loss": 3.8693, "step": 21445 }, { "epoch": 1.64, "learning_rate": 3.8070718854338475e-05, "loss": 3.3549, "step": 21450 }, { "epoch": 1.64, "learning_rate": 3.803049197473752e-05, "loss": 3.8469, "step": 21455 }, { "epoch": 1.64, "learning_rate": 3.799026509513657e-05, "loss": 2.6488, "step": 21460 }, { "epoch": 1.64, "learning_rate": 3.795003821553562e-05, "loss": 4.3189, "step": 21465 }, { "epoch": 1.64, "learning_rate": 3.7909811335934674e-05, "loss": 2.1022, "step": 21470 }, { "epoch": 1.64, "learning_rate": 3.7869584456333725e-05, "loss": 3.1552, "step": 21475 }, { "epoch": 1.64, "learning_rate": 3.782935757673278e-05, "loss": 0.6143, "step": 21480 }, { "epoch": 1.64, "learning_rate": 3.778913069713182e-05, "loss": 2.1666, "step": 21485 }, { "epoch": 1.64, "learning_rate": 3.774890381753088e-05, "loss": 4.232, "step": 21490 }, { "epoch": 1.64, "learning_rate": 3.7708676937929924e-05, "loss": 4.3281, "step": 21495 }, { "epoch": 1.64, "learning_rate": 3.7668450058328976e-05, "loss": 4.859, "step": 21500 }, { "epoch": 1.64, "learning_rate": 3.762822317872803e-05, "loss": 3.485, "step": 21505 }, { "epoch": 1.64, "learning_rate": 3.758799629912708e-05, "loss": 3.0216, "step": 21510 }, { "epoch": 1.64, "learning_rate": 3.754776941952613e-05, "loss": 2.9945, "step": 21515 }, { "epoch": 1.64, "learning_rate": 3.7507542539925175e-05, "loss": 3.9133, "step": 21520 }, { "epoch": 1.65, "learning_rate": 3.746731566032423e-05, "loss": 1.9285, "step": 21525 }, { "epoch": 1.65, "learning_rate": 3.742708878072328e-05, "loss": 2.2346, "step": 21530 }, { "epoch": 1.65, "learning_rate": 3.7386861901122336e-05, "loss": 2.376, "step": 21535 }, { "epoch": 1.65, "learning_rate": 3.734663502152138e-05, "loss": 5.018, "step": 21540 }, { "epoch": 1.65, "learning_rate": 3.730640814192043e-05, "loss": 3.4057, "step": 21545 }, { "epoch": 1.65, "learning_rate": 3.7266181262319484e-05, "loss": 4.6818, "step": 21550 }, { "epoch": 1.65, "learning_rate": 3.7225954382718535e-05, "loss": 3.9047, "step": 21555 }, { "epoch": 1.65, "learning_rate": 3.718572750311759e-05, "loss": 3.6093, "step": 21560 }, { "epoch": 1.65, "learning_rate": 3.714550062351664e-05, "loss": 4.4719, "step": 21565 }, { "epoch": 1.65, "learning_rate": 3.710527374391568e-05, "loss": 3.2473, "step": 21570 }, { "epoch": 1.65, "learning_rate": 3.7065046864314734e-05, "loss": 2.2506, "step": 21575 }, { "epoch": 1.65, "learning_rate": 3.7024819984713786e-05, "loss": 2.268, "step": 21580 }, { "epoch": 1.65, "learning_rate": 3.698459310511284e-05, "loss": 3.6383, "step": 21585 }, { "epoch": 1.65, "learning_rate": 3.694436622551189e-05, "loss": 4.4379, "step": 21590 }, { "epoch": 1.65, "learning_rate": 3.690413934591094e-05, "loss": 4.248, "step": 21595 }, { "epoch": 1.65, "learning_rate": 3.686391246630999e-05, "loss": 4.4307, "step": 21600 }, { "epoch": 1.65, "learning_rate": 3.6823685586709036e-05, "loss": 3.4584, "step": 21605 }, { "epoch": 1.65, "learning_rate": 3.6783458707108095e-05, "loss": 4.1742, "step": 21610 }, { "epoch": 1.65, "learning_rate": 3.674323182750714e-05, "loss": 5.0411, "step": 21615 }, { "epoch": 1.65, "learning_rate": 3.67030049479062e-05, "loss": 2.3921, "step": 21620 }, { "epoch": 1.65, "learning_rate": 3.666277806830524e-05, "loss": 2.7103, "step": 21625 }, { "epoch": 1.65, "learning_rate": 3.6622551188704294e-05, "loss": 2.1656, "step": 21630 }, { "epoch": 1.65, "learning_rate": 3.6582324309103345e-05, "loss": 1.949, "step": 21635 }, { "epoch": 1.65, "learning_rate": 3.65420974295024e-05, "loss": 2.9959, "step": 21640 }, { "epoch": 1.65, "learning_rate": 3.650187054990145e-05, "loss": 3.8207, "step": 21645 }, { "epoch": 1.65, "learning_rate": 3.646164367030049e-05, "loss": 3.8807, "step": 21650 }, { "epoch": 1.66, "learning_rate": 3.6421416790699544e-05, "loss": 4.1562, "step": 21655 }, { "epoch": 1.66, "learning_rate": 3.6381189911098596e-05, "loss": 3.2987, "step": 21660 }, { "epoch": 1.66, "learning_rate": 3.634096303149765e-05, "loss": 2.7125, "step": 21665 }, { "epoch": 1.66, "learning_rate": 3.63007361518967e-05, "loss": 2.6132, "step": 21670 }, { "epoch": 1.66, "learning_rate": 3.626050927229575e-05, "loss": 2.0622, "step": 21675 }, { "epoch": 1.66, "learning_rate": 3.62202823926948e-05, "loss": 2.7405, "step": 21680 }, { "epoch": 1.66, "learning_rate": 3.618005551309385e-05, "loss": 2.9218, "step": 21685 }, { "epoch": 1.66, "learning_rate": 3.61398286334929e-05, "loss": 4.7357, "step": 21690 }, { "epoch": 1.66, "learning_rate": 3.6099601753891956e-05, "loss": 4.2771, "step": 21695 }, { "epoch": 1.66, "learning_rate": 3.6059374874291e-05, "loss": 4.242, "step": 21700 }, { "epoch": 1.66, "learning_rate": 3.601914799469006e-05, "loss": 4.1357, "step": 21705 }, { "epoch": 1.66, "learning_rate": 3.5978921115089104e-05, "loss": 2.1266, "step": 21710 }, { "epoch": 1.66, "learning_rate": 3.5938694235488155e-05, "loss": 4.3561, "step": 21715 }, { "epoch": 1.66, "learning_rate": 3.5898467355887207e-05, "loss": 2.5493, "step": 21720 }, { "epoch": 1.66, "learning_rate": 3.585824047628626e-05, "loss": 3.3787, "step": 21725 }, { "epoch": 1.66, "learning_rate": 3.581801359668531e-05, "loss": 2.3158, "step": 21730 }, { "epoch": 1.66, "learning_rate": 3.5777786717084354e-05, "loss": 0.401, "step": 21735 }, { "epoch": 1.66, "learning_rate": 3.5737559837483406e-05, "loss": 4.6857, "step": 21740 }, { "epoch": 1.66, "learning_rate": 3.569733295788246e-05, "loss": 4.5549, "step": 21745 }, { "epoch": 1.66, "learning_rate": 3.565710607828151e-05, "loss": 4.2082, "step": 21750 }, { "epoch": 1.66, "learning_rate": 3.561687919868056e-05, "loss": 3.7506, "step": 21755 }, { "epoch": 1.66, "learning_rate": 3.557665231907961e-05, "loss": 3.4496, "step": 21760 }, { "epoch": 1.66, "learning_rate": 3.553642543947866e-05, "loss": 3.6854, "step": 21765 }, { "epoch": 1.66, "learning_rate": 3.5496198559877714e-05, "loss": 2.3951, "step": 21770 }, { "epoch": 1.66, "learning_rate": 3.545597168027676e-05, "loss": 1.496, "step": 21775 }, { "epoch": 1.66, "learning_rate": 3.541574480067582e-05, "loss": 2.3501, "step": 21780 }, { "epoch": 1.67, "learning_rate": 3.537551792107486e-05, "loss": 2.6589, "step": 21785 }, { "epoch": 1.67, "learning_rate": 3.5335291041473914e-05, "loss": 4.2689, "step": 21790 }, { "epoch": 1.67, "learning_rate": 3.5295064161872965e-05, "loss": 3.6809, "step": 21795 }, { "epoch": 1.67, "learning_rate": 3.5254837282272016e-05, "loss": 3.1069, "step": 21800 }, { "epoch": 1.67, "learning_rate": 3.521461040267107e-05, "loss": 2.6169, "step": 21805 }, { "epoch": 1.67, "learning_rate": 3.517438352307011e-05, "loss": 2.609, "step": 21810 }, { "epoch": 1.67, "learning_rate": 3.513415664346917e-05, "loss": 1.233, "step": 21815 }, { "epoch": 1.67, "learning_rate": 3.5093929763868216e-05, "loss": 3.612, "step": 21820 }, { "epoch": 1.67, "learning_rate": 3.505370288426727e-05, "loss": 2.2503, "step": 21825 }, { "epoch": 1.67, "learning_rate": 3.501347600466632e-05, "loss": 2.9724, "step": 21830 }, { "epoch": 1.67, "learning_rate": 3.497324912506537e-05, "loss": 1.8672, "step": 21835 }, { "epoch": 1.67, "learning_rate": 3.493302224546442e-05, "loss": 5.3764, "step": 21840 }, { "epoch": 1.67, "learning_rate": 3.489279536586347e-05, "loss": 4.4464, "step": 21845 }, { "epoch": 1.67, "learning_rate": 3.4852568486262524e-05, "loss": 3.6959, "step": 21850 }, { "epoch": 1.67, "learning_rate": 3.4812341606661576e-05, "loss": 3.6832, "step": 21855 }, { "epoch": 1.67, "learning_rate": 3.477211472706062e-05, "loss": 3.5953, "step": 21860 }, { "epoch": 1.67, "learning_rate": 3.473188784745968e-05, "loss": 3.1239, "step": 21865 }, { "epoch": 1.67, "learning_rate": 3.4691660967858723e-05, "loss": 2.1076, "step": 21870 }, { "epoch": 1.67, "learning_rate": 3.4651434088257775e-05, "loss": 3.3783, "step": 21875 }, { "epoch": 1.67, "learning_rate": 3.4611207208656826e-05, "loss": 1.4034, "step": 21880 }, { "epoch": 1.67, "learning_rate": 3.457098032905587e-05, "loss": 1.7014, "step": 21885 }, { "epoch": 1.67, "learning_rate": 3.453075344945493e-05, "loss": 5.3859, "step": 21890 }, { "epoch": 1.67, "learning_rate": 3.4490526569853974e-05, "loss": 4.724, "step": 21895 }, { "epoch": 1.67, "learning_rate": 3.445029969025303e-05, "loss": 3.8088, "step": 21900 }, { "epoch": 1.67, "learning_rate": 3.441007281065208e-05, "loss": 3.1448, "step": 21905 }, { "epoch": 1.67, "learning_rate": 3.436984593105113e-05, "loss": 3.1462, "step": 21910 }, { "epoch": 1.67, "learning_rate": 3.432961905145018e-05, "loss": 2.371, "step": 21915 }, { "epoch": 1.68, "learning_rate": 3.428939217184923e-05, "loss": 2.8063, "step": 21920 }, { "epoch": 1.68, "learning_rate": 3.424916529224828e-05, "loss": 2.514, "step": 21925 }, { "epoch": 1.68, "learning_rate": 3.4208938412647334e-05, "loss": 3.2729, "step": 21930 }, { "epoch": 1.68, "learning_rate": 3.4168711533046386e-05, "loss": 2.0309, "step": 21935 }, { "epoch": 1.68, "learning_rate": 3.412848465344544e-05, "loss": 3.9336, "step": 21940 }, { "epoch": 1.68, "learning_rate": 3.408825777384448e-05, "loss": 4.7568, "step": 21945 }, { "epoch": 1.68, "learning_rate": 3.404803089424354e-05, "loss": 3.4958, "step": 21950 }, { "epoch": 1.68, "learning_rate": 3.4007804014642585e-05, "loss": 3.6162, "step": 21955 }, { "epoch": 1.68, "learning_rate": 3.3967577135041636e-05, "loss": 3.7423, "step": 21960 }, { "epoch": 1.68, "learning_rate": 3.392735025544069e-05, "loss": 1.3449, "step": 21965 }, { "epoch": 1.68, "learning_rate": 3.388712337583973e-05, "loss": 2.8116, "step": 21970 }, { "epoch": 1.68, "learning_rate": 3.384689649623879e-05, "loss": 2.0983, "step": 21975 }, { "epoch": 1.68, "learning_rate": 3.3806669616637835e-05, "loss": 3.2099, "step": 21980 }, { "epoch": 1.68, "learning_rate": 3.3766442737036894e-05, "loss": 1.4468, "step": 21985 }, { "epoch": 1.68, "learning_rate": 3.372621585743594e-05, "loss": 4.3094, "step": 21990 }, { "epoch": 1.68, "learning_rate": 3.368598897783499e-05, "loss": 2.935, "step": 21995 }, { "epoch": 1.68, "learning_rate": 3.364576209823404e-05, "loss": 4.1764, "step": 22000 }, { "epoch": 1.68, "learning_rate": 3.360553521863309e-05, "loss": 4.1816, "step": 22005 }, { "epoch": 1.68, "learning_rate": 3.3565308339032144e-05, "loss": 2.4688, "step": 22010 }, { "epoch": 1.68, "learning_rate": 3.3525081459431196e-05, "loss": 3.324, "step": 22015 }, { "epoch": 1.68, "learning_rate": 3.348485457983024e-05, "loss": 3.443, "step": 22020 }, { "epoch": 1.68, "learning_rate": 3.34446277002293e-05, "loss": 2.2952, "step": 22025 }, { "epoch": 1.68, "learning_rate": 3.340440082062834e-05, "loss": 2.7904, "step": 22030 }, { "epoch": 1.68, "learning_rate": 3.3364173941027395e-05, "loss": 3.2367, "step": 22035 }, { "epoch": 1.68, "learning_rate": 3.3323947061426446e-05, "loss": 3.6912, "step": 22040 }, { "epoch": 1.68, "learning_rate": 3.32837201818255e-05, "loss": 4.3529, "step": 22045 }, { "epoch": 1.69, "learning_rate": 3.324349330222455e-05, "loss": 4.1754, "step": 22050 }, { "epoch": 1.69, "learning_rate": 3.3203266422623594e-05, "loss": 4.0643, "step": 22055 }, { "epoch": 1.69, "learning_rate": 3.316303954302265e-05, "loss": 3.0192, "step": 22060 }, { "epoch": 1.69, "learning_rate": 3.31228126634217e-05, "loss": 3.0924, "step": 22065 }, { "epoch": 1.69, "learning_rate": 3.3082585783820755e-05, "loss": 2.415, "step": 22070 }, { "epoch": 1.69, "learning_rate": 3.30423589042198e-05, "loss": 3.3243, "step": 22075 }, { "epoch": 1.69, "learning_rate": 3.300213202461885e-05, "loss": 1.2963, "step": 22080 }, { "epoch": 1.69, "learning_rate": 3.29619051450179e-05, "loss": 3.9981, "step": 22085 }, { "epoch": 1.69, "learning_rate": 3.2921678265416954e-05, "loss": 3.3592, "step": 22090 }, { "epoch": 1.69, "learning_rate": 3.2881451385816006e-05, "loss": 3.4289, "step": 22095 }, { "epoch": 1.69, "learning_rate": 3.284122450621506e-05, "loss": 3.7771, "step": 22100 }, { "epoch": 1.69, "learning_rate": 3.28009976266141e-05, "loss": 3.6328, "step": 22105 }, { "epoch": 1.69, "learning_rate": 3.276077074701315e-05, "loss": 3.0618, "step": 22110 }, { "epoch": 1.69, "learning_rate": 3.2720543867412205e-05, "loss": 2.6314, "step": 22115 }, { "epoch": 1.69, "learning_rate": 3.2680316987811256e-05, "loss": 1.7609, "step": 22120 }, { "epoch": 1.69, "learning_rate": 3.264009010821031e-05, "loss": 1.2902, "step": 22125 }, { "epoch": 1.69, "learning_rate": 3.259986322860936e-05, "loss": 0.4134, "step": 22130 }, { "epoch": 1.69, "learning_rate": 3.255963634900841e-05, "loss": 2.865, "step": 22135 }, { "epoch": 1.69, "learning_rate": 3.2519409469407455e-05, "loss": 4.5992, "step": 22140 }, { "epoch": 1.69, "learning_rate": 3.2479182589806514e-05, "loss": 4.1874, "step": 22145 }, { "epoch": 1.69, "learning_rate": 3.243895571020556e-05, "loss": 2.4801, "step": 22150 }, { "epoch": 1.69, "learning_rate": 3.2398728830604616e-05, "loss": 3.9021, "step": 22155 }, { "epoch": 1.69, "learning_rate": 3.235850195100366e-05, "loss": 3.6825, "step": 22160 }, { "epoch": 1.69, "learning_rate": 3.231827507140271e-05, "loss": 3.2391, "step": 22165 }, { "epoch": 1.69, "learning_rate": 3.2278048191801764e-05, "loss": 3.5101, "step": 22170 }, { "epoch": 1.69, "learning_rate": 3.2237821312200816e-05, "loss": 3.7007, "step": 22175 }, { "epoch": 1.7, "learning_rate": 3.219759443259987e-05, "loss": 1.1778, "step": 22180 }, { "epoch": 1.7, "learning_rate": 3.215736755299891e-05, "loss": 0.9388, "step": 22185 }, { "epoch": 1.7, "learning_rate": 3.211714067339796e-05, "loss": 4.8832, "step": 22190 }, { "epoch": 1.7, "learning_rate": 3.2076913793797015e-05, "loss": 3.309, "step": 22195 }, { "epoch": 1.7, "learning_rate": 3.2036686914196066e-05, "loss": 4.3115, "step": 22200 }, { "epoch": 1.7, "learning_rate": 3.199646003459512e-05, "loss": 4.6436, "step": 22205 }, { "epoch": 1.7, "learning_rate": 3.195623315499417e-05, "loss": 3.9236, "step": 22210 }, { "epoch": 1.7, "learning_rate": 3.191600627539322e-05, "loss": 2.4813, "step": 22215 }, { "epoch": 1.7, "learning_rate": 3.187577939579227e-05, "loss": 1.7186, "step": 22220 }, { "epoch": 1.7, "learning_rate": 3.183555251619132e-05, "loss": 3.0956, "step": 22225 }, { "epoch": 1.7, "learning_rate": 3.1795325636590375e-05, "loss": 0.8746, "step": 22230 }, { "epoch": 1.7, "learning_rate": 3.175509875698942e-05, "loss": 2.345, "step": 22235 }, { "epoch": 1.7, "learning_rate": 3.171487187738848e-05, "loss": 3.7502, "step": 22240 }, { "epoch": 1.7, "learning_rate": 3.167464499778752e-05, "loss": 4.0754, "step": 22245 }, { "epoch": 1.7, "learning_rate": 3.1634418118186574e-05, "loss": 5.118, "step": 22250 }, { "epoch": 1.7, "learning_rate": 3.1594191238585625e-05, "loss": 5.1141, "step": 22255 }, { "epoch": 1.7, "learning_rate": 3.155396435898468e-05, "loss": 3.1888, "step": 22260 }, { "epoch": 1.7, "learning_rate": 3.151373747938373e-05, "loss": 2.4457, "step": 22265 }, { "epoch": 1.7, "learning_rate": 3.147351059978277e-05, "loss": 3.7101, "step": 22270 }, { "epoch": 1.7, "learning_rate": 3.1433283720181825e-05, "loss": 2.8647, "step": 22275 }, { "epoch": 1.7, "learning_rate": 3.1393056840580876e-05, "loss": 2.2749, "step": 22280 }, { "epoch": 1.7, "learning_rate": 3.135282996097993e-05, "loss": 2.0576, "step": 22285 }, { "epoch": 1.7, "learning_rate": 3.131260308137898e-05, "loss": 4.5924, "step": 22290 }, { "epoch": 1.7, "learning_rate": 3.127237620177803e-05, "loss": 3.8521, "step": 22295 }, { "epoch": 1.7, "learning_rate": 3.123214932217708e-05, "loss": 3.3519, "step": 22300 }, { "epoch": 1.7, "learning_rate": 3.119192244257613e-05, "loss": 3.1276, "step": 22305 }, { "epoch": 1.71, "learning_rate": 3.115169556297518e-05, "loss": 3.8262, "step": 22310 }, { "epoch": 1.71, "learning_rate": 3.1111468683374236e-05, "loss": 2.2402, "step": 22315 }, { "epoch": 1.71, "learning_rate": 3.107124180377328e-05, "loss": 2.7102, "step": 22320 }, { "epoch": 1.71, "learning_rate": 3.103101492417233e-05, "loss": 1.8521, "step": 22325 }, { "epoch": 1.71, "learning_rate": 3.0990788044571384e-05, "loss": 1.5898, "step": 22330 }, { "epoch": 1.71, "learning_rate": 3.0950561164970435e-05, "loss": 3.3699, "step": 22335 }, { "epoch": 1.71, "learning_rate": 3.091033428536949e-05, "loss": 3.958, "step": 22340 }, { "epoch": 1.71, "learning_rate": 3.087010740576853e-05, "loss": 4.5051, "step": 22345 }, { "epoch": 1.71, "learning_rate": 3.082988052616759e-05, "loss": 3.4871, "step": 22350 }, { "epoch": 1.71, "learning_rate": 3.0789653646566634e-05, "loss": 2.7721, "step": 22355 }, { "epoch": 1.71, "learning_rate": 3.0749426766965686e-05, "loss": 3.9631, "step": 22360 }, { "epoch": 1.71, "learning_rate": 3.070919988736474e-05, "loss": 4.0345, "step": 22365 }, { "epoch": 1.71, "learning_rate": 3.066897300776379e-05, "loss": 2.9204, "step": 22370 }, { "epoch": 1.71, "learning_rate": 3.062874612816284e-05, "loss": 2.6203, "step": 22375 }, { "epoch": 1.71, "learning_rate": 3.058851924856189e-05, "loss": 2.0655, "step": 22380 }, { "epoch": 1.71, "learning_rate": 3.054829236896094e-05, "loss": 2.9641, "step": 22385 }, { "epoch": 1.71, "learning_rate": 3.0508065489359995e-05, "loss": 3.8604, "step": 22390 }, { "epoch": 1.71, "learning_rate": 3.0467838609759043e-05, "loss": 4.3584, "step": 22395 }, { "epoch": 1.71, "learning_rate": 3.0427611730158094e-05, "loss": 4.0123, "step": 22400 }, { "epoch": 1.71, "learning_rate": 3.0387384850557142e-05, "loss": 4.0429, "step": 22405 }, { "epoch": 1.71, "learning_rate": 3.0347157970956197e-05, "loss": 3.8104, "step": 22410 }, { "epoch": 1.71, "learning_rate": 3.0306931091355245e-05, "loss": 2.3843, "step": 22415 }, { "epoch": 1.71, "learning_rate": 3.0266704211754293e-05, "loss": 2.1063, "step": 22420 }, { "epoch": 1.71, "learning_rate": 3.0226477332153348e-05, "loss": 2.0419, "step": 22425 }, { "epoch": 1.71, "learning_rate": 3.0186250452552396e-05, "loss": 1.0568, "step": 22430 }, { "epoch": 1.71, "learning_rate": 3.015406894887164e-05, "loss": 3.2819, "step": 22435 }, { "epoch": 1.72, "learning_rate": 3.0113842069270688e-05, "loss": 5.0303, "step": 22440 }, { "epoch": 1.72, "learning_rate": 3.007361518966974e-05, "loss": 3.7967, "step": 22445 }, { "epoch": 1.72, "learning_rate": 3.0033388310068787e-05, "loss": 3.6611, "step": 22450 }, { "epoch": 1.72, "learning_rate": 2.9993161430467842e-05, "loss": 3.6416, "step": 22455 }, { "epoch": 1.72, "learning_rate": 2.995293455086689e-05, "loss": 1.9096, "step": 22460 }, { "epoch": 1.72, "learning_rate": 2.9912707671265945e-05, "loss": 1.793, "step": 22465 }, { "epoch": 1.72, "learning_rate": 2.9872480791664993e-05, "loss": 2.3129, "step": 22470 }, { "epoch": 1.72, "learning_rate": 2.983225391206404e-05, "loss": 0.621, "step": 22475 }, { "epoch": 1.72, "learning_rate": 2.9792027032463093e-05, "loss": 1.0425, "step": 22480 }, { "epoch": 1.72, "learning_rate": 2.975180015286214e-05, "loss": 2.1539, "step": 22485 }, { "epoch": 1.72, "learning_rate": 2.9711573273261196e-05, "loss": 4.5879, "step": 22490 }, { "epoch": 1.72, "learning_rate": 2.9671346393660244e-05, "loss": 5.5826, "step": 22495 }, { "epoch": 1.72, "learning_rate": 2.96311195140593e-05, "loss": 4.015, "step": 22500 }, { "epoch": 1.72, "learning_rate": 2.9590892634458343e-05, "loss": 3.9393, "step": 22505 }, { "epoch": 1.72, "learning_rate": 2.9550665754857398e-05, "loss": 4.0424, "step": 22510 }, { "epoch": 1.72, "learning_rate": 2.9510438875256446e-05, "loss": 2.3775, "step": 22515 }, { "epoch": 1.72, "learning_rate": 2.94702119956555e-05, "loss": 3.9154, "step": 22520 }, { "epoch": 1.72, "learning_rate": 2.942998511605455e-05, "loss": 3.6949, "step": 22525 }, { "epoch": 1.72, "learning_rate": 2.93897582364536e-05, "loss": 3.6259, "step": 22530 }, { "epoch": 1.72, "learning_rate": 2.934953135685265e-05, "loss": 2.0479, "step": 22535 }, { "epoch": 1.72, "learning_rate": 2.9309304477251703e-05, "loss": 4.7387, "step": 22540 }, { "epoch": 1.72, "learning_rate": 2.926907759765075e-05, "loss": 4.1223, "step": 22545 }, { "epoch": 1.72, "learning_rate": 2.9228850718049806e-05, "loss": 3.8104, "step": 22550 }, { "epoch": 1.72, "learning_rate": 2.9188623838448854e-05, "loss": 2.8752, "step": 22555 }, { "epoch": 1.72, "learning_rate": 2.9148396958847902e-05, "loss": 3.5936, "step": 22560 }, { "epoch": 1.72, "learning_rate": 2.9108170079246954e-05, "loss": 3.1607, "step": 22565 }, { "epoch": 1.73, "learning_rate": 2.9067943199646002e-05, "loss": 2.8457, "step": 22570 }, { "epoch": 1.73, "learning_rate": 2.9027716320045057e-05, "loss": 2.4739, "step": 22575 }, { "epoch": 1.73, "learning_rate": 2.8987489440444105e-05, "loss": 3.7004, "step": 22580 }, { "epoch": 1.73, "learning_rate": 2.8947262560843156e-05, "loss": 2.4516, "step": 22585 }, { "epoch": 1.73, "learning_rate": 2.8907035681242205e-05, "loss": 4.2662, "step": 22590 }, { "epoch": 1.73, "learning_rate": 2.886680880164126e-05, "loss": 3.1941, "step": 22595 }, { "epoch": 1.73, "learning_rate": 2.8826581922040307e-05, "loss": 4.3604, "step": 22600 }, { "epoch": 1.73, "learning_rate": 2.8786355042439362e-05, "loss": 4.5975, "step": 22605 }, { "epoch": 1.73, "learning_rate": 2.874612816283841e-05, "loss": 2.4618, "step": 22610 }, { "epoch": 1.73, "learning_rate": 2.8705901283237462e-05, "loss": 2.5024, "step": 22615 }, { "epoch": 1.73, "learning_rate": 2.866567440363651e-05, "loss": 2.0588, "step": 22620 }, { "epoch": 1.73, "learning_rate": 2.8625447524035565e-05, "loss": 2.6591, "step": 22625 }, { "epoch": 1.73, "learning_rate": 2.8585220644434613e-05, "loss": 1.1996, "step": 22630 }, { "epoch": 1.73, "learning_rate": 2.854499376483366e-05, "loss": 1.5975, "step": 22635 }, { "epoch": 1.73, "learning_rate": 2.8504766885232716e-05, "loss": 4.0307, "step": 22640 }, { "epoch": 1.73, "learning_rate": 2.8464540005631764e-05, "loss": 4.334, "step": 22645 }, { "epoch": 1.73, "learning_rate": 2.8424313126030815e-05, "loss": 3.3168, "step": 22650 }, { "epoch": 1.73, "learning_rate": 2.8384086246429863e-05, "loss": 3.9016, "step": 22655 }, { "epoch": 1.73, "learning_rate": 2.8343859366828918e-05, "loss": 2.4265, "step": 22660 }, { "epoch": 1.73, "learning_rate": 2.8303632487227966e-05, "loss": 2.7111, "step": 22665 }, { "epoch": 1.73, "learning_rate": 2.8263405607627018e-05, "loss": 2.3812, "step": 22670 }, { "epoch": 1.73, "learning_rate": 2.8223178728026066e-05, "loss": 2.0577, "step": 22675 }, { "epoch": 1.73, "learning_rate": 2.818295184842512e-05, "loss": 1.1668, "step": 22680 }, { "epoch": 1.73, "learning_rate": 2.814272496882417e-05, "loss": 2.8699, "step": 22685 }, { "epoch": 1.73, "learning_rate": 2.8102498089223224e-05, "loss": 4.2182, "step": 22690 }, { "epoch": 1.73, "learning_rate": 2.8062271209622272e-05, "loss": 4.5916, "step": 22695 }, { "epoch": 1.73, "learning_rate": 2.8022044330021323e-05, "loss": 4.7451, "step": 22700 }, { "epoch": 1.74, "learning_rate": 2.798181745042037e-05, "loss": 4.1365, "step": 22705 }, { "epoch": 1.74, "learning_rate": 2.794159057081942e-05, "loss": 2.2262, "step": 22710 }, { "epoch": 1.74, "learning_rate": 2.7901363691218474e-05, "loss": 2.2605, "step": 22715 }, { "epoch": 1.74, "learning_rate": 2.7861136811617522e-05, "loss": 3.7399, "step": 22720 }, { "epoch": 1.74, "learning_rate": 2.7820909932016577e-05, "loss": 2.6796, "step": 22725 }, { "epoch": 1.74, "learning_rate": 2.7780683052415625e-05, "loss": 2.1081, "step": 22730 }, { "epoch": 1.74, "learning_rate": 2.7740456172814677e-05, "loss": 3.9569, "step": 22735 }, { "epoch": 1.74, "learning_rate": 2.7700229293213725e-05, "loss": 5.2086, "step": 22740 }, { "epoch": 1.74, "learning_rate": 2.766000241361278e-05, "loss": 4.1189, "step": 22745 }, { "epoch": 1.74, "learning_rate": 2.7619775534011828e-05, "loss": 3.4363, "step": 22750 }, { "epoch": 1.74, "learning_rate": 2.757954865441088e-05, "loss": 4.1549, "step": 22755 }, { "epoch": 1.74, "learning_rate": 2.7539321774809927e-05, "loss": 3.4964, "step": 22760 }, { "epoch": 1.74, "learning_rate": 2.7499094895208982e-05, "loss": 3.3692, "step": 22765 }, { "epoch": 1.74, "learning_rate": 2.745886801560803e-05, "loss": 1.6907, "step": 22770 }, { "epoch": 1.74, "learning_rate": 2.7418641136007085e-05, "loss": 1.4017, "step": 22775 }, { "epoch": 1.74, "learning_rate": 2.7378414256406133e-05, "loss": 1.4608, "step": 22780 }, { "epoch": 1.74, "learning_rate": 2.733818737680518e-05, "loss": 2.8462, "step": 22785 }, { "epoch": 1.74, "learning_rate": 2.7297960497204233e-05, "loss": 4.6463, "step": 22790 }, { "epoch": 1.74, "learning_rate": 2.725773361760328e-05, "loss": 4.1804, "step": 22795 }, { "epoch": 1.74, "learning_rate": 2.7217506738002336e-05, "loss": 3.4373, "step": 22800 }, { "epoch": 1.74, "learning_rate": 2.7177279858401384e-05, "loss": 2.9582, "step": 22805 }, { "epoch": 1.74, "learning_rate": 2.7137052978800435e-05, "loss": 3.3782, "step": 22810 }, { "epoch": 1.74, "learning_rate": 2.7096826099199483e-05, "loss": 3.1693, "step": 22815 }, { "epoch": 1.74, "learning_rate": 2.7056599219598538e-05, "loss": 2.3896, "step": 22820 }, { "epoch": 1.74, "learning_rate": 2.7016372339997586e-05, "loss": 2.9684, "step": 22825 }, { "epoch": 1.74, "learning_rate": 2.697614546039664e-05, "loss": 2.6333, "step": 22830 }, { "epoch": 1.75, "learning_rate": 2.693591858079569e-05, "loss": 4.8025, "step": 22835 }, { "epoch": 1.75, "learning_rate": 2.689569170119474e-05, "loss": 3.923, "step": 22840 }, { "epoch": 1.75, "learning_rate": 2.685546482159379e-05, "loss": 3.8394, "step": 22845 }, { "epoch": 1.75, "learning_rate": 2.6815237941992844e-05, "loss": 4.2166, "step": 22850 }, { "epoch": 1.75, "learning_rate": 2.677501106239189e-05, "loss": 3.2895, "step": 22855 }, { "epoch": 1.75, "learning_rate": 2.673478418279094e-05, "loss": 3.2745, "step": 22860 }, { "epoch": 1.75, "learning_rate": 2.6694557303189995e-05, "loss": 2.9658, "step": 22865 }, { "epoch": 1.75, "learning_rate": 2.6654330423589043e-05, "loss": 2.6981, "step": 22870 }, { "epoch": 1.75, "learning_rate": 2.6614103543988094e-05, "loss": 1.1102, "step": 22875 }, { "epoch": 1.75, "learning_rate": 2.6573876664387142e-05, "loss": 0.2825, "step": 22880 }, { "epoch": 1.75, "learning_rate": 2.6533649784786197e-05, "loss": 0.6763, "step": 22885 }, { "epoch": 1.75, "learning_rate": 2.6493422905185245e-05, "loss": 3.8113, "step": 22890 }, { "epoch": 1.75, "learning_rate": 2.6453196025584297e-05, "loss": 3.5855, "step": 22895 }, { "epoch": 1.75, "learning_rate": 2.6412969145983345e-05, "loss": 4.2107, "step": 22900 }, { "epoch": 1.75, "learning_rate": 2.63727422663824e-05, "loss": 3.6761, "step": 22905 }, { "epoch": 1.75, "learning_rate": 2.6332515386781448e-05, "loss": 2.6449, "step": 22910 }, { "epoch": 1.75, "learning_rate": 2.6292288507180502e-05, "loss": 1.46, "step": 22915 }, { "epoch": 1.75, "learning_rate": 2.625206162757955e-05, "loss": 2.5006, "step": 22920 }, { "epoch": 1.75, "learning_rate": 2.6211834747978602e-05, "loss": 1.2944, "step": 22925 }, { "epoch": 1.75, "learning_rate": 2.617160786837765e-05, "loss": 3.3048, "step": 22930 }, { "epoch": 1.75, "learning_rate": 2.6131380988776698e-05, "loss": 2.9723, "step": 22935 }, { "epoch": 1.75, "learning_rate": 2.6091154109175753e-05, "loss": 4.3123, "step": 22940 }, { "epoch": 1.75, "learning_rate": 2.60509272295748e-05, "loss": 5.226, "step": 22945 }, { "epoch": 1.75, "learning_rate": 2.6010700349973856e-05, "loss": 3.9012, "step": 22950 }, { "epoch": 1.75, "learning_rate": 2.5970473470372904e-05, "loss": 3.7363, "step": 22955 }, { "epoch": 1.75, "learning_rate": 2.5930246590771956e-05, "loss": 3.6529, "step": 22960 }, { "epoch": 1.76, "learning_rate": 2.5890019711171004e-05, "loss": 2.3566, "step": 22965 }, { "epoch": 1.76, "learning_rate": 2.584979283157006e-05, "loss": 2.1602, "step": 22970 }, { "epoch": 1.76, "learning_rate": 2.5809565951969107e-05, "loss": 3.2619, "step": 22975 }, { "epoch": 1.76, "learning_rate": 2.5769339072368158e-05, "loss": 2.565, "step": 22980 }, { "epoch": 1.76, "learning_rate": 2.5729112192767206e-05, "loss": 3.3445, "step": 22985 }, { "epoch": 1.76, "learning_rate": 2.568888531316626e-05, "loss": 4.1391, "step": 22990 }, { "epoch": 1.76, "learning_rate": 2.564865843356531e-05, "loss": 3.7564, "step": 22995 }, { "epoch": 1.76, "learning_rate": 2.5608431553964364e-05, "loss": 3.3184, "step": 23000 }, { "epoch": 1.76, "learning_rate": 2.5568204674363412e-05, "loss": 3.8339, "step": 23005 }, { "epoch": 1.76, "learning_rate": 2.552797779476246e-05, "loss": 2.4366, "step": 23010 }, { "epoch": 1.76, "learning_rate": 2.548775091516151e-05, "loss": 2.4264, "step": 23015 }, { "epoch": 1.76, "learning_rate": 2.544752403556056e-05, "loss": 1.9581, "step": 23020 }, { "epoch": 1.76, "learning_rate": 2.5407297155959614e-05, "loss": 1.901, "step": 23025 }, { "epoch": 1.76, "learning_rate": 2.5367070276358662e-05, "loss": 0.0299, "step": 23030 }, { "epoch": 1.76, "learning_rate": 2.5326843396757717e-05, "loss": 1.0677, "step": 23035 }, { "epoch": 1.76, "learning_rate": 2.5286616517156762e-05, "loss": 4.8, "step": 23040 }, { "epoch": 1.76, "learning_rate": 2.5246389637555817e-05, "loss": 3.8922, "step": 23045 }, { "epoch": 1.76, "learning_rate": 2.5206162757954865e-05, "loss": 3.9426, "step": 23050 }, { "epoch": 1.76, "learning_rate": 2.516593587835392e-05, "loss": 4.1615, "step": 23055 }, { "epoch": 1.76, "learning_rate": 2.5125708998752968e-05, "loss": 4.2786, "step": 23060 }, { "epoch": 1.76, "learning_rate": 2.508548211915202e-05, "loss": 3.0869, "step": 23065 }, { "epoch": 1.76, "learning_rate": 2.5045255239551067e-05, "loss": 2.5565, "step": 23070 }, { "epoch": 1.76, "learning_rate": 2.5005028359950122e-05, "loss": 2.0955, "step": 23075 }, { "epoch": 1.76, "learning_rate": 2.496480148034917e-05, "loss": 1.9791, "step": 23080 }, { "epoch": 1.76, "learning_rate": 2.4924574600748222e-05, "loss": 1.7667, "step": 23085 }, { "epoch": 1.76, "learning_rate": 2.4884347721147273e-05, "loss": 3.9285, "step": 23090 }, { "epoch": 1.77, "learning_rate": 2.484412084154632e-05, "loss": 4.3834, "step": 23095 }, { "epoch": 1.77, "learning_rate": 2.4803893961945373e-05, "loss": 4.3969, "step": 23100 }, { "epoch": 1.77, "learning_rate": 2.4763667082344424e-05, "loss": 4.2646, "step": 23105 }, { "epoch": 1.77, "learning_rate": 2.4723440202743476e-05, "loss": 3.1369, "step": 23110 }, { "epoch": 1.77, "learning_rate": 2.4683213323142527e-05, "loss": 3.584, "step": 23115 }, { "epoch": 1.77, "learning_rate": 2.4642986443541575e-05, "loss": 2.7839, "step": 23120 }, { "epoch": 1.77, "learning_rate": 2.4602759563940623e-05, "loss": 1.9654, "step": 23125 }, { "epoch": 1.77, "learning_rate": 2.4562532684339675e-05, "loss": 2.4053, "step": 23130 }, { "epoch": 1.77, "learning_rate": 2.4522305804738726e-05, "loss": 2.3811, "step": 23135 }, { "epoch": 1.77, "learning_rate": 2.4482078925137778e-05, "loss": 4.3805, "step": 23140 }, { "epoch": 1.77, "learning_rate": 2.444185204553683e-05, "loss": 3.634, "step": 23145 }, { "epoch": 1.77, "learning_rate": 2.440162516593588e-05, "loss": 4.3363, "step": 23150 }, { "epoch": 1.77, "learning_rate": 2.436139828633493e-05, "loss": 4.2129, "step": 23155 }, { "epoch": 1.77, "learning_rate": 2.432117140673398e-05, "loss": 3.1537, "step": 23160 }, { "epoch": 1.77, "learning_rate": 2.4280944527133032e-05, "loss": 3.78, "step": 23165 }, { "epoch": 1.77, "learning_rate": 2.4240717647532083e-05, "loss": 3.6497, "step": 23170 }, { "epoch": 1.77, "learning_rate": 2.4200490767931135e-05, "loss": 4.7099, "step": 23175 }, { "epoch": 1.77, "learning_rate": 2.4160263888330183e-05, "loss": 1.8507, "step": 23180 }, { "epoch": 1.77, "learning_rate": 2.4120037008729234e-05, "loss": 2.9047, "step": 23185 }, { "epoch": 1.77, "learning_rate": 2.4079810129128286e-05, "loss": 4.0793, "step": 23190 }, { "epoch": 1.77, "learning_rate": 2.4039583249527337e-05, "loss": 4.4531, "step": 23195 }, { "epoch": 1.77, "learning_rate": 2.3999356369926385e-05, "loss": 5.0652, "step": 23200 }, { "epoch": 1.77, "learning_rate": 2.3959129490325437e-05, "loss": 3.3764, "step": 23205 }, { "epoch": 1.77, "learning_rate": 2.3918902610724485e-05, "loss": 3.6626, "step": 23210 }, { "epoch": 1.77, "learning_rate": 2.3878675731123536e-05, "loss": 3.4744, "step": 23215 }, { "epoch": 1.77, "learning_rate": 2.3838448851522588e-05, "loss": 2.2833, "step": 23220 }, { "epoch": 1.78, "learning_rate": 2.379822197192164e-05, "loss": 0.5065, "step": 23225 }, { "epoch": 1.78, "learning_rate": 2.375799509232069e-05, "loss": 2.4229, "step": 23230 }, { "epoch": 1.78, "learning_rate": 2.371776821271974e-05, "loss": 0.7311, "step": 23235 }, { "epoch": 1.78, "learning_rate": 2.367754133311879e-05, "loss": 3.8694, "step": 23240 }, { "epoch": 1.78, "learning_rate": 2.3637314453517842e-05, "loss": 3.3383, "step": 23245 }, { "epoch": 1.78, "learning_rate": 2.3597087573916893e-05, "loss": 3.9184, "step": 23250 }, { "epoch": 1.78, "learning_rate": 2.3556860694315945e-05, "loss": 3.0708, "step": 23255 }, { "epoch": 1.78, "learning_rate": 2.3516633814714996e-05, "loss": 2.9025, "step": 23260 }, { "epoch": 1.78, "learning_rate": 2.3476406935114044e-05, "loss": 2.6064, "step": 23265 }, { "epoch": 1.78, "learning_rate": 2.3436180055513096e-05, "loss": 1.7971, "step": 23270 }, { "epoch": 1.78, "learning_rate": 2.3395953175912147e-05, "loss": 2.8858, "step": 23275 }, { "epoch": 1.78, "learning_rate": 2.3355726296311195e-05, "loss": 3.6838, "step": 23280 }, { "epoch": 1.78, "learning_rate": 2.3315499416710247e-05, "loss": 2.2048, "step": 23285 }, { "epoch": 1.78, "learning_rate": 2.3275272537109298e-05, "loss": 4.4943, "step": 23290 }, { "epoch": 1.78, "learning_rate": 2.3235045657508346e-05, "loss": 4.283, "step": 23295 }, { "epoch": 1.78, "learning_rate": 2.3194818777907398e-05, "loss": 4.518, "step": 23300 }, { "epoch": 1.78, "learning_rate": 2.315459189830645e-05, "loss": 3.516, "step": 23305 }, { "epoch": 1.78, "learning_rate": 2.31143650187055e-05, "loss": 4.2672, "step": 23310 }, { "epoch": 1.78, "learning_rate": 2.3074138139104552e-05, "loss": 3.4592, "step": 23315 }, { "epoch": 1.78, "learning_rate": 2.30339112595036e-05, "loss": 3.5898, "step": 23320 }, { "epoch": 1.78, "learning_rate": 2.299368437990265e-05, "loss": 2.7908, "step": 23325 }, { "epoch": 1.78, "learning_rate": 2.2953457500301703e-05, "loss": 2.1385, "step": 23330 }, { "epoch": 1.78, "learning_rate": 2.2913230620700755e-05, "loss": 1.7825, "step": 23335 }, { "epoch": 1.78, "learning_rate": 2.2873003741099806e-05, "loss": 4.9459, "step": 23340 }, { "epoch": 1.78, "learning_rate": 2.2832776861498854e-05, "loss": 3.7873, "step": 23345 }, { "epoch": 1.78, "learning_rate": 2.2792549981897906e-05, "loss": 3.7168, "step": 23350 }, { "epoch": 1.79, "learning_rate": 2.2752323102296954e-05, "loss": 3.9023, "step": 23355 }, { "epoch": 1.79, "learning_rate": 2.2712096222696005e-05, "loss": 1.3564, "step": 23360 }, { "epoch": 1.79, "learning_rate": 2.2671869343095057e-05, "loss": 3.3062, "step": 23365 }, { "epoch": 1.79, "learning_rate": 2.2631642463494108e-05, "loss": 4.416, "step": 23370 }, { "epoch": 1.79, "learning_rate": 2.259141558389316e-05, "loss": 3.2855, "step": 23375 }, { "epoch": 1.79, "learning_rate": 2.2551188704292208e-05, "loss": 3.3284, "step": 23380 }, { "epoch": 1.79, "learning_rate": 2.251096182469126e-05, "loss": 2.627, "step": 23385 }, { "epoch": 1.79, "learning_rate": 2.247073494509031e-05, "loss": 4.008, "step": 23390 }, { "epoch": 1.79, "learning_rate": 2.2430508065489362e-05, "loss": 4.5375, "step": 23395 }, { "epoch": 1.79, "learning_rate": 2.2390281185888413e-05, "loss": 4.3504, "step": 23400 }, { "epoch": 1.79, "learning_rate": 2.235005430628746e-05, "loss": 2.7492, "step": 23405 }, { "epoch": 1.79, "learning_rate": 2.2309827426686513e-05, "loss": 2.9355, "step": 23410 }, { "epoch": 1.79, "learning_rate": 2.2269600547085564e-05, "loss": 2.2093, "step": 23415 }, { "epoch": 1.79, "learning_rate": 2.2229373667484616e-05, "loss": 2.8348, "step": 23420 }, { "epoch": 1.79, "learning_rate": 2.2189146787883667e-05, "loss": 2.7705, "step": 23425 }, { "epoch": 1.79, "learning_rate": 2.2148919908282715e-05, "loss": 1.4777, "step": 23430 }, { "epoch": 1.79, "learning_rate": 2.2108693028681764e-05, "loss": 2.0384, "step": 23435 }, { "epoch": 1.79, "learning_rate": 2.2068466149080815e-05, "loss": 3.5889, "step": 23440 }, { "epoch": 1.79, "learning_rate": 2.2028239269479867e-05, "loss": 4.2697, "step": 23445 }, { "epoch": 1.79, "learning_rate": 2.1988012389878918e-05, "loss": 3.7738, "step": 23450 }, { "epoch": 1.79, "learning_rate": 2.194778551027797e-05, "loss": 3.7055, "step": 23455 }, { "epoch": 1.79, "learning_rate": 2.1907558630677018e-05, "loss": 3.8967, "step": 23460 }, { "epoch": 1.79, "learning_rate": 2.186733175107607e-05, "loss": 4.0676, "step": 23465 }, { "epoch": 1.79, "learning_rate": 2.182710487147512e-05, "loss": 3.5904, "step": 23470 }, { "epoch": 1.79, "learning_rate": 2.1786877991874172e-05, "loss": 1.4659, "step": 23475 }, { "epoch": 1.79, "learning_rate": 2.1746651112273223e-05, "loss": 4.3659, "step": 23480 }, { "epoch": 1.79, "learning_rate": 2.1706424232672275e-05, "loss": 2.4338, "step": 23485 }, { "epoch": 1.8, "learning_rate": 2.1666197353071323e-05, "loss": 3.2225, "step": 23490 }, { "epoch": 1.8, "learning_rate": 2.1625970473470374e-05, "loss": 3.9035, "step": 23495 }, { "epoch": 1.8, "learning_rate": 2.1585743593869426e-05, "loss": 4.0298, "step": 23500 }, { "epoch": 1.8, "learning_rate": 2.1545516714268474e-05, "loss": 4.2254, "step": 23505 }, { "epoch": 1.8, "learning_rate": 2.1505289834667525e-05, "loss": 2.9377, "step": 23510 }, { "epoch": 1.8, "learning_rate": 2.1465062955066577e-05, "loss": 2.3562, "step": 23515 }, { "epoch": 1.8, "learning_rate": 2.1424836075465625e-05, "loss": 1.3816, "step": 23520 }, { "epoch": 1.8, "learning_rate": 2.1384609195864676e-05, "loss": 2.5952, "step": 23525 }, { "epoch": 1.8, "learning_rate": 2.1344382316263728e-05, "loss": 1.156, "step": 23530 }, { "epoch": 1.8, "learning_rate": 2.130415543666278e-05, "loss": 2.085, "step": 23535 }, { "epoch": 1.8, "learning_rate": 2.126392855706183e-05, "loss": 4.2512, "step": 23540 }, { "epoch": 1.8, "learning_rate": 2.122370167746088e-05, "loss": 3.7354, "step": 23545 }, { "epoch": 1.8, "learning_rate": 2.118347479785993e-05, "loss": 3.8537, "step": 23550 }, { "epoch": 1.8, "learning_rate": 2.1143247918258982e-05, "loss": 3.9422, "step": 23555 }, { "epoch": 1.8, "learning_rate": 2.1103021038658033e-05, "loss": 2.9316, "step": 23560 }, { "epoch": 1.8, "learning_rate": 2.1062794159057085e-05, "loss": 3.9269, "step": 23565 }, { "epoch": 1.8, "learning_rate": 2.1022567279456136e-05, "loss": 1.4704, "step": 23570 }, { "epoch": 1.8, "learning_rate": 2.0982340399855184e-05, "loss": 2.5099, "step": 23575 }, { "epoch": 1.8, "learning_rate": 2.0942113520254236e-05, "loss": 2.5829, "step": 23580 }, { "epoch": 1.8, "learning_rate": 2.0901886640653284e-05, "loss": 4.3772, "step": 23585 }, { "epoch": 1.8, "learning_rate": 2.0861659761052335e-05, "loss": 3.4992, "step": 23590 }, { "epoch": 1.8, "learning_rate": 2.0821432881451387e-05, "loss": 4.1639, "step": 23595 }, { "epoch": 1.8, "learning_rate": 2.0781206001850438e-05, "loss": 3.1853, "step": 23600 }, { "epoch": 1.8, "learning_rate": 2.0740979122249486e-05, "loss": 3.6627, "step": 23605 }, { "epoch": 1.8, "learning_rate": 2.0700752242648538e-05, "loss": 3.7691, "step": 23610 }, { "epoch": 1.8, "learning_rate": 2.066052536304759e-05, "loss": 2.473, "step": 23615 }, { "epoch": 1.81, "learning_rate": 2.062029848344664e-05, "loss": 2.8132, "step": 23620 }, { "epoch": 1.81, "learning_rate": 2.0580071603845692e-05, "loss": 1.6381, "step": 23625 }, { "epoch": 1.81, "learning_rate": 2.053984472424474e-05, "loss": 1.6351, "step": 23630 }, { "epoch": 1.81, "learning_rate": 2.0499617844643792e-05, "loss": 2.8561, "step": 23635 }, { "epoch": 1.81, "learning_rate": 2.0459390965042843e-05, "loss": 3.5916, "step": 23640 }, { "epoch": 1.81, "learning_rate": 2.0419164085441895e-05, "loss": 3.0802, "step": 23645 }, { "epoch": 1.81, "learning_rate": 2.0378937205840946e-05, "loss": 4.2158, "step": 23650 }, { "epoch": 1.81, "learning_rate": 2.0338710326239994e-05, "loss": 1.5458, "step": 23655 }, { "epoch": 1.81, "learning_rate": 2.0298483446639042e-05, "loss": 3.1277, "step": 23660 }, { "epoch": 1.81, "learning_rate": 2.0258256567038094e-05, "loss": 3.5196, "step": 23665 }, { "epoch": 1.81, "learning_rate": 2.0218029687437145e-05, "loss": 3.1044, "step": 23670 }, { "epoch": 1.81, "learning_rate": 2.0177802807836197e-05, "loss": 4.1605, "step": 23675 }, { "epoch": 1.81, "learning_rate": 2.0137575928235248e-05, "loss": 1.0403, "step": 23680 }, { "epoch": 1.81, "learning_rate": 2.00973490486343e-05, "loss": 3.4376, "step": 23685 }, { "epoch": 1.81, "learning_rate": 2.0057122169033348e-05, "loss": 3.7799, "step": 23690 }, { "epoch": 1.81, "learning_rate": 2.00168952894324e-05, "loss": 5.3645, "step": 23695 }, { "epoch": 1.81, "learning_rate": 1.997666840983145e-05, "loss": 4.4109, "step": 23700 }, { "epoch": 1.81, "learning_rate": 1.9936441530230502e-05, "loss": 4.6688, "step": 23705 }, { "epoch": 1.81, "learning_rate": 1.9896214650629554e-05, "loss": 3.8071, "step": 23710 }, { "epoch": 1.81, "learning_rate": 1.98559877710286e-05, "loss": 3.7666, "step": 23715 }, { "epoch": 1.81, "learning_rate": 1.9815760891427653e-05, "loss": 2.0954, "step": 23720 }, { "epoch": 1.81, "learning_rate": 1.9775534011826705e-05, "loss": 2.5181, "step": 23725 }, { "epoch": 1.81, "learning_rate": 1.9735307132225756e-05, "loss": 3.2477, "step": 23730 }, { "epoch": 1.81, "learning_rate": 1.9695080252624804e-05, "loss": 1.3823, "step": 23735 }, { "epoch": 1.81, "learning_rate": 1.9654853373023856e-05, "loss": 4.2088, "step": 23740 }, { "epoch": 1.81, "learning_rate": 1.9614626493422904e-05, "loss": 4.3371, "step": 23745 }, { "epoch": 1.82, "learning_rate": 1.9574399613821955e-05, "loss": 4.924, "step": 23750 }, { "epoch": 1.82, "learning_rate": 1.9534172734221007e-05, "loss": 2.5295, "step": 23755 }, { "epoch": 1.82, "learning_rate": 1.9493945854620058e-05, "loss": 2.1711, "step": 23760 }, { "epoch": 1.82, "learning_rate": 1.945371897501911e-05, "loss": 1.7804, "step": 23765 }, { "epoch": 1.82, "learning_rate": 1.9413492095418158e-05, "loss": 2.7061, "step": 23770 }, { "epoch": 1.82, "learning_rate": 1.937326521581721e-05, "loss": 1.8079, "step": 23775 }, { "epoch": 1.82, "learning_rate": 1.933303833621626e-05, "loss": 2.6042, "step": 23780 }, { "epoch": 1.82, "learning_rate": 1.9292811456615312e-05, "loss": 1.4644, "step": 23785 }, { "epoch": 1.82, "learning_rate": 1.9252584577014364e-05, "loss": 3.4973, "step": 23790 }, { "epoch": 1.82, "learning_rate": 1.9212357697413415e-05, "loss": 5.3607, "step": 23795 }, { "epoch": 1.82, "learning_rate": 1.9172130817812463e-05, "loss": 2.8048, "step": 23800 }, { "epoch": 1.82, "learning_rate": 1.9131903938211515e-05, "loss": 1.9658, "step": 23805 }, { "epoch": 1.82, "learning_rate": 1.9091677058610566e-05, "loss": 3.1126, "step": 23810 }, { "epoch": 1.82, "learning_rate": 1.9051450179009614e-05, "loss": 2.9254, "step": 23815 }, { "epoch": 1.82, "learning_rate": 1.9011223299408666e-05, "loss": 0.5672, "step": 23820 }, { "epoch": 1.82, "learning_rate": 1.8970996419807717e-05, "loss": 2.4323, "step": 23825 }, { "epoch": 1.82, "learning_rate": 1.8930769540206765e-05, "loss": 1.714, "step": 23830 }, { "epoch": 1.82, "learning_rate": 1.8890542660605817e-05, "loss": 1.6771, "step": 23835 }, { "epoch": 1.82, "learning_rate": 1.8850315781004868e-05, "loss": 5.8141, "step": 23840 }, { "epoch": 1.82, "learning_rate": 1.881008890140392e-05, "loss": 3.5088, "step": 23845 }, { "epoch": 1.82, "learning_rate": 1.876986202180297e-05, "loss": 3.4639, "step": 23850 }, { "epoch": 1.82, "learning_rate": 1.872963514220202e-05, "loss": 4.4504, "step": 23855 }, { "epoch": 1.82, "learning_rate": 1.868940826260107e-05, "loss": 3.6905, "step": 23860 }, { "epoch": 1.82, "learning_rate": 1.8649181383000122e-05, "loss": 3.9271, "step": 23865 }, { "epoch": 1.82, "learning_rate": 1.8608954503399173e-05, "loss": 2.276, "step": 23870 }, { "epoch": 1.82, "learning_rate": 1.8568727623798225e-05, "loss": 1.4453, "step": 23875 }, { "epoch": 1.83, "learning_rate": 1.8528500744197273e-05, "loss": 1.511, "step": 23880 }, { "epoch": 1.83, "learning_rate": 1.8488273864596324e-05, "loss": 1.0376, "step": 23885 }, { "epoch": 1.83, "learning_rate": 1.8448046984995373e-05, "loss": 4.748, "step": 23890 }, { "epoch": 1.83, "learning_rate": 1.8407820105394424e-05, "loss": 3.6019, "step": 23895 }, { "epoch": 1.83, "learning_rate": 1.8367593225793475e-05, "loss": 4.3068, "step": 23900 }, { "epoch": 1.83, "learning_rate": 1.8327366346192527e-05, "loss": 3.8338, "step": 23905 }, { "epoch": 1.83, "learning_rate": 1.828713946659158e-05, "loss": 1.3604, "step": 23910 }, { "epoch": 1.83, "learning_rate": 1.8246912586990626e-05, "loss": 2.0121, "step": 23915 }, { "epoch": 1.83, "learning_rate": 1.8206685707389678e-05, "loss": 3.177, "step": 23920 }, { "epoch": 1.83, "learning_rate": 1.816645882778873e-05, "loss": 2.4406, "step": 23925 }, { "epoch": 1.83, "learning_rate": 1.812623194818778e-05, "loss": 2.1305, "step": 23930 }, { "epoch": 1.83, "learning_rate": 1.8086005068586832e-05, "loss": 0.9836, "step": 23935 }, { "epoch": 1.83, "learning_rate": 1.804577818898588e-05, "loss": 4.1375, "step": 23940 }, { "epoch": 1.83, "learning_rate": 1.8005551309384932e-05, "loss": 3.9156, "step": 23945 }, { "epoch": 1.83, "learning_rate": 1.7965324429783983e-05, "loss": 3.7585, "step": 23950 }, { "epoch": 1.83, "learning_rate": 1.7925097550183035e-05, "loss": 3.27, "step": 23955 }, { "epoch": 1.83, "learning_rate": 1.7884870670582086e-05, "loss": 4.2422, "step": 23960 }, { "epoch": 1.83, "learning_rate": 1.7844643790981134e-05, "loss": 3.0293, "step": 23965 }, { "epoch": 1.83, "learning_rate": 1.7804416911380182e-05, "loss": 3.3175, "step": 23970 }, { "epoch": 1.83, "learning_rate": 1.7764190031779234e-05, "loss": 2.4429, "step": 23975 }, { "epoch": 1.83, "learning_rate": 1.7723963152178285e-05, "loss": 1.251, "step": 23980 }, { "epoch": 1.83, "learning_rate": 1.7683736272577337e-05, "loss": 2.908, "step": 23985 }, { "epoch": 1.83, "learning_rate": 1.764350939297639e-05, "loss": 4.0328, "step": 23990 }, { "epoch": 1.83, "learning_rate": 1.7603282513375436e-05, "loss": 3.9133, "step": 23995 }, { "epoch": 1.83, "learning_rate": 1.7563055633774488e-05, "loss": 4.8342, "step": 24000 }, { "epoch": 1.83, "learning_rate": 1.752282875417354e-05, "loss": 3.8279, "step": 24005 }, { "epoch": 1.84, "learning_rate": 1.748260187457259e-05, "loss": 1.9251, "step": 24010 }, { "epoch": 1.84, "learning_rate": 1.7442374994971642e-05, "loss": 2.9262, "step": 24015 }, { "epoch": 1.84, "learning_rate": 1.7402148115370694e-05, "loss": 2.1363, "step": 24020 }, { "epoch": 1.84, "learning_rate": 1.7361921235769742e-05, "loss": 1.7946, "step": 24025 }, { "epoch": 1.84, "learning_rate": 1.7321694356168793e-05, "loss": 1.5292, "step": 24030 }, { "epoch": 1.84, "learning_rate": 1.7281467476567845e-05, "loss": 4.4959, "step": 24035 }, { "epoch": 1.84, "learning_rate": 1.7241240596966893e-05, "loss": 4.6133, "step": 24040 }, { "epoch": 1.84, "learning_rate": 1.7201013717365944e-05, "loss": 4.9404, "step": 24045 }, { "epoch": 1.84, "learning_rate": 1.7160786837764996e-05, "loss": 3.764, "step": 24050 }, { "epoch": 1.84, "learning_rate": 1.7120559958164044e-05, "loss": 4.8266, "step": 24055 }, { "epoch": 1.84, "learning_rate": 1.7080333078563095e-05, "loss": 3.7552, "step": 24060 }, { "epoch": 1.84, "learning_rate": 1.7040106198962147e-05, "loss": 2.8064, "step": 24065 }, { "epoch": 1.84, "learning_rate": 1.6999879319361198e-05, "loss": 3.0139, "step": 24070 }, { "epoch": 1.84, "learning_rate": 1.695965243976025e-05, "loss": 2.4623, "step": 24075 }, { "epoch": 1.84, "learning_rate": 1.6919425560159298e-05, "loss": 3.624, "step": 24080 }, { "epoch": 1.84, "learning_rate": 1.687919868055835e-05, "loss": 1.5064, "step": 24085 }, { "epoch": 1.84, "learning_rate": 1.68389718009574e-05, "loss": 4.409, "step": 24090 }, { "epoch": 1.84, "learning_rate": 1.6798744921356452e-05, "loss": 5.5877, "step": 24095 }, { "epoch": 1.84, "learning_rate": 1.6758518041755504e-05, "loss": 3.7055, "step": 24100 }, { "epoch": 1.84, "learning_rate": 1.6718291162154555e-05, "loss": 2.6408, "step": 24105 }, { "epoch": 1.84, "learning_rate": 1.6678064282553603e-05, "loss": 4.0404, "step": 24110 }, { "epoch": 1.84, "learning_rate": 1.6637837402952655e-05, "loss": 2.8409, "step": 24115 }, { "epoch": 1.84, "learning_rate": 1.6597610523351703e-05, "loss": 2.9476, "step": 24120 }, { "epoch": 1.84, "learning_rate": 1.6557383643750754e-05, "loss": 0.8873, "step": 24125 }, { "epoch": 1.84, "learning_rate": 1.6517156764149806e-05, "loss": 2.8307, "step": 24130 }, { "epoch": 1.84, "learning_rate": 1.6476929884548857e-05, "loss": 2.2032, "step": 24135 }, { "epoch": 1.85, "learning_rate": 1.6436703004947905e-05, "loss": 4.2484, "step": 24140 }, { "epoch": 1.85, "learning_rate": 1.6396476125346957e-05, "loss": 3.6621, "step": 24145 }, { "epoch": 1.85, "learning_rate": 1.6356249245746008e-05, "loss": 4.5232, "step": 24150 }, { "epoch": 1.85, "learning_rate": 1.631602236614506e-05, "loss": 3.4658, "step": 24155 }, { "epoch": 1.85, "learning_rate": 1.627579548654411e-05, "loss": 3.6425, "step": 24160 }, { "epoch": 1.85, "learning_rate": 1.623556860694316e-05, "loss": 2.3619, "step": 24165 }, { "epoch": 1.85, "learning_rate": 1.619534172734221e-05, "loss": 2.8156, "step": 24170 }, { "epoch": 1.85, "learning_rate": 1.6155114847741262e-05, "loss": 0.9569, "step": 24175 }, { "epoch": 1.85, "learning_rate": 1.6114887968140314e-05, "loss": 1.9015, "step": 24180 }, { "epoch": 1.85, "learning_rate": 1.6074661088539365e-05, "loss": 1.9707, "step": 24185 }, { "epoch": 1.85, "learning_rate": 1.6034434208938413e-05, "loss": 4.2664, "step": 24190 }, { "epoch": 1.85, "learning_rate": 1.599420732933746e-05, "loss": 4.7297, "step": 24195 }, { "epoch": 1.85, "learning_rate": 1.5953980449736513e-05, "loss": 3.9335, "step": 24200 }, { "epoch": 1.85, "learning_rate": 1.5913753570135564e-05, "loss": 3.4213, "step": 24205 }, { "epoch": 1.85, "learning_rate": 1.5873526690534616e-05, "loss": 3.0822, "step": 24210 }, { "epoch": 1.85, "learning_rate": 1.5833299810933667e-05, "loss": 2.5482, "step": 24215 }, { "epoch": 1.85, "learning_rate": 1.579307293133272e-05, "loss": 3.5275, "step": 24220 }, { "epoch": 1.85, "learning_rate": 1.5752846051731767e-05, "loss": 2.0448, "step": 24225 }, { "epoch": 1.85, "learning_rate": 1.5712619172130818e-05, "loss": 1.7843, "step": 24230 }, { "epoch": 1.85, "learning_rate": 1.567239229252987e-05, "loss": 1.5835, "step": 24235 }, { "epoch": 1.85, "learning_rate": 1.563216541292892e-05, "loss": 4.832, "step": 24240 }, { "epoch": 1.85, "learning_rate": 1.5591938533327973e-05, "loss": 3.8062, "step": 24245 }, { "epoch": 1.85, "learning_rate": 1.555171165372702e-05, "loss": 3.6148, "step": 24250 }, { "epoch": 1.85, "learning_rate": 1.5511484774126072e-05, "loss": 4.2596, "step": 24255 }, { "epoch": 1.85, "learning_rate": 1.5471257894525124e-05, "loss": 3.2226, "step": 24260 }, { "epoch": 1.85, "learning_rate": 1.5431031014924175e-05, "loss": 2.3329, "step": 24265 }, { "epoch": 1.85, "learning_rate": 1.5390804135323223e-05, "loss": 3.3771, "step": 24270 }, { "epoch": 1.86, "learning_rate": 1.5350577255722275e-05, "loss": 2.6768, "step": 24275 }, { "epoch": 1.86, "learning_rate": 1.5310350376121323e-05, "loss": 3.1228, "step": 24280 }, { "epoch": 1.86, "learning_rate": 1.5270123496520374e-05, "loss": 1.9548, "step": 24285 }, { "epoch": 1.86, "learning_rate": 1.5229896616919426e-05, "loss": 4.4299, "step": 24290 }, { "epoch": 1.86, "learning_rate": 1.5189669737318477e-05, "loss": 4.6846, "step": 24295 }, { "epoch": 1.86, "learning_rate": 1.5149442857717527e-05, "loss": 4.5248, "step": 24300 }, { "epoch": 1.86, "learning_rate": 1.5109215978116578e-05, "loss": 3.1845, "step": 24305 }, { "epoch": 1.86, "learning_rate": 1.506898909851563e-05, "loss": 2.2644, "step": 24310 }, { "epoch": 1.86, "learning_rate": 1.502876221891468e-05, "loss": 1.6771, "step": 24315 }, { "epoch": 1.86, "learning_rate": 1.4988535339313731e-05, "loss": 2.1373, "step": 24320 }, { "epoch": 1.86, "learning_rate": 1.494830845971278e-05, "loss": 1.9354, "step": 24325 }, { "epoch": 1.86, "learning_rate": 1.4908081580111832e-05, "loss": 1.3599, "step": 24330 }, { "epoch": 1.86, "learning_rate": 1.4867854700510884e-05, "loss": 1.6992, "step": 24335 }, { "epoch": 1.86, "learning_rate": 1.4827627820909933e-05, "loss": 4.1914, "step": 24340 }, { "epoch": 1.86, "learning_rate": 1.4787400941308982e-05, "loss": 5.009, "step": 24345 }, { "epoch": 1.86, "learning_rate": 1.4747174061708033e-05, "loss": 3.6693, "step": 24350 }, { "epoch": 1.86, "learning_rate": 1.4706947182107084e-05, "loss": 3.537, "step": 24355 }, { "epoch": 1.86, "learning_rate": 1.4666720302506134e-05, "loss": 2.5959, "step": 24360 }, { "epoch": 1.86, "learning_rate": 1.4626493422905186e-05, "loss": 2.8873, "step": 24365 }, { "epoch": 1.86, "learning_rate": 1.4586266543304235e-05, "loss": 2.0024, "step": 24370 }, { "epoch": 1.86, "learning_rate": 1.4546039663703287e-05, "loss": 3.3242, "step": 24375 }, { "epoch": 1.86, "learning_rate": 1.4505812784102338e-05, "loss": 1.5681, "step": 24380 }, { "epoch": 1.86, "learning_rate": 1.4465585904501388e-05, "loss": 4.3365, "step": 24385 }, { "epoch": 1.86, "learning_rate": 1.442535902490044e-05, "loss": 4.5049, "step": 24390 }, { "epoch": 1.86, "learning_rate": 1.4385132145299491e-05, "loss": 3.7834, "step": 24395 }, { "epoch": 1.86, "learning_rate": 1.4344905265698541e-05, "loss": 3.6518, "step": 24400 }, { "epoch": 1.87, "learning_rate": 1.4304678386097592e-05, "loss": 3.5904, "step": 24405 }, { "epoch": 1.87, "learning_rate": 1.4264451506496642e-05, "loss": 2.5824, "step": 24410 }, { "epoch": 1.87, "learning_rate": 1.4224224626895694e-05, "loss": 3.6668, "step": 24415 }, { "epoch": 1.87, "learning_rate": 1.4183997747294745e-05, "loss": 2.6437, "step": 24420 }, { "epoch": 1.87, "learning_rate": 1.4143770867693793e-05, "loss": 1.704, "step": 24425 }, { "epoch": 1.87, "learning_rate": 1.4103543988092843e-05, "loss": 2.013, "step": 24430 }, { "epoch": 1.87, "learning_rate": 1.4063317108491894e-05, "loss": 0.8377, "step": 24435 }, { "epoch": 1.87, "learning_rate": 1.4023090228890944e-05, "loss": 4.3678, "step": 24440 }, { "epoch": 1.87, "learning_rate": 1.3982863349289996e-05, "loss": 4.1889, "step": 24445 }, { "epoch": 1.87, "learning_rate": 1.3942636469689047e-05, "loss": 4.3562, "step": 24450 }, { "epoch": 1.87, "learning_rate": 1.3902409590088097e-05, "loss": 4.6929, "step": 24455 }, { "epoch": 1.87, "learning_rate": 1.3862182710487148e-05, "loss": 3.6932, "step": 24460 }, { "epoch": 1.87, "learning_rate": 1.38219558308862e-05, "loss": 2.9506, "step": 24465 }, { "epoch": 1.87, "learning_rate": 1.378172895128525e-05, "loss": 1.9533, "step": 24470 }, { "epoch": 1.87, "learning_rate": 1.3741502071684301e-05, "loss": 1.3351, "step": 24475 }, { "epoch": 1.87, "learning_rate": 1.370127519208335e-05, "loss": 2.0779, "step": 24480 }, { "epoch": 1.87, "learning_rate": 1.3661048312482402e-05, "loss": 1.5529, "step": 24485 }, { "epoch": 1.87, "learning_rate": 1.3620821432881454e-05, "loss": 4.1068, "step": 24490 }, { "epoch": 1.87, "learning_rate": 1.3580594553280504e-05, "loss": 4.8277, "step": 24495 }, { "epoch": 1.87, "learning_rate": 1.3540367673679552e-05, "loss": 3.857, "step": 24500 }, { "epoch": 1.87, "learning_rate": 1.3500140794078603e-05, "loss": 4.1428, "step": 24505 }, { "epoch": 1.87, "learning_rate": 1.3459913914477655e-05, "loss": 3.019, "step": 24510 }, { "epoch": 1.87, "learning_rate": 1.3419687034876704e-05, "loss": 2.5749, "step": 24515 }, { "epoch": 1.87, "learning_rate": 1.3379460155275756e-05, "loss": 3.4932, "step": 24520 }, { "epoch": 1.87, "learning_rate": 1.3339233275674806e-05, "loss": 2.0479, "step": 24525 }, { "epoch": 1.87, "learning_rate": 1.3299006396073857e-05, "loss": 3.4595, "step": 24530 }, { "epoch": 1.88, "learning_rate": 1.3258779516472908e-05, "loss": 2.5689, "step": 24535 }, { "epoch": 1.88, "learning_rate": 1.3218552636871958e-05, "loss": 3.9021, "step": 24540 }, { "epoch": 1.88, "learning_rate": 1.317832575727101e-05, "loss": 4.1131, "step": 24545 }, { "epoch": 1.88, "learning_rate": 1.313809887767006e-05, "loss": 3.3045, "step": 24550 }, { "epoch": 1.88, "learning_rate": 1.3097871998069111e-05, "loss": 3.8982, "step": 24555 }, { "epoch": 1.88, "learning_rate": 1.3057645118468162e-05, "loss": 2.1605, "step": 24560 }, { "epoch": 1.88, "learning_rate": 1.3017418238867212e-05, "loss": 3.1492, "step": 24565 }, { "epoch": 1.88, "learning_rate": 1.2977191359266264e-05, "loss": 1.427, "step": 24570 }, { "epoch": 1.88, "learning_rate": 1.2936964479665312e-05, "loss": 2.1544, "step": 24575 }, { "epoch": 1.88, "learning_rate": 1.2896737600064363e-05, "loss": 3.2604, "step": 24580 }, { "epoch": 1.88, "learning_rate": 1.2856510720463413e-05, "loss": 1.2599, "step": 24585 }, { "epoch": 1.88, "learning_rate": 1.2816283840862464e-05, "loss": 4.0691, "step": 24590 }, { "epoch": 1.88, "learning_rate": 1.2776056961261514e-05, "loss": 4.2889, "step": 24595 }, { "epoch": 1.88, "learning_rate": 1.2735830081660566e-05, "loss": 3.4133, "step": 24600 }, { "epoch": 1.88, "learning_rate": 1.2695603202059617e-05, "loss": 4.1035, "step": 24605 }, { "epoch": 1.88, "learning_rate": 1.2655376322458667e-05, "loss": 3.4185, "step": 24610 }, { "epoch": 1.88, "learning_rate": 1.2615149442857718e-05, "loss": 2.4912, "step": 24615 }, { "epoch": 1.88, "learning_rate": 1.257492256325677e-05, "loss": 2.678, "step": 24620 }, { "epoch": 1.88, "learning_rate": 1.253469568365582e-05, "loss": 1.9975, "step": 24625 }, { "epoch": 1.88, "learning_rate": 1.2494468804054871e-05, "loss": 0.6405, "step": 24630 }, { "epoch": 1.88, "learning_rate": 1.2454241924453921e-05, "loss": 1.8826, "step": 24635 }, { "epoch": 1.88, "learning_rate": 1.241401504485297e-05, "loss": 4.4268, "step": 24640 }, { "epoch": 1.88, "learning_rate": 1.2373788165252022e-05, "loss": 3.8896, "step": 24645 }, { "epoch": 1.88, "learning_rate": 1.2333561285651072e-05, "loss": 4.8965, "step": 24650 }, { "epoch": 1.88, "learning_rate": 1.2293334406050123e-05, "loss": 3.1094, "step": 24655 }, { "epoch": 1.88, "learning_rate": 1.2253107526449175e-05, "loss": 2.9067, "step": 24660 }, { "epoch": 1.89, "learning_rate": 1.2212880646848225e-05, "loss": 2.9209, "step": 24665 }, { "epoch": 1.89, "learning_rate": 1.2172653767247274e-05, "loss": 2.7946, "step": 24670 }, { "epoch": 1.89, "learning_rate": 1.2132426887646326e-05, "loss": 1.3476, "step": 24675 }, { "epoch": 1.89, "learning_rate": 1.2092200008045376e-05, "loss": 1.7138, "step": 24680 }, { "epoch": 1.89, "learning_rate": 1.2051973128444427e-05, "loss": 2.9379, "step": 24685 }, { "epoch": 1.89, "learning_rate": 1.2011746248843479e-05, "loss": 3.7029, "step": 24690 }, { "epoch": 1.89, "learning_rate": 1.1971519369242528e-05, "loss": 4.3479, "step": 24695 }, { "epoch": 1.89, "learning_rate": 1.193129248964158e-05, "loss": 3.7926, "step": 24700 }, { "epoch": 1.89, "learning_rate": 1.189106561004063e-05, "loss": 3.3841, "step": 24705 }, { "epoch": 1.89, "learning_rate": 1.185083873043968e-05, "loss": 4.1531, "step": 24710 }, { "epoch": 1.89, "learning_rate": 1.181061185083873e-05, "loss": 2.6995, "step": 24715 }, { "epoch": 1.89, "learning_rate": 1.1770384971237782e-05, "loss": 1.0784, "step": 24720 }, { "epoch": 1.89, "learning_rate": 1.1730158091636832e-05, "loss": 2.5422, "step": 24725 }, { "epoch": 1.89, "learning_rate": 1.1689931212035884e-05, "loss": 2.6564, "step": 24730 }, { "epoch": 1.89, "learning_rate": 1.1649704332434933e-05, "loss": 3.302, "step": 24735 }, { "epoch": 1.89, "learning_rate": 1.1609477452833985e-05, "loss": 3.8146, "step": 24740 }, { "epoch": 1.89, "learning_rate": 1.1569250573233035e-05, "loss": 3.9229, "step": 24745 }, { "epoch": 1.89, "learning_rate": 1.1529023693632084e-05, "loss": 4.4492, "step": 24750 }, { "epoch": 1.89, "learning_rate": 1.1488796814031136e-05, "loss": 3.6742, "step": 24755 }, { "epoch": 1.89, "learning_rate": 1.1448569934430187e-05, "loss": 2.4106, "step": 24760 }, { "epoch": 1.89, "learning_rate": 1.1408343054829237e-05, "loss": 2.7309, "step": 24765 }, { "epoch": 1.89, "learning_rate": 1.1368116175228288e-05, "loss": 1.6355, "step": 24770 }, { "epoch": 1.89, "learning_rate": 1.132788929562734e-05, "loss": 2.7919, "step": 24775 }, { "epoch": 1.89, "learning_rate": 1.128766241602639e-05, "loss": 4.5257, "step": 24780 }, { "epoch": 1.89, "learning_rate": 1.124743553642544e-05, "loss": 3.9398, "step": 24785 }, { "epoch": 1.89, "learning_rate": 1.1207208656824491e-05, "loss": 4.6109, "step": 24790 }, { "epoch": 1.9, "learning_rate": 1.116698177722354e-05, "loss": 3.3293, "step": 24795 }, { "epoch": 1.9, "learning_rate": 1.1126754897622592e-05, "loss": 3.921, "step": 24800 }, { "epoch": 1.9, "learning_rate": 1.1086528018021642e-05, "loss": 4.2807, "step": 24805 }, { "epoch": 1.9, "learning_rate": 1.1046301138420693e-05, "loss": 3.1406, "step": 24810 }, { "epoch": 1.9, "learning_rate": 1.1006074258819745e-05, "loss": 3.4969, "step": 24815 }, { "epoch": 1.9, "learning_rate": 1.0965847379218795e-05, "loss": 2.3031, "step": 24820 }, { "epoch": 1.9, "learning_rate": 1.0925620499617844e-05, "loss": 3.3691, "step": 24825 }, { "epoch": 1.9, "learning_rate": 1.0885393620016896e-05, "loss": 2.8064, "step": 24830 }, { "epoch": 1.9, "learning_rate": 1.0845166740415946e-05, "loss": 1.1896, "step": 24835 }, { "epoch": 1.9, "learning_rate": 1.0804939860814997e-05, "loss": 4.5449, "step": 24840 }, { "epoch": 1.9, "learning_rate": 1.0764712981214049e-05, "loss": 3.3299, "step": 24845 }, { "epoch": 1.9, "learning_rate": 1.0724486101613098e-05, "loss": 4.4686, "step": 24850 }, { "epoch": 1.9, "learning_rate": 1.068425922201215e-05, "loss": 3.9811, "step": 24855 }, { "epoch": 1.9, "learning_rate": 1.06440323424112e-05, "loss": 3.383, "step": 24860 }, { "epoch": 1.9, "learning_rate": 1.060380546281025e-05, "loss": 2.5898, "step": 24865 }, { "epoch": 1.9, "learning_rate": 1.0563578583209301e-05, "loss": 2.4245, "step": 24870 }, { "epoch": 1.9, "learning_rate": 1.052335170360835e-05, "loss": 2.3957, "step": 24875 }, { "epoch": 1.9, "learning_rate": 1.0483124824007402e-05, "loss": 1.7501, "step": 24880 }, { "epoch": 1.9, "learning_rate": 1.0442897944406454e-05, "loss": 1.7959, "step": 24885 }, { "epoch": 1.9, "learning_rate": 1.0402671064805503e-05, "loss": 4.0732, "step": 24890 }, { "epoch": 1.9, "learning_rate": 1.0362444185204555e-05, "loss": 4.116, "step": 24895 }, { "epoch": 1.9, "learning_rate": 1.0322217305603605e-05, "loss": 4.5895, "step": 24900 }, { "epoch": 1.9, "learning_rate": 1.0281990426002654e-05, "loss": 3.9937, "step": 24905 }, { "epoch": 1.9, "learning_rate": 1.0241763546401706e-05, "loss": 4.1619, "step": 24910 }, { "epoch": 1.9, "learning_rate": 1.0201536666800757e-05, "loss": 4.0855, "step": 24915 }, { "epoch": 1.9, "learning_rate": 1.0161309787199807e-05, "loss": 2.9171, "step": 24920 }, { "epoch": 1.9, "learning_rate": 1.0121082907598859e-05, "loss": 1.7636, "step": 24925 }, { "epoch": 1.91, "learning_rate": 1.008085602799791e-05, "loss": 2.8449, "step": 24930 }, { "epoch": 1.91, "learning_rate": 1.004062914839696e-05, "loss": 2.8344, "step": 24935 }, { "epoch": 1.91, "learning_rate": 1.000040226879601e-05, "loss": 3.7783, "step": 24940 }, { "epoch": 1.91, "learning_rate": 9.960175389195061e-06, "loss": 4.4023, "step": 24945 }, { "epoch": 1.91, "learning_rate": 9.91994850959411e-06, "loss": 3.5332, "step": 24950 }, { "epoch": 1.91, "learning_rate": 9.879721629993162e-06, "loss": 2.6086, "step": 24955 }, { "epoch": 1.91, "learning_rate": 9.839494750392212e-06, "loss": 3.8582, "step": 24960 }, { "epoch": 1.91, "learning_rate": 9.799267870791264e-06, "loss": 2.062, "step": 24965 }, { "epoch": 1.91, "learning_rate": 9.759040991190315e-06, "loss": 3.416, "step": 24970 }, { "epoch": 1.91, "learning_rate": 9.718814111589363e-06, "loss": 2.2982, "step": 24975 }, { "epoch": 1.91, "learning_rate": 9.678587231988415e-06, "loss": 0.8549, "step": 24980 }, { "epoch": 1.91, "learning_rate": 9.638360352387466e-06, "loss": 2.1826, "step": 24985 }, { "epoch": 1.91, "learning_rate": 9.598133472786516e-06, "loss": 5.0311, "step": 24990 }, { "epoch": 1.91, "learning_rate": 9.557906593185567e-06, "loss": 4.0307, "step": 24995 }, { "epoch": 1.91, "learning_rate": 9.517679713584619e-06, "loss": 3.3335, "step": 25000 }, { "epoch": 1.91, "learning_rate": 9.477452833983668e-06, "loss": 2.483, "step": 25005 }, { "epoch": 1.91, "learning_rate": 9.43722595438272e-06, "loss": 3.2506, "step": 25010 }, { "epoch": 1.91, "learning_rate": 9.39699907478177e-06, "loss": 3.9112, "step": 25015 }, { "epoch": 1.91, "learning_rate": 9.35677219518082e-06, "loss": 2.9277, "step": 25020 }, { "epoch": 1.91, "learning_rate": 9.316545315579871e-06, "loss": 1.3599, "step": 25025 }, { "epoch": 1.91, "learning_rate": 9.27631843597892e-06, "loss": 0.6077, "step": 25030 }, { "epoch": 1.91, "learning_rate": 9.236091556377972e-06, "loss": 1.0871, "step": 25035 }, { "epoch": 1.91, "learning_rate": 9.195864676777024e-06, "loss": 3.9141, "step": 25040 }, { "epoch": 1.91, "learning_rate": 9.155637797176073e-06, "loss": 4.5723, "step": 25045 }, { "epoch": 1.91, "learning_rate": 9.115410917575123e-06, "loss": 3.6136, "step": 25050 }, { "epoch": 1.91, "learning_rate": 9.075184037974175e-06, "loss": 3.3955, "step": 25055 }, { "epoch": 1.92, "learning_rate": 9.034957158373224e-06, "loss": 3.2014, "step": 25060 }, { "epoch": 1.92, "learning_rate": 8.994730278772276e-06, "loss": 2.9881, "step": 25065 }, { "epoch": 1.92, "learning_rate": 8.954503399171327e-06, "loss": 3.3361, "step": 25070 }, { "epoch": 1.92, "learning_rate": 8.914276519570377e-06, "loss": 2.0125, "step": 25075 }, { "epoch": 1.92, "learning_rate": 8.874049639969429e-06, "loss": 2.2374, "step": 25080 }, { "epoch": 1.92, "learning_rate": 8.833822760368478e-06, "loss": 2.4366, "step": 25085 }, { "epoch": 1.92, "learning_rate": 8.793595880767528e-06, "loss": 3.9693, "step": 25090 }, { "epoch": 1.92, "learning_rate": 8.75336900116658e-06, "loss": 4.5941, "step": 25095 }, { "epoch": 1.92, "learning_rate": 8.713142121565631e-06, "loss": 4.5533, "step": 25100 }, { "epoch": 1.92, "learning_rate": 8.672915241964681e-06, "loss": 4.7197, "step": 25105 }, { "epoch": 1.92, "learning_rate": 8.632688362363732e-06, "loss": 3.6422, "step": 25110 }, { "epoch": 1.92, "learning_rate": 8.592461482762782e-06, "loss": 2.0502, "step": 25115 }, { "epoch": 1.92, "learning_rate": 8.552234603161834e-06, "loss": 3.1903, "step": 25120 }, { "epoch": 1.92, "learning_rate": 8.512007723560885e-06, "loss": 1.453, "step": 25125 }, { "epoch": 1.92, "learning_rate": 8.471780843959933e-06, "loss": 2.6905, "step": 25130 }, { "epoch": 1.92, "learning_rate": 8.431553964358985e-06, "loss": 1.6192, "step": 25135 }, { "epoch": 1.92, "learning_rate": 8.391327084758036e-06, "loss": 3.7004, "step": 25140 }, { "epoch": 1.92, "learning_rate": 8.351100205157086e-06, "loss": 3.7824, "step": 25145 }, { "epoch": 1.92, "learning_rate": 8.310873325556137e-06, "loss": 5.5062, "step": 25150 }, { "epoch": 1.92, "learning_rate": 8.270646445955189e-06, "loss": 3.8611, "step": 25155 }, { "epoch": 1.92, "learning_rate": 8.230419566354239e-06, "loss": 3.7684, "step": 25160 }, { "epoch": 1.92, "learning_rate": 8.190192686753288e-06, "loss": 3.4285, "step": 25165 }, { "epoch": 1.92, "learning_rate": 8.14996580715234e-06, "loss": 3.1098, "step": 25170 }, { "epoch": 1.92, "learning_rate": 8.10973892755139e-06, "loss": 3.1606, "step": 25175 }, { "epoch": 1.92, "learning_rate": 8.069512047950441e-06, "loss": 1.8826, "step": 25180 }, { "epoch": 1.92, "learning_rate": 8.02928516834949e-06, "loss": 2.1713, "step": 25185 }, { "epoch": 1.93, "learning_rate": 7.989058288748542e-06, "loss": 4.7242, "step": 25190 }, { "epoch": 1.93, "learning_rate": 7.948831409147594e-06, "loss": 4.0975, "step": 25195 }, { "epoch": 1.93, "learning_rate": 7.908604529546644e-06, "loss": 3.651, "step": 25200 }, { "epoch": 1.93, "learning_rate": 7.868377649945693e-06, "loss": 4.6125, "step": 25205 }, { "epoch": 1.93, "learning_rate": 7.828150770344745e-06, "loss": 3.1507, "step": 25210 }, { "epoch": 1.93, "learning_rate": 7.787923890743795e-06, "loss": 2.712, "step": 25215 }, { "epoch": 1.93, "learning_rate": 7.747697011142846e-06, "loss": 3.0889, "step": 25220 }, { "epoch": 1.93, "learning_rate": 7.707470131541897e-06, "loss": 1.9682, "step": 25225 }, { "epoch": 1.93, "learning_rate": 7.667243251940947e-06, "loss": 3.9404, "step": 25230 }, { "epoch": 1.93, "learning_rate": 7.627016372339999e-06, "loss": 2.6924, "step": 25235 }, { "epoch": 1.93, "learning_rate": 7.586789492739049e-06, "loss": 4.6404, "step": 25240 }, { "epoch": 1.93, "learning_rate": 7.546562613138099e-06, "loss": 4.0658, "step": 25245 }, { "epoch": 1.93, "learning_rate": 7.50633573353715e-06, "loss": 3.5027, "step": 25250 }, { "epoch": 1.93, "learning_rate": 7.4661088539362e-06, "loss": 4.2753, "step": 25255 }, { "epoch": 1.93, "learning_rate": 7.425881974335251e-06, "loss": 2.2604, "step": 25260 }, { "epoch": 1.93, "learning_rate": 7.385655094734302e-06, "loss": 2.5162, "step": 25265 }, { "epoch": 1.93, "learning_rate": 7.345428215133353e-06, "loss": 3.1524, "step": 25270 }, { "epoch": 1.93, "learning_rate": 7.305201335532404e-06, "loss": 2.0495, "step": 25275 }, { "epoch": 1.93, "learning_rate": 7.2649744559314534e-06, "loss": 2.8922, "step": 25280 }, { "epoch": 1.93, "learning_rate": 7.224747576330504e-06, "loss": 0.9049, "step": 25285 }, { "epoch": 1.93, "learning_rate": 7.184520696729555e-06, "loss": 3.7818, "step": 25290 }, { "epoch": 1.93, "learning_rate": 7.144293817128605e-06, "loss": 3.5922, "step": 25295 }, { "epoch": 1.93, "learning_rate": 7.104066937527657e-06, "loss": 3.708, "step": 25300 }, { "epoch": 1.93, "learning_rate": 7.063840057926707e-06, "loss": 3.2295, "step": 25305 }, { "epoch": 1.93, "learning_rate": 7.023613178325758e-06, "loss": 3.4177, "step": 25310 }, { "epoch": 1.93, "learning_rate": 6.983386298724809e-06, "loss": 2.8741, "step": 25315 }, { "epoch": 1.94, "learning_rate": 6.943159419123858e-06, "loss": 2.726, "step": 25320 }, { "epoch": 1.94, "learning_rate": 6.902932539522909e-06, "loss": 3.9523, "step": 25325 }, { "epoch": 1.94, "learning_rate": 6.8707510358421505e-06, "loss": 3.2671, "step": 25330 }, { "epoch": 1.94, "learning_rate": 6.830524156241201e-06, "loss": 3.2882, "step": 25335 }, { "epoch": 1.94, "learning_rate": 6.790297276640252e-06, "loss": 3.732, "step": 25340 }, { "epoch": 1.94, "learning_rate": 6.7500703970393015e-06, "loss": 5.0133, "step": 25345 }, { "epoch": 1.94, "learning_rate": 6.709843517438352e-06, "loss": 2.8558, "step": 25350 }, { "epoch": 1.94, "learning_rate": 6.669616637837403e-06, "loss": 3.9777, "step": 25355 }, { "epoch": 1.94, "learning_rate": 6.629389758236454e-06, "loss": 2.9651, "step": 25360 }, { "epoch": 1.94, "learning_rate": 6.589162878635505e-06, "loss": 3.3267, "step": 25365 }, { "epoch": 1.94, "learning_rate": 6.5489359990345555e-06, "loss": 2.6909, "step": 25370 }, { "epoch": 1.94, "learning_rate": 6.508709119433606e-06, "loss": 1.3714, "step": 25375 }, { "epoch": 1.94, "learning_rate": 6.468482239832656e-06, "loss": 3.6478, "step": 25380 }, { "epoch": 1.94, "learning_rate": 6.4282553602317065e-06, "loss": 0.7816, "step": 25385 }, { "epoch": 1.94, "learning_rate": 6.388028480630757e-06, "loss": 3.7924, "step": 25390 }, { "epoch": 1.94, "learning_rate": 6.347801601029809e-06, "loss": 3.7242, "step": 25395 }, { "epoch": 1.94, "learning_rate": 6.307574721428859e-06, "loss": 5.2104, "step": 25400 }, { "epoch": 1.94, "learning_rate": 6.26734784182791e-06, "loss": 3.7268, "step": 25405 }, { "epoch": 1.94, "learning_rate": 6.2271209622269604e-06, "loss": 3.1541, "step": 25410 }, { "epoch": 1.94, "learning_rate": 6.186894082626011e-06, "loss": 4.2297, "step": 25415 }, { "epoch": 1.94, "learning_rate": 6.146667203025062e-06, "loss": 2.2711, "step": 25420 }, { "epoch": 1.94, "learning_rate": 6.106440323424112e-06, "loss": 3.1184, "step": 25425 }, { "epoch": 1.94, "learning_rate": 6.066213443823163e-06, "loss": 2.7837, "step": 25430 }, { "epoch": 1.94, "learning_rate": 6.0259865642222135e-06, "loss": 1.4746, "step": 25435 }, { "epoch": 1.94, "learning_rate": 5.985759684621264e-06, "loss": 5.2686, "step": 25440 }, { "epoch": 1.94, "learning_rate": 5.945532805020315e-06, "loss": 4.7791, "step": 25445 }, { "epoch": 1.95, "learning_rate": 5.905305925419365e-06, "loss": 3.8869, "step": 25450 }, { "epoch": 1.95, "learning_rate": 5.865079045818416e-06, "loss": 3.2348, "step": 25455 }, { "epoch": 1.95, "learning_rate": 5.824852166217467e-06, "loss": 4.1375, "step": 25460 }, { "epoch": 1.95, "learning_rate": 5.784625286616517e-06, "loss": 2.7724, "step": 25465 }, { "epoch": 1.95, "learning_rate": 5.744398407015568e-06, "loss": 2.3747, "step": 25470 }, { "epoch": 1.95, "learning_rate": 5.7041715274146185e-06, "loss": 1.9912, "step": 25475 }, { "epoch": 1.95, "learning_rate": 5.66394464781367e-06, "loss": 2.2611, "step": 25480 }, { "epoch": 1.95, "learning_rate": 5.62371776821272e-06, "loss": 2.0734, "step": 25485 }, { "epoch": 1.95, "learning_rate": 5.58349088861177e-06, "loss": 4.0169, "step": 25490 }, { "epoch": 1.95, "learning_rate": 5.543264009010821e-06, "loss": 4.0657, "step": 25495 }, { "epoch": 1.95, "learning_rate": 5.5030371294098725e-06, "loss": 3.7256, "step": 25500 }, { "epoch": 1.95, "learning_rate": 5.462810249808922e-06, "loss": 3.7052, "step": 25505 }, { "epoch": 1.95, "learning_rate": 5.422583370207973e-06, "loss": 3.3589, "step": 25510 }, { "epoch": 1.95, "learning_rate": 5.382356490607024e-06, "loss": 3.0988, "step": 25515 }, { "epoch": 1.95, "learning_rate": 5.342129611006075e-06, "loss": 1.339, "step": 25520 }, { "epoch": 1.95, "learning_rate": 5.301902731405125e-06, "loss": 2.7499, "step": 25525 }, { "epoch": 1.95, "learning_rate": 5.261675851804175e-06, "loss": 2.7686, "step": 25530 }, { "epoch": 1.95, "learning_rate": 5.221448972203227e-06, "loss": 3.5382, "step": 25535 }, { "epoch": 1.95, "learning_rate": 5.181222092602277e-06, "loss": 3.9172, "step": 25540 }, { "epoch": 1.95, "learning_rate": 5.140995213001327e-06, "loss": 4.0725, "step": 25545 }, { "epoch": 1.95, "learning_rate": 5.100768333400379e-06, "loss": 4.2646, "step": 25550 }, { "epoch": 1.95, "learning_rate": 5.060541453799429e-06, "loss": 3.8309, "step": 25555 }, { "epoch": 1.95, "learning_rate": 5.02031457419848e-06, "loss": 3.0889, "step": 25560 }, { "epoch": 1.95, "learning_rate": 4.9800876945975305e-06, "loss": 3.4834, "step": 25565 }, { "epoch": 1.95, "learning_rate": 4.939860814996581e-06, "loss": 3.4956, "step": 25570 }, { "epoch": 1.95, "learning_rate": 4.899633935395632e-06, "loss": 1.8908, "step": 25575 }, { "epoch": 1.96, "learning_rate": 4.8594070557946815e-06, "loss": 4.3148, "step": 25580 }, { "epoch": 1.96, "learning_rate": 4.819180176193733e-06, "loss": 3.6078, "step": 25585 }, { "epoch": 1.96, "learning_rate": 4.778953296592784e-06, "loss": 3.7635, "step": 25590 }, { "epoch": 1.96, "learning_rate": 4.738726416991834e-06, "loss": 3.8357, "step": 25595 }, { "epoch": 1.96, "learning_rate": 4.698499537390885e-06, "loss": 3.5507, "step": 25600 }, { "epoch": 1.96, "learning_rate": 4.6582726577899355e-06, "loss": 2.5274, "step": 25605 }, { "epoch": 1.96, "learning_rate": 4.618045778188986e-06, "loss": 2.7862, "step": 25610 }, { "epoch": 1.96, "learning_rate": 4.577818898588037e-06, "loss": 2.16, "step": 25615 }, { "epoch": 1.96, "learning_rate": 4.537592018987087e-06, "loss": 3.5172, "step": 25620 }, { "epoch": 1.96, "learning_rate": 4.497365139386138e-06, "loss": 1.672, "step": 25625 }, { "epoch": 1.96, "learning_rate": 4.4571382597851886e-06, "loss": 0.9123, "step": 25630 }, { "epoch": 1.96, "learning_rate": 4.416911380184239e-06, "loss": 1.4239, "step": 25635 }, { "epoch": 1.96, "learning_rate": 4.37668450058329e-06, "loss": 4.6506, "step": 25640 }, { "epoch": 1.96, "learning_rate": 4.3364576209823404e-06, "loss": 3.7811, "step": 25645 }, { "epoch": 1.96, "learning_rate": 4.296230741381391e-06, "loss": 3.5947, "step": 25650 }, { "epoch": 1.96, "learning_rate": 4.2560038617804425e-06, "loss": 3.8759, "step": 25655 }, { "epoch": 1.96, "learning_rate": 4.215776982179492e-06, "loss": 3.2007, "step": 25660 }, { "epoch": 1.96, "learning_rate": 4.175550102578543e-06, "loss": 2.9636, "step": 25665 }, { "epoch": 1.96, "learning_rate": 4.135323222977594e-06, "loss": 2.5895, "step": 25670 }, { "epoch": 1.96, "learning_rate": 4.095096343376644e-06, "loss": 2.5369, "step": 25675 }, { "epoch": 1.96, "learning_rate": 4.054869463775695e-06, "loss": 1.8998, "step": 25680 }, { "epoch": 1.96, "learning_rate": 4.014642584174745e-06, "loss": 3.4223, "step": 25685 }, { "epoch": 1.96, "learning_rate": 3.974415704573797e-06, "loss": 4.2734, "step": 25690 }, { "epoch": 1.96, "learning_rate": 3.934188824972847e-06, "loss": 3.8895, "step": 25695 }, { "epoch": 1.96, "learning_rate": 3.893961945371897e-06, "loss": 4.1982, "step": 25700 }, { "epoch": 1.96, "learning_rate": 3.853735065770949e-06, "loss": 3.1064, "step": 25705 }, { "epoch": 1.96, "learning_rate": 3.8135081861699993e-06, "loss": 3.512, "step": 25710 }, { "epoch": 1.97, "learning_rate": 3.7732813065690495e-06, "loss": 2.4828, "step": 25715 }, { "epoch": 1.97, "learning_rate": 3.7330544269681e-06, "loss": 3.1687, "step": 25720 }, { "epoch": 1.97, "learning_rate": 3.692827547367151e-06, "loss": 3.2518, "step": 25725 }, { "epoch": 1.97, "learning_rate": 3.652600667766202e-06, "loss": 2.2701, "step": 25730 }, { "epoch": 1.97, "learning_rate": 3.612373788165252e-06, "loss": 2.698, "step": 25735 }, { "epoch": 1.97, "learning_rate": 3.5721469085643026e-06, "loss": 4.4486, "step": 25740 }, { "epoch": 1.97, "learning_rate": 3.5319200289633537e-06, "loss": 4.2291, "step": 25745 }, { "epoch": 1.97, "learning_rate": 3.4916931493624043e-06, "loss": 3.1553, "step": 25750 }, { "epoch": 1.97, "learning_rate": 3.4514662697614545e-06, "loss": 4.0023, "step": 25755 }, { "epoch": 1.97, "learning_rate": 3.4112393901605055e-06, "loss": 5.1721, "step": 25760 }, { "epoch": 1.97, "learning_rate": 3.371012510559556e-06, "loss": 2.4705, "step": 25765 }, { "epoch": 1.97, "learning_rate": 3.330785630958607e-06, "loss": 2.4637, "step": 25770 }, { "epoch": 1.97, "learning_rate": 3.290558751357657e-06, "loss": 2.728, "step": 25775 }, { "epoch": 1.97, "learning_rate": 3.250331871756708e-06, "loss": 2.834, "step": 25780 }, { "epoch": 1.97, "learning_rate": 3.2101049921557586e-06, "loss": 1.8569, "step": 25785 }, { "epoch": 1.97, "learning_rate": 3.169878112554809e-06, "loss": 4.0803, "step": 25790 }, { "epoch": 1.97, "learning_rate": 3.12965123295386e-06, "loss": 3.2705, "step": 25795 }, { "epoch": 1.97, "learning_rate": 3.0894243533529105e-06, "loss": 3.3291, "step": 25800 }, { "epoch": 1.97, "learning_rate": 3.049197473751961e-06, "loss": 5.2221, "step": 25805 }, { "epoch": 1.97, "learning_rate": 3.0089705941510117e-06, "loss": 3.5207, "step": 25810 }, { "epoch": 1.97, "learning_rate": 2.9687437145500624e-06, "loss": 3.5691, "step": 25815 }, { "epoch": 1.97, "learning_rate": 2.9285168349491134e-06, "loss": 1.4946, "step": 25820 }, { "epoch": 1.97, "learning_rate": 2.8882899553481636e-06, "loss": 2.3518, "step": 25825 }, { "epoch": 1.97, "learning_rate": 2.8480630757472147e-06, "loss": 2.2248, "step": 25830 }, { "epoch": 1.97, "learning_rate": 2.807836196146265e-06, "loss": 2.2885, "step": 25835 }, { "epoch": 1.97, "learning_rate": 2.767609316545316e-06, "loss": 3.764, "step": 25840 }, { "epoch": 1.98, "learning_rate": 2.7273824369443665e-06, "loss": 4.1377, "step": 25845 }, { "epoch": 1.98, "learning_rate": 2.687155557343417e-06, "loss": 4.3615, "step": 25850 }, { "epoch": 1.98, "learning_rate": 2.6469286777424678e-06, "loss": 3.9139, "step": 25855 }, { "epoch": 1.98, "learning_rate": 2.6067017981415184e-06, "loss": 3.9283, "step": 25860 }, { "epoch": 1.98, "learning_rate": 2.566474918540569e-06, "loss": 2.8451, "step": 25865 }, { "epoch": 1.98, "learning_rate": 2.5262480389396196e-06, "loss": 2.6068, "step": 25870 }, { "epoch": 1.98, "learning_rate": 2.4860211593386702e-06, "loss": 3.0293, "step": 25875 }, { "epoch": 1.98, "learning_rate": 2.445794279737721e-06, "loss": 1.631, "step": 25880 }, { "epoch": 1.98, "learning_rate": 2.4055674001367715e-06, "loss": 1.9995, "step": 25885 }, { "epoch": 1.98, "learning_rate": 2.365340520535822e-06, "loss": 4.6109, "step": 25890 }, { "epoch": 1.98, "learning_rate": 2.3251136409348727e-06, "loss": 4.3992, "step": 25895 }, { "epoch": 1.98, "learning_rate": 2.2848867613339233e-06, "loss": 3.3955, "step": 25900 }, { "epoch": 1.98, "learning_rate": 2.244659881732974e-06, "loss": 2.3407, "step": 25905 }, { "epoch": 1.98, "learning_rate": 2.2044330021320246e-06, "loss": 2.9357, "step": 25910 }, { "epoch": 1.98, "learning_rate": 2.1642061225310756e-06, "loss": 2.0345, "step": 25915 }, { "epoch": 1.98, "learning_rate": 2.123979242930126e-06, "loss": 2.8767, "step": 25920 }, { "epoch": 1.98, "learning_rate": 2.083752363329177e-06, "loss": 2.4327, "step": 25925 }, { "epoch": 1.98, "learning_rate": 2.043525483728227e-06, "loss": 1.5194, "step": 25930 }, { "epoch": 1.98, "learning_rate": 2.003298604127278e-06, "loss": 2.0557, "step": 25935 }, { "epoch": 1.98, "learning_rate": 1.9630717245263287e-06, "loss": 4.5334, "step": 25940 }, { "epoch": 1.98, "learning_rate": 1.9228448449253793e-06, "loss": 3.535, "step": 25945 }, { "epoch": 1.98, "learning_rate": 1.8826179653244297e-06, "loss": 3.9789, "step": 25950 }, { "epoch": 1.98, "learning_rate": 1.8423910857234806e-06, "loss": 4.2756, "step": 25955 }, { "epoch": 1.98, "learning_rate": 1.8021642061225312e-06, "loss": 3.7443, "step": 25960 }, { "epoch": 1.98, "learning_rate": 1.761937326521582e-06, "loss": 2.4496, "step": 25965 }, { "epoch": 1.98, "learning_rate": 1.7217104469206324e-06, "loss": 2.5245, "step": 25970 }, { "epoch": 1.99, "learning_rate": 1.6814835673196833e-06, "loss": 2.2765, "step": 25975 }, { "epoch": 1.99, "learning_rate": 1.6412566877187337e-06, "loss": 2.3552, "step": 25980 }, { "epoch": 1.99, "learning_rate": 1.6010298081177843e-06, "loss": 3.3843, "step": 25985 }, { "epoch": 1.99, "learning_rate": 1.5608029285168351e-06, "loss": 4.0086, "step": 25990 }, { "epoch": 1.99, "learning_rate": 1.5205760489158858e-06, "loss": 4.1575, "step": 25995 }, { "epoch": 1.99, "learning_rate": 1.4803491693149364e-06, "loss": 4.2672, "step": 26000 }, { "epoch": 1.99, "learning_rate": 1.440122289713987e-06, "loss": 3.7455, "step": 26005 }, { "epoch": 1.99, "learning_rate": 1.3998954101130376e-06, "loss": 3.8271, "step": 26010 }, { "epoch": 1.99, "learning_rate": 1.3596685305120882e-06, "loss": 3.1133, "step": 26015 }, { "epoch": 1.99, "learning_rate": 1.3194416509111389e-06, "loss": 3.2707, "step": 26020 }, { "epoch": 1.99, "learning_rate": 1.2792147713101897e-06, "loss": 1.7346, "step": 26025 }, { "epoch": 1.99, "learning_rate": 1.23898789170924e-06, "loss": 1.4858, "step": 26030 }, { "epoch": 1.99, "learning_rate": 1.1987610121082907e-06, "loss": 2.2901, "step": 26035 }, { "epoch": 1.99, "learning_rate": 1.1585341325073413e-06, "loss": 4.3547, "step": 26040 }, { "epoch": 1.99, "learning_rate": 1.118307252906392e-06, "loss": 4.3891, "step": 26045 }, { "epoch": 1.99, "learning_rate": 1.0780803733054428e-06, "loss": 3.8504, "step": 26050 }, { "epoch": 1.99, "learning_rate": 1.0378534937044934e-06, "loss": 3.1188, "step": 26055 }, { "epoch": 1.99, "learning_rate": 9.97626614103544e-07, "loss": 3.6335, "step": 26060 }, { "epoch": 1.99, "learning_rate": 9.573997345025946e-07, "loss": 1.9387, "step": 26065 }, { "epoch": 1.99, "learning_rate": 9.171728549016453e-07, "loss": 3.2563, "step": 26070 }, { "epoch": 1.99, "learning_rate": 8.76945975300696e-07, "loss": 2.0166, "step": 26075 }, { "epoch": 1.99, "learning_rate": 8.367190956997466e-07, "loss": 3.0024, "step": 26080 }, { "epoch": 1.99, "learning_rate": 7.964922160987972e-07, "loss": 3.026, "step": 26085 }, { "epoch": 1.99, "learning_rate": 7.56265336497848e-07, "loss": 4.6111, "step": 26090 }, { "epoch": 1.99, "learning_rate": 7.160384568968986e-07, "loss": 4.4326, "step": 26095 }, { "epoch": 1.99, "learning_rate": 6.758115772959492e-07, "loss": 4.4527, "step": 26100 }, { "epoch": 2.0, "learning_rate": 6.355846976949998e-07, "loss": 2.902, "step": 26105 }, { "epoch": 2.0, "learning_rate": 5.953578180940504e-07, "loss": 3.3514, "step": 26110 }, { "epoch": 2.0, "learning_rate": 5.551309384931011e-07, "loss": 3.167, "step": 26115 }, { "epoch": 2.0, "learning_rate": 5.149040588921518e-07, "loss": 2.4512, "step": 26120 }, { "epoch": 2.0, "learning_rate": 4.746771792912024e-07, "loss": 1.9114, "step": 26125 }, { "epoch": 2.0, "learning_rate": 4.3445029969025303e-07, "loss": 2.0456, "step": 26130 }, { "epoch": 2.0, "learning_rate": 3.942234200893037e-07, "loss": 1.6755, "step": 26135 }, { "epoch": 2.0, "learning_rate": 3.5399654048835437e-07, "loss": 5.232, "step": 26140 }, { "epoch": 2.0, "learning_rate": 3.13769660887405e-07, "loss": 3.2018, "step": 26145 }, { "epoch": 2.0, "learning_rate": 2.735427812864556e-07, "loss": 2.7205, "step": 26150 }, { "epoch": 2.0, "learning_rate": 2.3331590168550626e-07, "loss": 3.5699, "step": 26155 }, { "epoch": 2.0, "learning_rate": 1.930890220845569e-07, "loss": 3.0736, "step": 26160 }, { "epoch": 2.0, "learning_rate": 1.5286214248360755e-07, "loss": 1.3811, "step": 26165 }, { "epoch": 2.0, "eval_exact_match": 19.45205479452055, "eval_f1": 23.848435348153973, "eval_loss": 3.1337833404541016, "eval_runtime": 131.2034, "eval_samples_per_second": 11.128, "eval_steps_per_second": 11.128, "step": 26168 } ], "logging_steps": 5, "max_steps": 26168, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 8847303388182072.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }